From a7e05afe1ccca74e6c98ac8d1d4fca9df85e1788 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 13 Jun 2021 04:07:04 -0400 Subject: [PATCH 01/78] PoC for Migen version of the SBus slave FSM; is recognized by PROM --- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 127 +++++++++ .../sbus_to_fpga_slave.py | 265 ++++++++++++++++++ 2 files changed, 392 insertions(+) create mode 100644 sbus-to-ztex-gateware-migen/sbus-to-fpga.py create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py diff --git a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py new file mode 100644 index 0000000..3fdc22b --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py @@ -0,0 +1,127 @@ +import os +import argparse +from migen import * +import litex +from litex.build.generic_platform import * +from litex.build.xilinx.vivado import vivado_build_args, vivado_build_argdict +from litex.soc.integration.soc import * +from litex.soc.integration.soc_core import * +from litex.soc.integration.builder import * +from litex.soc.cores.clock import * +from litex_boards.platforms import ztex213 + +from sbus_to_fpga_slave import *; + +_sbus_sbus = [ + ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), + ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), + ("SBUS_3V3_BGs", 0, Pins("T6"), IOStandard("lvttl")), + ("SBUS_3V3_BRs", 0, Pins("R6"), IOStandard("lvttl")), + ("SBUS_3V3_ERRs", 0, Pins("V2"), IOStandard("lvttl")), + ("SBUS_DATA_OE_LED", 0, Pins("U1"), IOStandard("lvttl")), + ("SBUS_DATA_OE_LED_2", 0, Pins("T3"), IOStandard("lvttl")), + ("SBUS_3V3_RSTs", 0, Pins("U2"), IOStandard("lvttl")), + ("SBUS_3V3_SELs", 0, Pins("K6"), IOStandard("lvttl")), + ("SBUS_3V3_INT1s", 0, Pins("R3"), IOStandard("lvttl")), + ("SBUS_3V3_INT7s", 0, Pins("N5"), IOStandard("lvttl")), + ("SBUS_3V3_PPRD", 0, Pins("N6"), IOStandard("lvttl")), + ("SBUS_OE", 0, Pins("P5"), IOStandard("lvttl")), + ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), + 
("SBUS_3V3_SIZ", 0, Pins("R7 U3 V1"), IOStandard("lvttl")), + ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), + ("SBUS_3V3_PA", 0, Pins(" B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), +] +# CRG ---------------------------------------------------------------------------------------------- + +class _CRG(Module): + def __init__(self, platform, sys_clk_freq): + self.clock_domains.cd_sys = ClockDomain() + self.clock_domains.cd_native = ClockDomain(reset_less=True) + self.clock_domains.cd_sbus = ClockDomain() + self.clock_domains.cd_por = ClockDomain() + + # # # + clk48 = platform.request("clk48") + clk_sbus = platform.request("SBUS_3V3_CLK") + + self.submodules.pll = pll = S7MMCM(speedgrade=-1) + pll.register_clkin(clk48, 48e6) + pll.create_clkout(self.cd_sys, sys_clk_freq) + + self.comb += self.cd_sbus.clk.eq(clk_sbus) + self.comb += self.cd_sbus.rst.eq(~platform.request("SBUS_3V3_RSTs")) + + self.comb += self.cd_native.clk.eq(clk48) + + platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) + + # FIXME: add SBUS_3V3_RSTs + + # Power on reset, 20 seconds + por_count = Signal(30, reset=20*48*1000000) + por_done = Signal() + self.comb += self.cd_por.clk.eq(clk48) + self.comb += por_done.eq(por_count == 0) + self.sync.por += If(~por_done, por_count.eq(por_count - 1)) + self.comb += pll.reset.eq(~por_done) + +class SBusFPGA(SoCCore): + def __init__(self, **kwargs): + + kwargs["cpu_type"] = "None" + kwargs["integrated_sram_size"] = 0 + kwargs["with_uart"] = True + kwargs["with_timer"] = False + + self.sys_clk_freq = sys_clk_freq = 100e6 + + self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") + self.platform.add_extension(_sbus_sbus) + SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, 
clk_freq=sys_clk_freq, **kwargs) + self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) + self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) + + prom_file = "prom_mini.fc" + prom_data = soc_core.get_mem_data(prom_file, "big") + prom = Array(prom_data) + #print("\n****************************************\n") + #for i in range(len(prom)): + # print(hex(prom[i])) + #print("\n****************************************\n") + #self.add_ram("prom", origin=0x0, size=2**14, contents=prom_data, mode="r") + #getattr(self,"prom").mem.init = prom_data + #getattr(self,"prom").mem.depth = 2**14 + + hold_reset_ctr = Signal(30, reset=960000000) + self.sync.native += If(hold_reset_ctr>0, hold_reset_ctr.eq(hold_reset_ctr - 1)) + hold_reset = Signal(reset=1) + self.comb += hold_reset.eq(~(hold_reset_ctr == 0)) + self.submodules.slave = ClockDomainsRenamer("sbus")(SBusFPGASlave(platform=self.platform, soc=self, prom=prom, hold_reset=hold_reset)) + + # self.soc = Module() + # self.soc.mem_regions = self.mem_regions = {} + # region = litex.soc.integration.soc.SoCRegion(origin=0x0, size=0x0) + # region.length = 0 + # self.mem_regions['csr'] = region + # self.soc.constants = self.constants = {} + # self.soc.csr_regions = self.csr_regions = {} + # self.soc.cpu_type = self.cpu_type = None + +# def do_finalize(self): +# self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) + +def main(): + parser = argparse.ArgumentParser(description="SbusFPGA") + parser.add_argument("--build", action="store_true", help="Build bitstream") + builder_args(parser) + vivado_build_args(parser) + args = parser.parse_args() + + soc = SBusFPGA(**soc_core_argdict(args)) + #soc.add_uart(name="uart", baudrate=115200, fifo_depth=16) + + builder = Builder(soc, **builder_argdict(args)) + builder.build(**vivado_build_argdict(args), run=args.build) + +if __name__ == "__main__": + main() diff --git 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py new file mode 100644 index 0000000..5cd19ad --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -0,0 +1,265 @@ + +from migen import * +from migen.genlib.fifo import SyncFIFOBuffered +from migen.fhdl.specials import Tristate + +SIZ_WORD = 0x0 +SIZ_BYTE = 0x1 +SIZ_HWORD = 0x2 +SIZ_EXT = 0x3 +SIZ_BURST4 = 0x4 +SIZ_BURST8 = 0x5 +SIZ_BURST16 = 0x6 +SIZ_BURST2 = 0x7 + +ACK_IDLE = 0x7 +ACK_ERR = 0x6 +ACK_BYTE = 0x5 +ACK_RERUN = 0x4 +ACK_WORD = 0x3 +ACK_DWORD = 0x2 +ACK_HWORD = 0x1 +ACK_RECV = 0x0 + +def siz_is_word(siz): + return (SIZ_WORD == siz) or (SIZ_BURST2 == siz) or (SIZ_BURST4 == siz) or (SIZ_BURST8 == siz) or (SIZ_BURST16 == siz) + +def index_with_wrap(counter, limit_m1, value): + if (limit_m1 == 0): + return value[0:4] + elif (limit_m1 == 1): + return Cat((value + counter)[0:1], value[1:4]) + elif (limit_m1 == 3): + return Cat((value + counter)[0:2], value[2:4]) + elif (limit_m1 == 7): + return Cat((value + counter)[0:3], value[3:4]) + elif (limit_m1 == 15): + return (value + counter)[0:4] + return value[0:4] + +def siz_to_burst_size_m1(siz): + if (SIZ_WORD == siz): + return 0 + elif (SIZ_BURST2 == siz): + return 1 + elif (SIZ_BURST4 == siz): + return 3 + elif (SIZ_BURST8 == siz): + return 7 + elif (SIZ_BURST16 == siz): + return 15 + return 1 + +# siz_to_burst_size_m1 = { +# SIZ_WORD: 0, +# SIZ_BURST2: 1, +# SIZ_BURST4: 3, +# SIZ_BURST8: 7, +# SIZ_BURST16: 15 +# }; + +class SBusFPGASlave(Module): + def __init__(self, platform, soc, prom, hold_reset): + + self.hold_reset = hold_reset + + #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") + pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") + pad_SBUS_3V3_BGs = platform.request("SBUS_3V3_BGs") + pad_SBUS_3V3_BRs = platform.request("SBUS_3V3_BRs") + pad_SBUS_3V3_ERRs = platform.request("SBUS_3V3_ERRs") + pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + pad_SBUS_DATA_OE_LED_2 = 
platform.request("SBUS_DATA_OE_LED_2") + #pad_SBUS_3V3_RSTs = platform.request("SBUS_3V3_RSTs") + pad_SBUS_3V3_SELs = platform.request("SBUS_3V3_SELs") + pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") + pad_SBUS_3V3_INT7s = platform.request("SBUS_3V3_INT7s") + pad_SBUS_3V3_PPRD = platform.request("SBUS_3V3_PPRD") + pad_SBUS_OE = platform.request("SBUS_OE") + pad_SBUS_3V3_ACKs = platform.request("SBUS_3V3_ACKs") + pad_SBUS_3V3_SIZ = platform.request("SBUS_3V3_SIZ") + pad_SBUS_3V3_D = platform.request("SBUS_3V3_D") + pad_SBUS_3V3_PA = platform.request("SBUS_3V3_PA") + + leds = Signal(8, reset=0xF0) + self.comb += platform.request("user_led", 0).eq(leds[0]) + self.comb += platform.request("user_led", 1).eq(leds[1]) + self.comb += platform.request("user_led", 2).eq(leds[2]) + self.comb += platform.request("user_led", 3).eq(leds[3]) + self.comb += platform.request("user_led", 4).eq(leds[4]) + self.comb += platform.request("user_led", 5).eq(leds[5]) + self.comb += platform.request("user_led", 6).eq(leds[6]) + self.comb += platform.request("user_led", 7).eq(leds[7]) + + sbus_oe_data = Signal(reset=0) + sbus_oe_slave_in = Signal(reset=0) + sbus_oe_master_in = Signal(reset=0) + sbus_oe_int1 = Signal(reset=0) + sbus_oe_int7 = Signal(reset=0) + sbus_oe_master_br = Signal(reset=0) + + sbus_last_pa = Signal(28) + burst_index = Signal(4) + burst_counter = Signal(4) + burst_limit_m1 = Signal(4) + + #SBUS_3V3_CLK = Signal() + SBUS_3V3_ASs_i = Signal() + self.comb += SBUS_3V3_ASs_i.eq(pad_SBUS_3V3_ASs) + SBUS_3V3_BGs_i = Signal() + self.comb += SBUS_3V3_BGs_i.eq(pad_SBUS_3V3_BGs) + SBUS_3V3_BRs_o = Signal(reset=1) + self.specials += Tristate(pad_SBUS_3V3_BRs, SBUS_3V3_BRs_o, sbus_oe_master_br, None) + SBUS_3V3_ERRs_i = Signal() + SBUS_3V3_ERRs_o = Signal() + self.specials += Tristate(pad_SBUS_3V3_ERRs, SBUS_3V3_ERRs_o, sbus_oe_master_in, SBUS_3V3_ERRs_i) + SBUS_DATA_OE_LED_o = Signal() + self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + SBUS_DATA_OE_LED_2_o = 
Signal() + self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) + #SBUS_3V3_RSTs = Signal() + SBUS_3V3_SELs_i = Signal() + self.comb += SBUS_3V3_SELs_i.eq(pad_SBUS_3V3_SELs) + SBUS_3V3_INT1s_o = Signal(reset=1) + self.specials += Tristate(pad_SBUS_3V3_INT1s, SBUS_3V3_INT1s_o, sbus_oe_int1, None) + SBUS_3V3_INT7s_o = Signal(reset=1) + self.specials += Tristate(pad_SBUS_3V3_INT7s, SBUS_3V3_INT7s_o, sbus_oe_int7, None) + SBUS_3V3_PPRD_i = Signal() + SBUS_3V3_PPRD_o = Signal() + self.specials += Tristate(pad_SBUS_3V3_PPRD, SBUS_3V3_PPRD_o, sbus_oe_slave_in, SBUS_3V3_PPRD_i) + #SBUS_OE_o = Signal() + self.comb += pad_SBUS_OE.eq(self.hold_reset) + SBUS_3V3_ACKs_i = Signal(3) + SBUS_3V3_ACKs_o = Signal(3) + self.specials += Tristate(pad_SBUS_3V3_ACKs, SBUS_3V3_ACKs_o, sbus_oe_master_in, SBUS_3V3_ACKs_i) + SBUS_3V3_SIZ_i = Signal(3) + SBUS_3V3_SIZ_o = Signal(3) + self.specials += Tristate(pad_SBUS_3V3_SIZ, SBUS_3V3_SIZ_o, sbus_oe_slave_in, SBUS_3V3_SIZ_i) + SBUS_3V3_D_i = Signal(32) + SBUS_3V3_D_o = Signal(32) + self.specials += Tristate(pad_SBUS_3V3_D, SBUS_3V3_D_o, sbus_oe_data, SBUS_3V3_D_i) + SBUS_3V3_PA_i = Signal(28) + self.comb += SBUS_3V3_PA_i.eq(pad_SBUS_3V3_PA) + + self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") + + p_data = Signal(32) # prom data + + slave_fsm.act("Reset", + NextValue(SBUS_DATA_OE_LED_o, 0), + NextValue(SBUS_DATA_OE_LED_2_o, 0), + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + NextValue(p_data, 0), + NextValue(leds, 0x0F), + NextState("Start") + ) + slave_fsm.act("Start", + NextValue(SBUS_DATA_OE_LED_o, 0), + NextValue(SBUS_DATA_OE_LED_2_o, 0), + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + NextValue(p_data, 0), + NextValue(leds, 
0x01), + If((self.hold_reset == 0), NextState("Idle")) + ) + slave_fsm.act("Idle", + #NextValue(leds, 0x11), + If(((SBUS_3V3_SELs_i == 0) and + (SBUS_3V3_ASs_i == 0) and + (siz_is_word(SBUS_3V3_SIZ_i)) and + (SBUS_3V3_PPRD_i == 1)), + NextValue(SBUS_DATA_OE_LED_o, 1), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + NextValue(burst_counter, 0), + NextValue(burst_limit_m1, siz_to_burst_size_m1(SBUS_3V3_SIZ_i)), + If((SBUS_3V3_PA_i[16:28] == 0x000), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(p_data, prom[SBUS_3V3_PA_i[2:16]]), + NextState("Slave_Ack_Read_Prom_Burst") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) + ).Elif(((SBUS_3V3_SELs_i == 0) and + (SBUS_3V3_ASs_i == 0) and + (SIZ_BYTE == SBUS_3V3_SIZ_i) and + (SBUS_3V3_PPRD_i == 1)), + NextValue(SBUS_DATA_OE_LED_2_o, 1), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + If((SBUS_3V3_PA_i[16:28] == 0x000), + NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(p_data, prom[SBUS_3V3_PA_i[2:16]]), + NextState("Slave_Ack_Read_Prom_Byte") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) + ) + ) + slave_fsm.act("Slave_Ack_Read_Prom_Burst", + NextValue(leds, 0x03), + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, p_data), + #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[2:6])), + NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[2:6]), sbus_last_pa[6:16])]), + If((burst_counter == burst_limit_m1), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Do_Read") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(burst_counter, burst_counter + 1) + ) + ) + slave_fsm.act("Slave_Ack_Read_Prom_Byte", + NextValue(leds, 0x0c), + NextValue(sbus_oe_data, 1), + 
If((sbus_last_pa[0:2] == 0x0), + NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[24:32])) + ).Elif((sbus_last_pa[0:2] == 0x1), + NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[16:24])) + ).Elif((sbus_last_pa[0:2] == 0x2), + NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[8:16])) + ).Elif((sbus_last_pa[0:2] == 0x3), + NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[0:8])) + ), + NextState("Slave_Do_Read") + ) + slave_fsm.act("Slave_Do_Read", + NextValue(leds, 0x30), + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + If((SBUS_3V3_ASs_i == 1), + NextState("Idle") + ) + ) + slave_fsm.act("Slave_Error", + NextValue(leds, 0xc0), + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + If((SBUS_3V3_ASs_i == 1), + NextState("Idle") + ) + ) From 156c2960c8f768fdc3b34b6d690046568be80dea Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 13 Jun 2021 12:42:35 -0400 Subject: [PATCH 02/78] more stuff, with some weird bugs... 
--- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 68 +++-- .../sbus_to_fpga_slave.py | 273 +++++++++++++++--- 2 files changed, 284 insertions(+), 57 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py index 3fdc22b..b20c632 100644 --- a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py +++ b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py @@ -8,6 +8,7 @@ from litex.soc.integration.soc import * from litex.soc.integration.soc_core import * from litex.soc.integration.builder import * from litex.soc.cores.clock import * +from litex.soc.cores.led import LedChaser from litex_boards.platforms import ztex213 from sbus_to_fpga_slave import *; @@ -29,7 +30,7 @@ _sbus_sbus = [ ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), ("SBUS_3V3_SIZ", 0, Pins("R7 U3 V1"), IOStandard("lvttl")), ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), - ("SBUS_3V3_PA", 0, Pins(" B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), + ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), ] # CRG ---------------------------------------------------------------------------------------------- @@ -37,33 +38,39 @@ class _CRG(Module): def __init__(self, platform, sys_clk_freq): self.clock_domains.cd_sys = ClockDomain() self.clock_domains.cd_native = ClockDomain(reset_less=True) - self.clock_domains.cd_sbus = ClockDomain() + #self.clock_domains.cd_sbus = ClockDomain() self.clock_domains.cd_por = ClockDomain() # # # clk48 = platform.request("clk48") + self.cd_native.clk = clk48 clk_sbus = platform.request("SBUS_3V3_CLK") + self.cd_sys.clk = clk_sbus + rst_sbus = platform.request("SBUS_3V3_RSTs") - self.submodules.pll = pll = S7MMCM(speedgrade=-1) - 
pll.register_clkin(clk48, 48e6) - pll.create_clkout(self.cd_sys, sys_clk_freq) + #self.submodules.pll = pll = S7MMCM(speedgrade=-1) + #pll.register_clkin(clk48, 48e6) + #pll.create_clkout(self.cd_sys, sys_clk_freq) - self.comb += self.cd_sbus.clk.eq(clk_sbus) - self.comb += self.cd_sbus.rst.eq(~platform.request("SBUS_3V3_RSTs")) + #self.comb += self.cd_sbus.clk.eq(clk_sbus) + #self.comb += self.cd_sbus.rst.eq(~rst_sbus) + + #self.comb += self.cd_sys.clk.eq(clk_sbus) + self.comb += self.cd_sys.rst.eq(~rst_sbus) - self.comb += self.cd_native.clk.eq(clk48) + #self.comb += self.cd_native.clk.eq(clk48) - platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) - - # FIXME: add SBUS_3V3_RSTs + #platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_native.clk) # Power on reset, 20 seconds - por_count = Signal(30, reset=20*48*1000000) - por_done = Signal() - self.comb += self.cd_por.clk.eq(clk48) - self.comb += por_done.eq(por_count == 0) - self.sync.por += If(~por_done, por_count.eq(por_count - 1)) - self.comb += pll.reset.eq(~por_done) + #por_count = Signal(30, reset=20*48*1000000) + #por_done = Signal() + #self.comb += self.cd_por.clk.eq(clk48) + #self.comb += por_done.eq(por_count == 0) + #self.sync.por += If(~por_done, por_count.eq(por_count - 1)) + #self.comb += pll.reset.eq(~por_done) class SBusFPGA(SoCCore): def __init__(self, **kwargs): @@ -73,14 +80,24 @@ class SBusFPGA(SoCCore): kwargs["with_uart"] = True kwargs["with_timer"] = False - self.sys_clk_freq = sys_clk_freq = 100e6 + self.sys_clk_freq = sys_clk_freq = 25e6 # SBus max self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") self.platform.add_extension(_sbus_sbus) SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, clk_freq=sys_clk_freq, **kwargs) + wb_mem_map = { + "prom": 0x00000000, + 
"csr" : 0x00040000, + } + self.mem_map.update(wb_mem_map) self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) +# self.submodules.leds = LedChaser( +# pads = platform.request_all("user_led"), +# sys_clk_freq = sys_clk_freq) +# self.add_csr("leds") + prom_file = "prom_mini.fc" prom_data = soc_core.get_mem_data(prom_file, "big") prom = Array(prom_data) @@ -88,15 +105,26 @@ class SBusFPGA(SoCCore): #for i in range(len(prom)): # print(hex(prom[i])) #print("\n****************************************\n") - #self.add_ram("prom", origin=0x0, size=2**14, contents=prom_data, mode="r") + self.add_ram("prom", origin=self.mem_map["prom"], size=2**16, contents=prom_data, mode="r") # for show #getattr(self,"prom").mem.init = prom_data #getattr(self,"prom").mem.depth = 2**14 + # don't enable anything on the SBus side for 20 seconds after power up + # this avoids FPGA initialization messing with the cold boot process + # requires us to reset the SPARCstation afterward so the FPGA board + # is properly identified hold_reset_ctr = Signal(30, reset=960000000) self.sync.native += If(hold_reset_ctr>0, hold_reset_ctr.eq(hold_reset_ctr - 1)) hold_reset = Signal(reset=1) self.comb += hold_reset.eq(~(hold_reset_ctr == 0)) - self.submodules.slave = ClockDomainsRenamer("sbus")(SBusFPGASlave(platform=self.platform, soc=self, prom=prom, hold_reset=hold_reset)) + + #self.submodules.sbus_slave = ClockDomainsRenamer("sbus")(SBusFPGASlave(platform=self.platform, soc=self, prom=prom, hold_reset=hold_reset)) + self.submodules.sbus_slave = SBusFPGASlave(platform=self.platform, + prom=prom, + hold_reset=hold_reset, + wishbone=wishbone.Interface(data_width=self.bus.data_width, adr_width=self.bus.address_width)) + + self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_slave.wishbone) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} diff --git 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index 5cd19ad..b165504 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -2,6 +2,7 @@ from migen import * from migen.genlib.fifo import SyncFIFOBuffered from migen.fhdl.specials import Tristate +from litex.soc.interconnect import wishbone SIZ_WORD = 0x0 SIZ_BYTE = 0x1 @@ -21,6 +22,14 @@ ACK_DWORD = 0x2 ACK_HWORD = 0x1 ACK_RECV = 0x0 +ADDR_PHYS_HIGH = 27 +ADDR_PHYS_LOW = 0 +ADDR_PFX_HIGH = ADDR_PHYS_HIGH +ADDR_PFX_LOW = 16 ## 64 KiB per prefix +ADDR_PFX_LENGTH = 12 #(1 + ADDR_PFX_HIGH - ADDR_PFX_LOW) +ROM_ADDR_PFX = C(0x000)[0:12] +WISHBONE_CSR_ADDR_PFX = C(0x004)[0:12] + def siz_is_word(siz): return (SIZ_WORD == siz) or (SIZ_BURST2 == siz) or (SIZ_BURST4 == siz) or (SIZ_BURST8 == siz) or (SIZ_BURST16 == siz) @@ -50,18 +59,93 @@ def siz_to_burst_size_m1(siz): return 15 return 1 -# siz_to_burst_size_m1 = { -# SIZ_WORD: 0, -# SIZ_BURST2: 1, -# SIZ_BURST4: 3, -# SIZ_BURST8: 7, -# SIZ_BURST16: 15 -# }; +class LedDisplay(Module): + def __init__(self, pads): + n = len(pads) + self.value = Signal(32, reset = 0x18244281) + old_value = Signal(32) + display = Signal(8) + + self.submodules.fsm = fsm = FSM(reset_state="Reset") + time_counter = Signal(32, reset = 0) + blink_counter = Signal(4, reset = 0) + self.comb += pads.eq(display) + fsm.act("Reset", + NextValue(time_counter, 25000000//10), + NextValue(blink_counter, 10), + NextValue(display, 0x00), + NextValue(old_value, self.value), + NextState("Quick")) + fsm.act("Quick", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + If (blink_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[0:8]), + NextState("Byte0") + ).Else( + NextValue(display, ~display), + NextValue(time_counter, 25000000//10), + NextValue(blink_counter, blink_counter - 1) + ) + ).Else( + NextValue(time_counter, 
time_counter - 1) + ) + ) + fsm.act("Byte0", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[8:16]), + NextState("Byte1") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte1", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[16:24]), + NextState("Byte2") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte2", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[24:32]), + NextState("Byte3") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte3", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//10), + NextValue(blink_counter, 10), + NextValue(display, 0x00), + NextState("Quick") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) class SBusFPGASlave(Module): - def __init__(self, platform, soc, prom, hold_reset): - + def __init__(self, platform, prom, hold_reset, wishbone): + self.platform = platform self.hold_reset = hold_reset + self.wishbone = wishbone + + self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") @@ -80,16 +164,18 @@ class SBusFPGASlave(Module): pad_SBUS_3V3_SIZ = platform.request("SBUS_3V3_SIZ") pad_SBUS_3V3_D = platform.request("SBUS_3V3_D") pad_SBUS_3V3_PA = platform.request("SBUS_3V3_PA") + assert len(pad_SBUS_3V3_D) == 32, "len(pad_SBUS_3V3_D) should be 32" + assert len(pad_SBUS_3V3_PA) == 28, "len(pad_SBUS_3V3_PA) should be 28" - leds = Signal(8, reset=0xF0) - self.comb += platform.request("user_led", 0).eq(leds[0]) - self.comb += platform.request("user_led", 
1).eq(leds[1]) - self.comb += platform.request("user_led", 2).eq(leds[2]) - self.comb += platform.request("user_led", 3).eq(leds[3]) - self.comb += platform.request("user_led", 4).eq(leds[4]) - self.comb += platform.request("user_led", 5).eq(leds[5]) - self.comb += platform.request("user_led", 6).eq(leds[6]) - self.comb += platform.request("user_led", 7).eq(leds[7]) + #leds = Signal(8, reset=0xF0) + #self.comb += platform.request("user_led", 0).eq(leds[0]) + #self.comb += platform.request("user_led", 1).eq(leds[1]) + #self.comb += platform.request("user_led", 2).eq(leds[2]) + #self.comb += platform.request("user_led", 3).eq(leds[3]) + #self.comb += platform.request("user_led", 4).eq(leds[4]) + #self.comb += platform.request("user_led", 5).eq(leds[5]) + #self.comb += platform.request("user_led", 6).eq(leds[6]) + #self.comb += platform.request("user_led", 7).eq(leds[7]) sbus_oe_data = Signal(reset=0) sbus_oe_slave_in = Signal(reset=0) @@ -143,7 +229,11 @@ class SBusFPGASlave(Module): self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") - p_data = Signal(32) # prom data + p_data = Signal(32) # prom data to read + + csr_data_w_data = Signal(32) # csr data to write + csr_data_w_addr = Signal(32) # address thereof + csr_data_w_we = Signal(reset = 0) # write enable slave_fsm.act("Reset", NextValue(SBUS_DATA_OE_LED_o, 0), @@ -155,7 +245,7 @@ class SBusFPGASlave(Module): NextValue(sbus_oe_master_in, 0), NextValue(sbus_oe_master_br, 0), NextValue(p_data, 0), - NextValue(leds, 0x0F), + #NextValue(leds, 0x0F), NextState("Start") ) slave_fsm.act("Start", @@ -168,7 +258,7 @@ class SBusFPGASlave(Module): NextValue(sbus_oe_master_in, 0), NextValue(sbus_oe_master_br, 0), NextValue(p_data, 0), - NextValue(leds, 0x01), + #NextValue(leds, 0x01), If((self.hold_reset == 0), NextState("Idle")) ) slave_fsm.act("Idle", @@ -176,47 +266,80 @@ class SBusFPGASlave(Module): If(((SBUS_3V3_SELs_i == 0) and (SBUS_3V3_ASs_i == 0) and (siz_is_word(SBUS_3V3_SIZ_i)) and - (SBUS_3V3_PPRD_i 
== 1)), + (SBUS_3V3_PPRD_i == 1) and + (SBUS_3V3_PA_i[0:2] == 0)), NextValue(SBUS_DATA_OE_LED_o, 1), + NextValue(SBUS_DATA_OE_LED_2_o, 0), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), NextValue(burst_limit_m1, siz_to_burst_size_m1(SBUS_3V3_SIZ_i)), - If((SBUS_3V3_PA_i[16:28] == 0x000), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, prom[SBUS_3V3_PA_i[2:16]]), + NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), NextState("Slave_Ack_Read_Prom_Burst") + ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + NextValue(p_data, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME + NextState("Slave_Ack_Read_Reg_Burst") ).Else( + NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) and - (SBUS_3V3_ASs_i == 0) and - (SIZ_BYTE == SBUS_3V3_SIZ_i) and - (SBUS_3V3_PPRD_i == 1)), + (SBUS_3V3_ASs_i == 0) and + (SIZ_BYTE == SBUS_3V3_SIZ_i) and + (SBUS_3V3_PPRD_i == 1)), + NextValue(SBUS_DATA_OE_LED_o, 1), + NextValue(SBUS_DATA_OE_LED_2_o, 0), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), + NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + NextState("Slave_Ack_Read_Prom_Byte") + ).Else( + NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(2)[0:2], SBUS_3V3_PA_i[1:2], 
SBUS_3V3_PPRD_i)), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) + ).Elif(((SBUS_3V3_SELs_i == 0) and + (SBUS_3V3_ASs_i == 0) and + (siz_is_word(SBUS_3V3_SIZ_i)) and + (SBUS_3V3_PPRD_i == 0) and + (SBUS_3V3_PA_i[0:2] == 0)), + NextValue(SBUS_DATA_OE_LED_o, 0), NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If((SBUS_3V3_PA_i[16:28] == 0x000), - NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextValue(burst_counter, 0), + NextValue(burst_limit_m1, siz_to_burst_size_m1(SBUS_3V3_SIZ_i)), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, prom[SBUS_3V3_PA_i[2:16]]), - NextState("Slave_Ack_Read_Prom_Byte") + NextState("Slave_Ack_Reg_Write_Burst") ).Else( + NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(3)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) ) ) + # ##### READ ##### slave_fsm.act("Slave_Ack_Read_Prom_Burst", - NextValue(leds, 0x03), + #NextValue(leds, 0x03), NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, p_data), - #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[2:6])), - NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[2:6]), sbus_last_pa[6:16])]), + #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), + NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Do_Read") @@ -226,7 +349,7 @@ class SBusFPGASlave(Module): ) ) 
slave_fsm.act("Slave_Ack_Read_Prom_Byte", - NextValue(leds, 0x0c), + #NextValue(leds, 0x0c), NextValue(sbus_oe_data, 1), If((sbus_last_pa[0:2] == 0x0), NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[24:32])) @@ -240,7 +363,7 @@ class SBusFPGASlave(Module): NextState("Slave_Do_Read") ) slave_fsm.act("Slave_Do_Read", - NextValue(leds, 0x30), + #NextValue(leds, 0x30), NextValue(sbus_oe_int1, 0), NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), @@ -251,8 +374,53 @@ class SBusFPGASlave(Module): NextState("Idle") ) ) + slave_fsm.act("Slave_Ack_Read_Reg_Burst", + #NextValue(leds, 0x03), + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, p_data), # FIXME + NextValue(p_data, Cat(C(0)[0:2],index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PHYS_HIGH+1], C(0)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME + If((burst_counter == burst_limit_m1), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Do_Read") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(burst_counter, burst_counter + 1) + ) + ) + # ##### WRITE ##### + slave_fsm.act("Slave_Ack_Reg_Write_Burst", + #NextValue(leds, 0x03), + #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), + NextValue(csr_data_w_data, SBUS_3V3_D_i), + NextValue(csr_data_w_addr, Cat(C(0)[0:2], + index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], + WISHBONE_CSR_ADDR_PFX)), + NextValue(csr_data_w_we, 1), + If((burst_counter == burst_limit_m1), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Reg_Write_Final") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(burst_counter, burst_counter + 1) + ) + ) + slave_fsm.act("Slave_Ack_Reg_Write_Final", + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + 
NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + If((SBUS_3V3_ASs_i == 1), + NextState("Idle") + ) + ) + # ##### ERROR ##### slave_fsm.act("Slave_Error", - NextValue(leds, 0xc0), + #NextValue(leds, 0xc0), + NextValue(SBUS_DATA_OE_LED_o, 1), + NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_int1, 0), NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), @@ -263,3 +431,34 @@ class SBusFPGASlave(Module): NextState("Idle") ) ) + + # ##### Iface to WB ##### + + + self.submodules.wb_fsm = wb_fsm = FSM(reset_state="Reset") + wb_fsm.act("Reset", + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), + NextState("Idle") + ) + wb_fsm.act("Idle", + If(csr_data_w_we, + self.wishbone.adr.eq(csr_data_w_addr), + self.wishbone.dat_w.eq(csr_data_w_data), + self.wishbone.we.eq(1), + self.wishbone.cyc.eq(1), + self.wishbone.stb.eq(1), + NextValue(csr_data_w_we, 0), + NextState("Wait") + ) + ) + wb_fsm.act("Wait", + If(self.wishbone.ack, + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ) + ) From 914e44a4638548a05c29d942066a6b560010d3fc Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 13 Jun 2021 13:37:24 -0400 Subject: [PATCH 03/78] grrrr, syntax --- .../sbus_to_fpga_slave.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index b165504..e881e36 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -31,7 +31,7 @@ ROM_ADDR_PFX = C(0x000)[0:12] WISHBONE_CSR_ADDR_PFX = C(0x004)[0:12] def siz_is_word(siz): - return (SIZ_WORD == siz) or (SIZ_BURST2 == siz) or (SIZ_BURST4 == siz) or (SIZ_BURST8 == siz) or (SIZ_BURST16 == siz) + return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | 
(SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) def index_with_wrap(counter, limit_m1, value): if (limit_m1 == 0): @@ -46,6 +46,7 @@ def index_with_wrap(counter, limit_m1, value): return (value + counter)[0:4] return value[0:4] +# FIXME: this doesn't work. Verilog aways use 1 def siz_to_burst_size_m1(siz): if (SIZ_WORD == siz): return 0 @@ -59,6 +60,7 @@ def siz_to_burst_size_m1(siz): return 15 return 1 + class LedDisplay(Module): def __init__(self, pads): n = len(pads) @@ -263,17 +265,22 @@ class SBusFPGASlave(Module): ) slave_fsm.act("Idle", #NextValue(leds, 0x11), - If(((SBUS_3V3_SELs_i == 0) and - (SBUS_3V3_ASs_i == 0) and - (siz_is_word(SBUS_3V3_SIZ_i)) and - (SBUS_3V3_PPRD_i == 1) and + If(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (siz_is_word(SBUS_3V3_SIZ_i)) & + (SBUS_3V3_PPRD_i == 1) & (SBUS_3V3_PA_i[0:2] == 0)), NextValue(SBUS_DATA_OE_LED_o, 1), NextValue(SBUS_DATA_OE_LED_2_o, 0), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), - NextValue(burst_limit_m1, siz_to_burst_size_m1(SBUS_3V3_SIZ_i)), + Case(SBUS_3V3_SIZ_i, { + SIZ_WORD: NextValue(burst_limit_m1, 0), + SIZ_BURST2: NextValue(burst_limit_m1, 1), + SIZ_BURST4: NextValue(burst_limit_m1, 3), + SIZ_BURST8: NextValue(burst_limit_m1, 7), + SIZ_BURST16: NextValue(burst_limit_m1, 15)}), If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), @@ -291,9 +298,9 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) - ).Elif(((SBUS_3V3_SELs_i == 0) and - (SBUS_3V3_ASs_i == 0) and - (SIZ_BYTE == SBUS_3V3_SIZ_i) and + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (SIZ_BYTE == SBUS_3V3_SIZ_i) & (SBUS_3V3_PPRD_i == 1)), NextValue(SBUS_DATA_OE_LED_o, 1), NextValue(SBUS_DATA_OE_LED_2_o, 0), @@ -310,17 +317,22 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") 
) - ).Elif(((SBUS_3V3_SELs_i == 0) and - (SBUS_3V3_ASs_i == 0) and - (siz_is_word(SBUS_3V3_SIZ_i)) and - (SBUS_3V3_PPRD_i == 0) and + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (siz_is_word(SBUS_3V3_SIZ_i)) & + (SBUS_3V3_PPRD_i == 0) & (SBUS_3V3_PA_i[0:2] == 0)), NextValue(SBUS_DATA_OE_LED_o, 0), NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), - NextValue(burst_limit_m1, siz_to_burst_size_m1(SBUS_3V3_SIZ_i)), + Case(SBUS_3V3_SIZ_i, { + SIZ_WORD: NextValue(burst_limit_m1, 0), + SIZ_BURST2: NextValue(burst_limit_m1, 1), + SIZ_BURST4: NextValue(burst_limit_m1, 3), + SIZ_BURST8: NextValue(burst_limit_m1, 7), + SIZ_BURST16: NextValue(burst_limit_m1, 15)}), If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), From 94a2a7eb7a8cba9d2207ed6b50bdd2d76e29b846 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 13 Jun 2021 14:19:55 -0400 Subject: [PATCH 04/78] more grrrr syntax --- .../sbus_to_fpga_slave.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index e881e36..81f6767 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -289,11 +289,11 @@ class SBusFPGASlave(Module): ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(p_data, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME 
NextState("Slave_Ack_Read_Reg_Burst") ).Else( - NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -302,8 +302,8 @@ class SBusFPGASlave(Module): (SBUS_3V3_ASs_i == 0) & (SIZ_BYTE == SBUS_3V3_SIZ_i) & (SBUS_3V3_PPRD_i == 1)), - NextValue(SBUS_DATA_OE_LED_o, 1), - NextValue(SBUS_DATA_OE_LED_2_o, 0), + NextValue(SBUS_DATA_OE_LED_o, 0), + NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), @@ -312,7 +312,7 @@ class SBusFPGASlave(Module): NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), NextState("Slave_Ack_Read_Prom_Byte") ).Else( - NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(2)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(2)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -322,8 +322,8 @@ class SBusFPGASlave(Module): (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 0) & (SBUS_3V3_PA_i[0:2] == 0)), - NextValue(SBUS_DATA_OE_LED_o, 0), - NextValue(SBUS_DATA_OE_LED_2_o, 1), + #NextValue(SBUS_DATA_OE_LED_o, 0), + #NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), @@ -338,7 +338,7 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Ack_Reg_Write_Burst") ).Else( - NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(3)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(3)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, 
ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -363,14 +363,15 @@ class SBusFPGASlave(Module): slave_fsm.act("Slave_Ack_Read_Prom_Byte", #NextValue(leds, 0x0c), NextValue(sbus_oe_data, 1), + NextValue(self.led_display.value, sbus_last_pa[0:2]), If((sbus_last_pa[0:2] == 0x0), - NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[24:32])) + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[24:32])) ).Elif((sbus_last_pa[0:2] == 0x1), - NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[16:24])) + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[16:24])) ).Elif((sbus_last_pa[0:2] == 0x2), - NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[8:16])) + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 8:16])) ).Elif((sbus_last_pa[0:2] == 0x3), - NextValue(SBUS_3V3_D_o, Cat(C(0)[0:24], p_data[0:8])) + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 0: 8])) ), NextState("Slave_Do_Read") ) From 49b4dae59dcac2374a9d3c8ef5de311d88eee629 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 14 Jun 2021 09:20:44 -0400 Subject: [PATCH 05/78] more cleanups, Chaser still not going to control mode :-/ --- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 11 ++- .../sbus_to_fpga_slave.py | 96 +++++++++++-------- 2 files changed, 63 insertions(+), 44 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py index b20c632..98e426d 100644 --- a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py +++ b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py @@ -93,10 +93,10 @@ class SBusFPGA(SoCCore): self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) -# self.submodules.leds = LedChaser( -# pads = platform.request_all("user_led"), -# sys_clk_freq = sys_clk_freq) -# self.add_csr("leds") + self.submodules.leds = LedChaser( + pads = platform.request_all("user_led"), + sys_clk_freq = sys_clk_freq) + self.add_csr("leds") 
prom_file = "prom_mini.fc" prom_data = soc_core.get_mem_data(prom_file, "big") @@ -122,7 +122,8 @@ class SBusFPGA(SoCCore): self.submodules.sbus_slave = SBusFPGASlave(platform=self.platform, prom=prom, hold_reset=hold_reset, - wishbone=wishbone.Interface(data_width=self.bus.data_width, adr_width=self.bus.address_width)) + wishbone=wishbone.Interface(data_width=self.bus.data_width, adr_width=self.bus.address_width), + chaser=self.leds) self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_slave.wishbone) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index 81f6767..0c0a562 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -27,12 +27,13 @@ ADDR_PHYS_LOW = 0 ADDR_PFX_HIGH = ADDR_PHYS_HIGH ADDR_PFX_LOW = 16 ## 64 KiB per prefix ADDR_PFX_LENGTH = 12 #(1 + ADDR_PFX_HIGH - ADDR_PFX_LOW) -ROM_ADDR_PFX = C(0x000)[0:12] -WISHBONE_CSR_ADDR_PFX = C(0x004)[0:12] +ROM_ADDR_PFX = Signal(12, reset = 0) +WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) def siz_is_word(siz): return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) +# FIXME: this doesn't work. Verilog aways use 1 def index_with_wrap(counter, limit_m1, value): if (limit_m1 == 0): return value[0:4] @@ -46,6 +47,10 @@ def index_with_wrap(counter, limit_m1, value): return (value + counter)[0:4] return value[0:4] +# doesn't compile +#def index_with_wrap(counter, limit_m1, value): +# return Cat((value+counter)[0:limit_m1], value[limit_m1:4]) + # FIXME: this doesn't work. 
Verilog aways use 1 def siz_to_burst_size_m1(siz): if (SIZ_WORD == siz): @@ -60,7 +65,6 @@ def siz_to_burst_size_m1(siz): return 15 return 1 - class LedDisplay(Module): def __init__(self, pads): n = len(pads) @@ -140,14 +144,15 @@ class LedDisplay(Module): NextValue(time_counter, time_counter - 1) ) ) - + class SBusFPGASlave(Module): - def __init__(self, platform, prom, hold_reset, wishbone): + def __init__(self, platform, prom, hold_reset, wishbone, chaser): self.platform = platform self.hold_reset = hold_reset self.wishbone = wishbone + self.chaser = chaser - self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) + #self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") @@ -238,8 +243,8 @@ class SBusFPGASlave(Module): csr_data_w_we = Signal(reset = 0) # write enable slave_fsm.act("Reset", - NextValue(SBUS_DATA_OE_LED_o, 0), - NextValue(SBUS_DATA_OE_LED_2_o, 0), + #NextValue(SBUS_DATA_OE_LED_o, 0), + #NextValue(SBUS_DATA_OE_LED_2_o, 0), NextValue(sbus_oe_int1, 0), NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), @@ -251,8 +256,8 @@ class SBusFPGASlave(Module): NextState("Start") ) slave_fsm.act("Start", - NextValue(SBUS_DATA_OE_LED_o, 0), - NextValue(SBUS_DATA_OE_LED_2_o, 0), + #NextValue(SBUS_DATA_OE_LED_o, 0), + #NextValue(SBUS_DATA_OE_LED_2_o, 0), NextValue(sbus_oe_int1, 0), NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), @@ -270,8 +275,8 @@ class SBusFPGASlave(Module): (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 1) & (SBUS_3V3_PA_i[0:2] == 0)), - NextValue(SBUS_DATA_OE_LED_o, 1), - NextValue(SBUS_DATA_OE_LED_2_o, 0), + #NextValue(SBUS_DATA_OE_LED_o, 1), + #NextValue(SBUS_DATA_OE_LED_2_o, 0), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), @@ -289,11 +294,11 @@ class SBusFPGASlave(Module): 
).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), - NextValue(p_data, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + NextValue(p_data, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME NextState("Slave_Ack_Read_Reg_Burst") ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(1)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -302,8 +307,8 @@ class SBusFPGASlave(Module): (SBUS_3V3_ASs_i == 0) & (SIZ_BYTE == SBUS_3V3_SIZ_i) & (SBUS_3V3_PPRD_i == 1)), - NextValue(SBUS_DATA_OE_LED_o, 0), - NextValue(SBUS_DATA_OE_LED_2_o, 1), + #NextValue(SBUS_DATA_OE_LED_o, 0), + #NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), @@ -312,7 +317,7 @@ class SBusFPGASlave(Module): NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), NextState("Slave_Ack_Read_Prom_Byte") ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(2)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 2), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -322,7 +327,7 @@ class SBusFPGASlave(Module): (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 0) & (SBUS_3V3_PA_i[0:2] == 0)), - 
#NextValue(SBUS_DATA_OE_LED_o, 0), + #NextValue(SBUS_DATA_OE_LED_o, 1), #NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), @@ -338,7 +343,7 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Ack_Reg_Write_Burst") ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, C(3)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 3), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -352,6 +357,13 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_D_o, p_data), #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), + Case(burst_limit_m1, { + 0: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:4], sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), + 1: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:1], sbus_last_pa[ADDR_PHYS_LOW+3:ADDR_PFX_LOW])]), + 3: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:2], sbus_last_pa[ADDR_PHYS_LOW+4:ADDR_PFX_LOW])]), + 7: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:3], sbus_last_pa[ADDR_PHYS_LOW+5:ADDR_PFX_LOW])]), + 15: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:4], sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), + }), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Do_Read") @@ -363,7 +375,7 @@ class SBusFPGASlave(Module): slave_fsm.act("Slave_Ack_Read_Prom_Byte", 
#NextValue(leds, 0x0c), NextValue(sbus_oe_data, 1), - NextValue(self.led_display.value, sbus_last_pa[0:2]), + #NextValue(self.led_display.value, sbus_last_pa[0:2]), If((sbus_last_pa[0:2] == 0x0), NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[24:32])) ).Elif((sbus_last_pa[0:2] == 0x1), @@ -391,7 +403,7 @@ class SBusFPGASlave(Module): #NextValue(leds, 0x03), NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, p_data), # FIXME - NextValue(p_data, Cat(C(0)[0:2],index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PHYS_HIGH+1], C(0)[0:2], SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME + NextValue(p_data, Cat(Signal(2, reset = 0),index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PHYS_HIGH+1], Signal(2, reset = 0), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Do_Read") @@ -402,13 +414,16 @@ class SBusFPGASlave(Module): ) # ##### WRITE ##### slave_fsm.act("Slave_Ack_Reg_Write_Burst", + #NextValue(SBUS_DATA_OE_LED_o, 1), + #NextValue(SBUS_DATA_OE_LED_2_o, 1), #NextValue(leds, 0x03), #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), NextValue(csr_data_w_data, SBUS_3V3_D_i), - NextValue(csr_data_w_addr, Cat(C(0)[0:2], - index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), - sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], - WISHBONE_CSR_ADDR_PFX)), + #NextValue(csr_data_w_addr, Cat(Signal(2, reset = 0), + # index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), + # sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], + # WISHBONE_CSR_ADDR_PFX)), + NextValue(csr_data_w_addr, 0x00040000), NextValue(csr_data_w_we, 1), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, 
ACK_IDLE), @@ -419,6 +434,8 @@ class SBusFPGASlave(Module): ) ) slave_fsm.act("Slave_Ack_Reg_Write_Final", + #NextValue(SBUS_DATA_OE_LED_o, 1), + #NextValue(SBUS_DATA_OE_LED_2_o, 0), NextValue(sbus_oe_int1, 0), NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), @@ -432,8 +449,8 @@ class SBusFPGASlave(Module): # ##### ERROR ##### slave_fsm.act("Slave_Error", #NextValue(leds, 0xc0), - NextValue(SBUS_DATA_OE_LED_o, 1), - NextValue(SBUS_DATA_OE_LED_2_o, 1), + #NextValue(SBUS_DATA_OE_LED_o, 1), + #NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_int1, 0), NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), @@ -446,8 +463,6 @@ class SBusFPGASlave(Module): ) # ##### Iface to WB ##### - - self.submodules.wb_fsm = wb_fsm = FSM(reset_state="Reset") wb_fsm.act("Reset", self.wishbone.we.eq(0), @@ -457,18 +472,21 @@ class SBusFPGASlave(Module): NextState("Idle") ) wb_fsm.act("Idle", - If(csr_data_w_we, - self.wishbone.adr.eq(csr_data_w_addr), - self.wishbone.dat_w.eq(csr_data_w_data), - self.wishbone.we.eq(1), - self.wishbone.cyc.eq(1), - self.wishbone.stb.eq(1), + If(csr_data_w_we == 1, + #NextValue(SBUS_DATA_OE_LED_o, 0), + NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(csr_data_w_we, 0), - NextState("Wait") + NextState("Write") ) ) - wb_fsm.act("Wait", - If(self.wishbone.ack, + wb_fsm.act("Write", + self.wishbone.adr.eq(csr_data_w_addr), + self.wishbone.dat_w.eq(csr_data_w_data), + self.wishbone.we.eq(1), + self.wishbone.cyc.eq(1), + self.wishbone.stb.eq(1), + If(self.wishbone.ack == 1, + NextValue(SBUS_DATA_OE_LED_o, 1), self.wishbone.we.eq(0), self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), From 5c66170a3a23a950e5579da0ce21d8962683473b Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Tue, 15 Jun 2021 02:31:07 -0400 Subject: [PATCH 06/78] Wishbone is word-addressed (still no lock) --- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 2 +- sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py index 98e426d..57b0653 100644 --- a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py +++ b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py @@ -122,7 +122,7 @@ class SBusFPGA(SoCCore): self.submodules.sbus_slave = SBusFPGASlave(platform=self.platform, prom=prom, hold_reset=hold_reset, - wishbone=wishbone.Interface(data_width=self.bus.data_width, adr_width=self.bus.address_width), + wishbone=wishbone.Interface(data_width=self.bus.data_width), chaser=self.leds) self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_slave.wishbone) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index 0c0a562..dfae0fb 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -480,7 +480,7 @@ class SBusFPGASlave(Module): ) ) wb_fsm.act("Write", - self.wishbone.adr.eq(csr_data_w_addr), + self.wishbone.adr.eq(csr_data_w_addr[2:32]), self.wishbone.dat_w.eq(csr_data_w_data), self.wishbone.we.eq(1), self.wishbone.cyc.eq(1), From 98ec770e689be1bde759cc9fad7c847d50fd17f5 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Tue, 15 Jun 2021 05:32:36 -0400 Subject: [PATCH 07/78] fix wishbone.sel, now the PROM can control the LEDs :-) --- sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index dfae0fb..21ecb57 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -468,7 +468,6 @@ class SBusFPGASlave(Module): self.wishbone.we.eq(0), self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), - self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), NextState("Idle") ) wb_fsm.act("Idle", @@ -485,6 +484,7 @@ class SBusFPGASlave(Module): self.wishbone.we.eq(1), 
self.wishbone.cyc.eq(1), self.wishbone.stb.eq(1), + self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), If(self.wishbone.ack == 1, NextValue(SBUS_DATA_OE_LED_o, 1), self.wishbone.we.eq(0), From 0759bad4023011c443839ab45bc03a10ef933e14 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Tue, 15 Jun 2021 09:59:51 -0400 Subject: [PATCH 08/78] Add a FIFO for CDC between SBus (in cd_sbus) and Wishbone (in a 100 MHz cd_sys) --- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 71 ++++++++++--------- .../sbus_to_fpga_slave.py | 62 +++++----------- .../sbus_to_fpga_wishbone.py | 42 +++++++++++ 3 files changed, 97 insertions(+), 78 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py diff --git a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py index 57b0653..770cfa6 100644 --- a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py +++ b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py @@ -10,8 +10,10 @@ from litex.soc.integration.builder import * from litex.soc.cores.clock import * from litex.soc.cores.led import LedChaser from litex_boards.platforms import ztex213 +from migen.genlib.fifo import * from sbus_to_fpga_slave import *; +from sbus_to_fpga_wishbone import *; _sbus_sbus = [ ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), @@ -38,39 +40,34 @@ class _CRG(Module): def __init__(self, platform, sys_clk_freq): self.clock_domains.cd_sys = ClockDomain() self.clock_domains.cd_native = ClockDomain(reset_less=True) - #self.clock_domains.cd_sbus = ClockDomain() + self.clock_domains.cd_sbus = ClockDomain() self.clock_domains.cd_por = ClockDomain() # # # clk48 = platform.request("clk48") self.cd_native.clk = clk48 clk_sbus = platform.request("SBUS_3V3_CLK") - self.cd_sys.clk = clk_sbus + self.cd_sbus.clk = clk_sbus rst_sbus = platform.request("SBUS_3V3_RSTs") - #self.submodules.pll = pll = S7MMCM(speedgrade=-1) - #pll.register_clkin(clk48, 48e6) - #pll.create_clkout(self.cd_sys, sys_clk_freq) + self.comb += 
self.cd_sbus.rst.eq(~rst_sbus) - #self.comb += self.cd_sbus.clk.eq(clk_sbus) - #self.comb += self.cd_sbus.rst.eq(~rst_sbus) + self.submodules.pll = pll = S7MMCM(speedgrade=-1) + pll.register_clkin(clk48, 48e6) + pll.create_clkout(self.cd_sys, sys_clk_freq) + + platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) + platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) - #self.comb += self.cd_sys.clk.eq(clk_sbus) - self.comb += self.cd_sys.rst.eq(~rst_sbus) - - #self.comb += self.cd_native.clk.eq(clk48) - - #platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) - platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_native.clk) - - # Power on reset, 20 seconds - #por_count = Signal(30, reset=20*48*1000000) - #por_done = Signal() - #self.comb += self.cd_por.clk.eq(clk48) - #self.comb += por_done.eq(por_count == 0) - #self.sync.por += If(~por_done, por_count.eq(por_count - 1)) - #self.comb += pll.reset.eq(~por_done) + # Power on reset, reset propagate from SBus to SYS + por_count = Signal(16, reset=2**16-1) + por_done = Signal() + self.comb += self.cd_por.clk.eq(clk48) + self.comb += por_done.eq(por_count == 0) + self.sync.por += If(~por_done, por_count.eq(por_count - 1)) + self.comb += pll.reset.eq(~por_done | ~rst_sbus) class SBusFPGA(SoCCore): def __init__(self, **kwargs): @@ -80,7 +77,7 @@ class SBusFPGA(SoCCore): kwargs["with_uart"] = True kwargs["with_timer"] = False - self.sys_clk_freq = sys_clk_freq = 25e6 # SBus max + self.sys_clk_freq = sys_clk_freq = 100e6 self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") self.platform.add_extension(_sbus_sbus) @@ -91,7 +88,7 @@ class SBusFPGA(SoCCore): } self.mem_map.update(wb_mem_map) self.submodules.crg = 
_CRG(platform=platform, sys_clk_freq=sys_clk_freq) - self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) + self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max self.submodules.leds = LedChaser( pads = platform.request_all("user_led"), @@ -113,19 +110,27 @@ class SBusFPGA(SoCCore): # this avoids FPGA initialization messing with the cold boot process # requires us to reset the SPARCstation afterward so the FPGA board # is properly identified + # This is in the 'native' ClockDomain that is never reset hold_reset_ctr = Signal(30, reset=960000000) self.sync.native += If(hold_reset_ctr>0, hold_reset_ctr.eq(hold_reset_ctr - 1)) hold_reset = Signal(reset=1) self.comb += hold_reset.eq(~(hold_reset_ctr == 0)) - - #self.submodules.sbus_slave = ClockDomainsRenamer("sbus")(SBusFPGASlave(platform=self.platform, soc=self, prom=prom, hold_reset=hold_reset)) - self.submodules.sbus_slave = SBusFPGASlave(platform=self.platform, - prom=prom, - hold_reset=hold_reset, - wishbone=wishbone.Interface(data_width=self.bus.data_width), - chaser=self.leds) - self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_slave.wishbone) + + + + sbus_to_wishbone_fifo = AsyncFIFOBuffered(width=32+30, depth=8) + sbus_to_wishbone_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_fifo) + self.submodules += sbus_to_wishbone_fifo + self.submodules.sbus_to_wishbone = SBusToWishbone(fifo=sbus_to_wishbone_fifo, wishbone=wishbone.Interface(data_width=self.bus.data_width)) + + _sbus_slave = SBusFPGASlave(platform=self.platform, + prom=prom, + hold_reset=hold_reset, + write_fifo=sbus_to_wishbone_fifo) + self.submodules.sbus_slave = ClockDomainsRenamer("sbus")(_sbus_slave) + + self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} diff --git 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index 21ecb57..d30911f 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -1,8 +1,6 @@ from migen import * -from migen.genlib.fifo import SyncFIFOBuffered from migen.fhdl.specials import Tristate -from litex.soc.interconnect import wishbone SIZ_WORD = 0x0 SIZ_BYTE = 0x1 @@ -33,7 +31,7 @@ WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) def siz_is_word(siz): return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) -# FIXME: this doesn't work. Verilog aways use 1 +# FIXME: this doesn't work. Verilog aways use value[0:4] def index_with_wrap(counter, limit_m1, value): if (limit_m1 == 0): return value[0:4] @@ -146,11 +144,10 @@ class LedDisplay(Module): ) class SBusFPGASlave(Module): - def __init__(self, platform, prom, hold_reset, wishbone, chaser): + def __init__(self, platform, prom, hold_reset, write_fifo): self.platform = platform self.hold_reset = hold_reset - self.wishbone = wishbone - self.chaser = chaser + self.write_fifo = write_fifo #self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) @@ -238,9 +235,9 @@ class SBusFPGASlave(Module): p_data = Signal(32) # prom data to read - csr_data_w_data = Signal(32) # csr data to write - csr_data_w_addr = Signal(32) # address thereof - csr_data_w_we = Signal(reset = 0) # write enable + #csr_data_w_data = Signal(32) # csr data to write + #csr_data_w_addr = Signal(32) # address thereof + #csr_data_w_we = Signal(reset = 0) # write enable slave_fsm.act("Reset", #NextValue(SBUS_DATA_OE_LED_o, 0), @@ -326,7 +323,8 @@ class SBusFPGASlave(Module): (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 0) & - (SBUS_3V3_PA_i[0:2] == 0)), + (SBUS_3V3_PA_i[0:2] == 0) & + (self.write_fifo.writable)), # maybe we should check for enough space? 
not that we'll encounter write burst... #NextValue(SBUS_DATA_OE_LED_o, 1), #NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), @@ -418,13 +416,19 @@ class SBusFPGASlave(Module): #NextValue(SBUS_DATA_OE_LED_2_o, 1), #NextValue(leds, 0x03), #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), - NextValue(csr_data_w_data, SBUS_3V3_D_i), #NextValue(csr_data_w_addr, Cat(Signal(2, reset = 0), # index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # WISHBONE_CSR_ADDR_PFX)), - NextValue(csr_data_w_addr, 0x00040000), - NextValue(csr_data_w_we, 1), + #NextValue(csr_data_w_data, SBUS_3V3_D_i), + #NextValue(csr_data_w_addr, 0x00040000), + #NextValue(csr_data_w_we, 1), + self.write_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr + WISHBONE_CSR_ADDR_PFX, # 12 bits, adr + Signal(4, reset = 0), # 4 bits, adr (could be removed) + SBUS_3V3_D_i)), # 32 bits, data + self.write_fifo.we.eq(1), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Ack_Reg_Write_Final") @@ -461,35 +465,3 @@ class SBusFPGASlave(Module): NextState("Idle") ) ) - - # ##### Iface to WB ##### - self.submodules.wb_fsm = wb_fsm = FSM(reset_state="Reset") - wb_fsm.act("Reset", - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) - wb_fsm.act("Idle", - If(csr_data_w_we == 1, - #NextValue(SBUS_DATA_OE_LED_o, 0), - NextValue(SBUS_DATA_OE_LED_2_o, 1), - NextValue(csr_data_w_we, 0), - NextState("Write") - ) - ) - wb_fsm.act("Write", - self.wishbone.adr.eq(csr_data_w_addr[2:32]), - self.wishbone.dat_w.eq(csr_data_w_data), - self.wishbone.we.eq(1), - self.wishbone.cyc.eq(1), - self.wishbone.stb.eq(1), - 
self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), - If(self.wishbone.ack == 1, - NextValue(SBUS_DATA_OE_LED_o, 1), - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) - ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py new file mode 100644 index 0000000..2b20b3d --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py @@ -0,0 +1,42 @@ + +from migen import * +from litex.soc.interconnect import wishbone + +class SBusToWishbone(Module): + def __init__(self, fifo, wishbone): + self.fifo = fifo + self.wishbone = wishbone + + data = Signal(32) + adr = Signal(30) + + # ##### Iface to WB ##### + self.submodules.wb_fsm = wb_fsm = FSM(reset_state="Reset") + wb_fsm.act("Reset", + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ) + wb_fsm.act("Idle", + If(fifo.readable & ~self.wishbone.cyc, + fifo.re.eq(1), + NextValue(adr, fifo.dout[0:30]), + NextValue(data, fifo.dout[30:62]), + NextState("Write") + ) + ) + wb_fsm.act("Write", + self.wishbone.adr.eq(adr), + self.wishbone.dat_w.eq(data), + self.wishbone.we.eq(1), + self.wishbone.cyc.eq(1), + self.wishbone.stb.eq(1), + self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), + If(self.wishbone.ack == 1, + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ) + ) From 3b1a8bf5cf97a601a66d0a4f58da9443629f74bc Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Tue, 15 Jun 2021 12:03:04 -0400 Subject: [PATCH 09/78] read/write registers in Wishbone space (burst untested) --- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 23 +++- .../sbus_to_fpga_slave.py | 118 +++++++++++------- .../sbus_to_fpga_wishbone.py | 46 +++++-- 3 files changed, 128 insertions(+), 59 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py index 770cfa6..28b2c3e 100644 
--- a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py +++ b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py @@ -119,15 +119,28 @@ class SBusFPGA(SoCCore): - sbus_to_wishbone_fifo = AsyncFIFOBuffered(width=32+30, depth=8) - sbus_to_wishbone_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_fifo) - self.submodules += sbus_to_wishbone_fifo - self.submodules.sbus_to_wishbone = SBusToWishbone(fifo=sbus_to_wishbone_fifo, wishbone=wishbone.Interface(data_width=self.bus.data_width)) + sbus_to_wishbone_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) + sbus_to_wishbone_wr_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_wr_fifo) + self.submodules += sbus_to_wishbone_wr_fifo + + sbus_to_wishbone_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=16) + sbus_to_wishbone_rd_fifo_addr = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_rd_fifo_addr) + self.submodules += sbus_to_wishbone_rd_fifo_addr + sbus_to_wishbone_rd_fifo_data = AsyncFIFOBuffered(width=32, depth=16) + sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) + self.submodules += sbus_to_wishbone_rd_fifo_data + + self.submodules.sbus_to_wishbone = SBusToWishbone(wr_fifo=sbus_to_wishbone_wr_fifo, + rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + rd_fifo_data=sbus_to_wishbone_rd_fifo_data, + wishbone=wishbone.Interface(data_width=self.bus.data_width)) _sbus_slave = SBusFPGASlave(platform=self.platform, prom=prom, hold_reset=hold_reset, - write_fifo=sbus_to_wishbone_fifo) + wr_fifo=sbus_to_wishbone_wr_fifo, + rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + rd_fifo_data=sbus_to_wishbone_rd_fifo_data,) self.submodules.sbus_slave = ClockDomainsRenamer("sbus")(_sbus_slave) self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py index 
d30911f..e83b6a5 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py @@ -32,22 +32,21 @@ def siz_is_word(siz): return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) # FIXME: this doesn't work. Verilog aways use value[0:4] -def index_with_wrap(counter, limit_m1, value): - if (limit_m1 == 0): - return value[0:4] - elif (limit_m1 == 1): - return Cat((value + counter)[0:1], value[1:4]) - elif (limit_m1 == 3): - return Cat((value + counter)[0:2], value[2:4]) - elif (limit_m1 == 7): - return Cat((value + counter)[0:3], value[3:4]) - elif (limit_m1 == 15): - return (value + counter)[0:4] - return value[0:4] +#def _index_with_wrap(counter, limit_m1, value): +# if (limit_m1 == 0): +# return value[0:4] +# elif (limit_m1 == 1): +# return Cat((value + counter)[0:1], value[1:4]) +# elif (limit_m1 == 3): +# return Cat((value + counter)[0:2], value[2:4]) +# elif (limit_m1 == 7): +# return Cat((value + counter)[0:3], value[3:4]) +# elif (limit_m1 == 15): +# return (value + counter)[0:4] +# return value[0:4] -# doesn't compile -#def index_with_wrap(counter, limit_m1, value): -# return Cat((value+counter)[0:limit_m1], value[limit_m1:4]) +def index_with_wrap(counter, limit_m1, value): + return ((value+counter) & limit_m1)[0:4] | (value&(~limit_m1))[0:4] # FIXME: this doesn't work. 
Verilog aways use 1 def siz_to_burst_size_m1(siz): @@ -144,10 +143,12 @@ class LedDisplay(Module): ) class SBusFPGASlave(Module): - def __init__(self, platform, prom, hold_reset, write_fifo): + def __init__(self, platform, prom, hold_reset, wr_fifo, rd_fifo_addr, rd_fifo_data): self.platform = platform self.hold_reset = hold_reset - self.write_fifo = write_fifo + self.wr_fifo = wr_fifo + self.rd_fifo_addr = rd_fifo_addr + self.rd_fifo_data = rd_fifo_data #self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) @@ -231,14 +232,13 @@ class SBusFPGASlave(Module): SBUS_3V3_PA_i = Signal(28) self.comb += SBUS_3V3_PA_i.eq(pad_SBUS_3V3_PA) + p_data = Signal(32) # data to read/write + + data_read_addr = Signal(30) # first addr of req. when reading from WB + data_read_enable = Signal() # start enqueuing req. to read from WB + self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") - p_data = Signal(32) # prom data to read - - #csr_data_w_data = Signal(32) # csr data to write - #csr_data_w_addr = Signal(32) # address thereof - #csr_data_w_we = Signal(reset = 0) # write enable - slave_fsm.act("Reset", #NextValue(SBUS_DATA_OE_LED_o, 0), #NextValue(SBUS_DATA_OE_LED_2_o, 0), @@ -287,13 +287,16 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + NextValue(SBUS_DATA_OE_LED_o, 1), NextState("Slave_Ack_Read_Prom_Burst") ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), - NextValue(p_data, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME - NextState("Slave_Ack_Read_Reg_Burst") + 
NextValue(p_data, 0xDEADBEEF), + NextValue(data_read_addr, (Cat(SBUS_3V3_PA_i[2:], Signal(4, reset=0)))), # enqueue all the request to the wishbone + NextValue(data_read_enable, 1), # enqueue all the request to the wishbone + NextValue(SBUS_DATA_OE_LED_2_o, 1), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") ).Else( #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), @@ -324,7 +327,7 @@ class SBusFPGASlave(Module): (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 0) & (SBUS_3V3_PA_i[0:2] == 0) & - (self.write_fifo.writable)), # maybe we should check for enough space? not that we'll encounter write burst... + (self.wr_fifo.writable)), # maybe we should check for enough space? not that we'll encounter write burst... #NextValue(SBUS_DATA_OE_LED_o, 1), #NextValue(SBUS_DATA_OE_LED_2_o, 1), NextValue(sbus_oe_master_in, 1), @@ -355,13 +358,6 @@ class SBusFPGASlave(Module): NextValue(SBUS_3V3_D_o, p_data), #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), - Case(burst_limit_m1, { - 0: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:4], sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), - 1: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:1], sbus_last_pa[ADDR_PHYS_LOW+3:ADDR_PFX_LOW])]), - 3: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:2], sbus_last_pa[ADDR_PHYS_LOW+4:ADDR_PFX_LOW])]), - 7: NextValue(p_data, prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:3], sbus_last_pa[ADDR_PHYS_LOW+5:ADDR_PFX_LOW])]), - 15: NextValue(p_data, 
prom[Cat(((burst_counter+1)+sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])[0:4], sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), - }), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Do_Read") @@ -398,18 +394,31 @@ class SBusFPGASlave(Module): ) ) slave_fsm.act("Slave_Ack_Read_Reg_Burst", - #NextValue(leds, 0x03), NextValue(sbus_oe_data, 1), - NextValue(SBUS_3V3_D_o, p_data), # FIXME - NextValue(p_data, Cat(Signal(2, reset = 0),index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PHYS_HIGH+1], Signal(2, reset = 0), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), # FIXME + NextValue(SBUS_3V3_D_o, p_data), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Do_Read") ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(burst_counter, burst_counter + 1) + NextValue(burst_counter, burst_counter + 1), + If(rd_fifo_data.readable, + NextValue(p_data, rd_fifo_data.dout), + rd_fifo_data.re.eq(1), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD) + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") + ) ) ) + slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Data", + If(rd_fifo_data.readable, + NextValue(p_data, rd_fifo_data.dout), + rd_fifo_data.re.eq(1), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextState("Slave_Ack_Read_Reg_Burst") + ) + ) # ##### WRITE ##### slave_fsm.act("Slave_Ack_Reg_Write_Burst", #NextValue(SBUS_DATA_OE_LED_o, 1), @@ -423,12 +432,12 @@ class SBusFPGASlave(Module): #NextValue(csr_data_w_data, SBUS_3V3_D_i), #NextValue(csr_data_w_addr, 0x00040000), #NextValue(csr_data_w_we, 1), - self.write_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME + self.wr_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr 
FIXME sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr WISHBONE_CSR_ADDR_PFX, # 12 bits, adr Signal(4, reset = 0), # 4 bits, adr (could be removed) SBUS_3V3_D_i)), # 32 bits, data - self.write_fifo.we.eq(1), + self.wr_fifo.we.eq(1), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Ack_Reg_Write_Final") @@ -465,3 +474,28 @@ class SBusFPGASlave(Module): NextState("Idle") ) ) + + self.submodules.request_fsm = request_fsm = FSM(reset_state="Reset") + request_fsm.act("Reset", + NextState("Idle") + ) + request_fsm.act("Idle", + If(data_read_enable, + NextValue(data_read_enable, 0), + self.rd_fifo_addr.we.eq(1), + self.rd_fifo_addr.din.eq(data_read_addr), + If (burst_limit_m1 != burst_counter, # 0 the first time + NextValue(burst_counter, burst_counter + 1), + NextState("Queue") + ) + ) + ) + request_fsm.act("Queue", + self.rd_fifo_addr.we.eq(1), + self.rd_fifo_addr.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, data_read_addr[0:4]), data_read_addr[4:])), + If (burst_limit_m1 != burst_counter, + NextValue(burst_counter, burst_counter + 1), + ).Else( + NextState("Idle") + ) + ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py index 2b20b3d..33cb1ef 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py @@ -3,37 +3,59 @@ from migen import * from litex.soc.interconnect import wishbone class SBusToWishbone(Module): - def __init__(self, fifo, wishbone): - self.fifo = fifo + def __init__(self, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): + self.wr_fifo = wr_fifo + self.rd_fifo_addr = rd_fifo_addr + self.rd_fifo_data = rd_fifo_data self.wishbone = wishbone data = Signal(32) adr = Signal(30) - # ##### Iface to WB ##### - self.submodules.wb_fsm = wb_fsm = FSM(reset_state="Reset") - wb_fsm.act("Reset", + # ##### FSM: write to WB ##### + self.submodules.fsm = fsm = 
FSM(reset_state="Reset") + fsm.act("Reset", self.wishbone.we.eq(0), self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), NextState("Idle") ) - wb_fsm.act("Idle", - If(fifo.readable & ~self.wishbone.cyc, - fifo.re.eq(1), - NextValue(adr, fifo.dout[0:30]), - NextValue(data, fifo.dout[30:62]), + fsm.act("Idle", + If(self.wr_fifo.readable & ~self.wishbone.cyc, + self.wr_fifo.re.eq(1), + NextValue(adr, self.wr_fifo.dout[0:30]), + NextValue(data, self.wr_fifo.dout[30:62]), NextState("Write") + ), + If (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, + rd_fifo_addr.re.eq(1), + NextValue(adr, self.rd_fifo_addr.dout[0:30]), + NextState("Read") ) ) - wb_fsm.act("Write", + fsm.act("Write", self.wishbone.adr.eq(adr), self.wishbone.dat_w.eq(data), self.wishbone.we.eq(1), self.wishbone.cyc.eq(1), self.wishbone.stb.eq(1), self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), - If(self.wishbone.ack == 1, + If(self.wishbone.ack, + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ) + ) + fsm.act("Read", + self.wishbone.adr.eq(adr), + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(1), + self.wishbone.stb.eq(1), + self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), + If(self.wishbone.ack, + self.rd_fifo_data.we.eq(1), + self.rd_fifo_data.din.eq(self.wishbone.dat_r), self.wishbone.we.eq(0), self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), From 8f4d05c872e00d00070019a2e1e3674b27244786 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 17 Jun 2021 10:00:08 -0400 Subject: [PATCH 10/78] timeout to avoid hang when something on the wishbone is a bit slow --- .../sbus_to_fpga_fsm.py | 488 ++++++++++++++++++ .../sbus_to_fpga_soc.py | 196 +++++++ .../sbus_to_fpga_wishbone.py | 103 ++-- 3 files changed, 747 insertions(+), 40 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py diff --git 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py new file mode 100644 index 0000000..cac43c0 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -0,0 +1,488 @@ + +from migen import * +from migen.fhdl.specials import Tristate + +SIZ_WORD = 0x0 +SIZ_BYTE = 0x1 +SIZ_HWORD = 0x2 +SIZ_EXT = 0x3 +SIZ_BURST4 = 0x4 +SIZ_BURST8 = 0x5 +SIZ_BURST16 = 0x6 +SIZ_BURST2 = 0x7 + +ACK_IDLE = 0x7 +ACK_ERR = 0x6 +ACK_BYTE = 0x5 +ACK_RERUN = 0x4 +ACK_WORD = 0x3 +ACK_DWORD = 0x2 +ACK_HWORD = 0x1 +ACK_RECV = 0x0 + +ADDR_PHYS_HIGH = 27 +ADDR_PHYS_LOW = 0 +ADDR_PFX_HIGH = ADDR_PHYS_HIGH +ADDR_PFX_LOW = 16 ## 64 KiB per prefix +ADDR_PFX_LENGTH = 12 #(1 + ADDR_PFX_HIGH - ADDR_PFX_LOW) +ROM_ADDR_PFX = Signal(12, reset = 0) +WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) +USBOHCI_ADDR_PFX = Signal(12, reset = 8) + +def siz_is_word(siz): + return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) + +# FIXME: this doesn't work. Verilog aways use value[0:4] +#def _index_with_wrap(counter, limit_m1, value): +# if (limit_m1 == 0): +# return value[0:4] +# elif (limit_m1 == 1): +# return Cat((value + counter)[0:1], value[1:4]) +# elif (limit_m1 == 3): +# return Cat((value + counter)[0:2], value[2:4]) +# elif (limit_m1 == 7): +# return Cat((value + counter)[0:3], value[3:4]) +# elif (limit_m1 == 15): +# return (value + counter)[0:4] +# return value[0:4] + +def index_with_wrap(counter, limit_m1, value): + return ((value+counter) & limit_m1)[0:4] | (value&(~limit_m1))[0:4] + +# FIXME: this doesn't work. 
Verilog aways use 1 +def siz_to_burst_size_m1(siz): + if (SIZ_WORD == siz): + return 0 + elif (SIZ_BURST2 == siz): + return 1 + elif (SIZ_BURST4 == siz): + return 3 + elif (SIZ_BURST8 == siz): + return 7 + elif (SIZ_BURST16 == siz): + return 15 + return 1 + +class LedDisplay(Module): + def __init__(self, pads): + n = len(pads) + self.value = Signal(32, reset = 0x18244281) + old_value = Signal(32) + display = Signal(8) + + self.submodules.fsm = fsm = FSM(reset_state="Reset") + time_counter = Signal(32, reset = 0) + blink_counter = Signal(4, reset = 0) + self.comb += pads.eq(display) + fsm.act("Reset", + NextValue(time_counter, 25000000//10), + NextValue(blink_counter, 10), + NextValue(display, 0x00), + NextValue(old_value, self.value), + NextState("Quick")) + fsm.act("Quick", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + If (blink_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[0:8]), + NextState("Byte0") + ).Else( + NextValue(display, ~display), + NextValue(time_counter, 25000000//10), + NextValue(blink_counter, blink_counter - 1) + ) + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte0", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[8:16]), + NextState("Byte1") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte1", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[16:24]), + NextState("Byte2") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte2", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(display, self.value[24:32]), + NextState("Byte3") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte3", + If 
(old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//10), + NextValue(blink_counter, 10), + NextValue(display, 0x00), + NextState("Quick") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + +class SBusFPGABus(Module): + def __init__(self, platform, prom, hold_reset, wr_fifo, rd_fifo_addr, rd_fifo_data): + self.platform = platform + self.hold_reset = hold_reset + self.wr_fifo = wr_fifo + self.rd_fifo_addr = rd_fifo_addr + self.rd_fifo_data = rd_fifo_data + + #self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) + + #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") + pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") + pad_SBUS_3V3_BGs = platform.request("SBUS_3V3_BGs") + pad_SBUS_3V3_BRs = platform.request("SBUS_3V3_BRs") + pad_SBUS_3V3_ERRs = platform.request("SBUS_3V3_ERRs") + pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + ###pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") + #pad_SBUS_3V3_RSTs = platform.request("SBUS_3V3_RSTs") + pad_SBUS_3V3_SELs = platform.request("SBUS_3V3_SELs") + #pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") + #pad_SBUS_3V3_INT7s = platform.request("SBUS_3V3_INT7s") + pad_SBUS_3V3_PPRD = platform.request("SBUS_3V3_PPRD") + pad_SBUS_OE = platform.request("SBUS_OE") + pad_SBUS_3V3_ACKs = platform.request("SBUS_3V3_ACKs") + pad_SBUS_3V3_SIZ = platform.request("SBUS_3V3_SIZ") + pad_SBUS_3V3_D = platform.request("SBUS_3V3_D") + pad_SBUS_3V3_PA = platform.request("SBUS_3V3_PA") + assert len(pad_SBUS_3V3_D) == 32, "len(pad_SBUS_3V3_D) should be 32" + assert len(pad_SBUS_3V3_PA) == 28, "len(pad_SBUS_3V3_PA) should be 28" + + sbus_oe_data = Signal(reset=0) + sbus_oe_slave_in = Signal(reset=0) + sbus_oe_master_in = Signal(reset=0) + sbus_oe_int1 = Signal(reset=0) + sbus_oe_int7 = Signal(reset=0) + sbus_oe_master_br = Signal(reset=0) + + sbus_last_pa = Signal(28) + burst_index = Signal(4) + burst_counter = 
Signal(4) + burst_limit_m1 = Signal(4) + + #SBUS_3V3_CLK = Signal() + SBUS_3V3_ASs_i = Signal() + self.comb += SBUS_3V3_ASs_i.eq(pad_SBUS_3V3_ASs) + SBUS_3V3_BGs_i = Signal() + self.comb += SBUS_3V3_BGs_i.eq(pad_SBUS_3V3_BGs) + SBUS_3V3_BRs_o = Signal(reset=1) + self.specials += Tristate(pad_SBUS_3V3_BRs, SBUS_3V3_BRs_o, sbus_oe_master_br, None) + SBUS_3V3_ERRs_i = Signal() + SBUS_3V3_ERRs_o = Signal() + self.specials += Tristate(pad_SBUS_3V3_ERRs, SBUS_3V3_ERRs_o, sbus_oe_master_in, SBUS_3V3_ERRs_i) + SBUS_DATA_OE_LED_o = Signal() + self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + ###SBUS_DATA_OE_LED_2_o = Signal() + ###self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) + #SBUS_3V3_RSTs = Signal() + SBUS_3V3_SELs_i = Signal() + self.comb += SBUS_3V3_SELs_i.eq(pad_SBUS_3V3_SELs) + #SBUS_3V3_INT1s_o = Signal(reset=1) + #self.specials += Tristate(pad_SBUS_3V3_INT1s, SBUS_3V3_INT1s_o, sbus_oe_int1, None) + #SBUS_3V3_INT7s_o = Signal(reset=1) + #self.specials += Tristate(pad_SBUS_3V3_INT7s, SBUS_3V3_INT7s_o, sbus_oe_int7, None) + SBUS_3V3_PPRD_i = Signal() + SBUS_3V3_PPRD_o = Signal() + self.specials += Tristate(pad_SBUS_3V3_PPRD, SBUS_3V3_PPRD_o, sbus_oe_slave_in, SBUS_3V3_PPRD_i) + #SBUS_OE_o = Signal() + self.comb += pad_SBUS_OE.eq(self.hold_reset) + SBUS_3V3_ACKs_i = Signal(3) + SBUS_3V3_ACKs_o = Signal(3) + self.specials += Tristate(pad_SBUS_3V3_ACKs, SBUS_3V3_ACKs_o, sbus_oe_master_in, SBUS_3V3_ACKs_i) + SBUS_3V3_SIZ_i = Signal(3) + SBUS_3V3_SIZ_o = Signal(3) + self.specials += Tristate(pad_SBUS_3V3_SIZ, SBUS_3V3_SIZ_o, sbus_oe_slave_in, SBUS_3V3_SIZ_i) + SBUS_3V3_D_i = Signal(32) + SBUS_3V3_D_o = Signal(32) + self.specials += Tristate(pad_SBUS_3V3_D, SBUS_3V3_D_o, sbus_oe_data, SBUS_3V3_D_i) + SBUS_3V3_PA_i = Signal(28) + self.comb += SBUS_3V3_PA_i.eq(pad_SBUS_3V3_PA) + + p_data = Signal(32) # data to read/write + + data_read_addr = Signal(30) # first addr of req. when reading from WB + data_read_enable = Signal() # start enqueuing req. 
to read from WB + data_read_timeout = Signal(7) + data_read_stale = Signal(5, reset = 0) + + # clean the read FIFO from stale data + self.submodules.cleaning_fsm = cleaning_fsm = FSM(reset_state="Reset") + cleaning_fsm.act("Reset", + NextState("Idle")) + cleaning_fsm.act("Idle", + If(rd_fifo_data.readable & (data_read_stale != 0), + rd_fifo_data.re.eq(1), + NextValue(data_read_stale, data_read_stale - 1))) + self.comb += SBUS_DATA_OE_LED_o.eq(data_read_stale != 0) + + self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") + + slave_fsm.act("Reset", + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + NextValue(p_data, 0), + NextState("Start") + ) + slave_fsm.act("Start", + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + NextValue(p_data, 0), + If((self.hold_reset == 0), NextState("Idle")) + ) + slave_fsm.act("Idle", + If(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (data_read_stale != 0)), ## refuse access until we've cleaned up the mess + NextValue(sbus_oe_master_in, 1), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (siz_is_word(SBUS_3V3_SIZ_i)) & + (SBUS_3V3_PPRD_i == 1) & + (SBUS_3V3_PA_i[0:2] == 0)), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + NextValue(burst_counter, 0), + Case(SBUS_3V3_SIZ_i, { + SIZ_WORD: NextValue(burst_limit_m1, 0), + SIZ_BURST2: NextValue(burst_limit_m1, 1), + SIZ_BURST4: NextValue(burst_limit_m1, 3), + SIZ_BURST8: NextValue(burst_limit_m1, 7), + SIZ_BURST16: NextValue(burst_limit_m1, 15)}), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), + 
NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + NextState("Slave_Ack_Read_Prom_Burst") + ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX)), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(p_data, 0xDEADBEEF), + NextValue(data_read_addr, (Cat(SBUS_3V3_PA_i[2:], Signal(4, reset=0)))), # enqueue all the request to the wishbone + NextValue(data_read_enable, 1), # enqueue all the request to the wishbone + NextValue(data_read_timeout, 0xFF), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (SIZ_BYTE == SBUS_3V3_SIZ_i) & + (SBUS_3V3_PPRD_i == 1)), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), + NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + NextState("Slave_Ack_Read_Prom_Byte") + ).Else( + #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 2), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (siz_is_word(SBUS_3V3_SIZ_i)) & + (SBUS_3V3_PPRD_i == 0) & + (SBUS_3V3_PA_i[0:2] == 0) & + (self.wr_fifo.writable)), # maybe we should check for enough space? not that we'll encounter write burst... 
+ NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + NextValue(burst_counter, 0), + Case(SBUS_3V3_SIZ_i, { + SIZ_WORD: NextValue(burst_limit_m1, 0), + SIZ_BURST2: NextValue(burst_limit_m1, 1), + SIZ_BURST4: NextValue(burst_limit_m1, 3), + SIZ_BURST8: NextValue(burst_limit_m1, 7), + SIZ_BURST16: NextValue(burst_limit_m1, 15)}), + If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX)), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Ack_Reg_Write_Burst") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) + ) + ) + # ##### READ ##### + slave_fsm.act("Slave_Ack_Read_Prom_Burst", + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, p_data), + NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), + If((burst_counter == burst_limit_m1), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Do_Read") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(burst_counter, burst_counter + 1) + ) + ) + slave_fsm.act("Slave_Ack_Read_Prom_Byte", + NextValue(sbus_oe_data, 1), + If((sbus_last_pa[0:2] == 0x0), + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[24:32])) + ).Elif((sbus_last_pa[0:2] == 0x1), + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[16:24])) + ).Elif((sbus_last_pa[0:2] == 0x2), + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 8:16])) + ).Elif((sbus_last_pa[0:2] == 0x3), + NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 0: 8])) + ), + NextState("Slave_Do_Read") + ) + slave_fsm.act("Slave_Do_Read", + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), 
+ If((SBUS_3V3_ASs_i == 1), + NextState("Idle") + ) + ) + slave_fsm.act("Slave_Ack_Read_Reg_Burst", + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, p_data), + If((burst_counter == burst_limit_m1), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Do_Read") + ).Else( + NextValue(burst_counter, burst_counter + 1), + If(rd_fifo_data.readable, + If(rd_fifo_data.dout[32] == 0, + NextValue(p_data, rd_fifo_data.dout), + rd_fifo_data.re.eq(1), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD) + ).Else( + rd_fifo_data.re.eq(1), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(data_read_stale, burst_limit_m1 - burst_counter), + NextState("Slave_Do_Read"), + ) + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") + ) + ) + ) + slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Data", + NextValue(data_read_timeout, data_read_timeout - 1), + If(rd_fifo_data.readable, + NextValue(p_data, rd_fifo_data.dout), + rd_fifo_data.re.eq(1), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextState("Slave_Ack_Read_Reg_Burst") + ).Elif(data_read_timeout == 0, + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(data_read_stale, 1 + burst_limit_m1 - burst_counter), + NextState("Slave_Do_Read") + ) + ) + # ##### WRITE ##### + slave_fsm.act("Slave_Ack_Reg_Write_Burst", + self.wr_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr + sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr + Signal(4, reset = 0), # 4 bits, adr (could be removed) + SBUS_3V3_D_i)), # 32 bits, data + self.wr_fifo.we.eq(1), + If((burst_counter == burst_limit_m1), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Reg_Write_Final") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(burst_counter, burst_counter + 1) + ) + ) + slave_fsm.act("Slave_Ack_Reg_Write_Final", + 
NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + If((SBUS_3V3_ASs_i == 1), + NextState("Idle") + ) + ) + # ##### ERROR ##### + slave_fsm.act("Slave_Error", + NextValue(sbus_oe_int1, 0), + NextValue(sbus_oe_int7, 0), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(sbus_oe_master_br, 0), + If((SBUS_3V3_ASs_i == 1), + NextState("Idle") + ) + ) + + self.submodules.request_fsm = request_fsm = FSM(reset_state="Reset") + request_fsm.act("Reset", + NextState("Idle") + ) + request_fsm.act("Idle", + If(data_read_enable, + NextValue(data_read_enable, 0), + self.rd_fifo_addr.we.eq(1), + self.rd_fifo_addr.din.eq(data_read_addr), + If (burst_limit_m1 != burst_counter, # 0 the first time + NextValue(burst_counter, burst_counter + 1), + NextState("Queue") + ) + ) + ) + request_fsm.act("Queue", + self.rd_fifo_addr.we.eq(1), + self.rd_fifo_addr.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, data_read_addr[0:4]), data_read_addr[4:])), + If (burst_limit_m1 != burst_counter, + NextValue(burst_counter, burst_counter + 1), + ).Else( + NextState("Idle") + ) + ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py new file mode 100644 index 0000000..89323e8 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -0,0 +1,196 @@ +import os +import argparse +from migen import * +import litex +from litex.build.generic_platform import * +from litex.build.xilinx.vivado import vivado_build_args, vivado_build_argdict +from litex.soc.integration.soc import * +from litex.soc.integration.soc_core import * +from litex.soc.integration.builder import * +from litex.soc.cores.clock import * +from litex.soc.cores.led import LedChaser +from litex_boards.platforms import ztex213 +from migen.genlib.fifo import * + +from 
sbus_to_fpga_fsm import *; +from sbus_to_fpga_wishbone import *; + +_sbus_sbus = [ + ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), + ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), + ("SBUS_3V3_BGs", 0, Pins("T6"), IOStandard("lvttl")), + ("SBUS_3V3_BRs", 0, Pins("R6"), IOStandard("lvttl")), + ("SBUS_3V3_ERRs", 0, Pins("V2"), IOStandard("lvttl")), + ("SBUS_DATA_OE_LED", 0, Pins("U1"), IOStandard("lvttl")), + ("SBUS_DATA_OE_LED_2", 0, Pins("T3"), IOStandard("lvttl")), + ("SBUS_3V3_RSTs", 0, Pins("U2"), IOStandard("lvttl")), + ("SBUS_3V3_SELs", 0, Pins("K6"), IOStandard("lvttl")), + ("SBUS_3V3_INT1s", 0, Pins("R3"), IOStandard("lvttl")), + ("SBUS_3V3_INT7s", 0, Pins("N5"), IOStandard("lvttl")), + ("SBUS_3V3_PPRD", 0, Pins("N6"), IOStandard("lvttl")), + ("SBUS_OE", 0, Pins("P5"), IOStandard("lvttl")), + ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), + ("SBUS_3V3_SIZ", 0, Pins("R7 U3 V1"), IOStandard("lvttl")), + ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), + ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), +] + +_usb_io = [ + ("usb", 0, + Subsignal("dp", Pins("E3")), # Serial TX + Subsignal("dm", Pins("F3")), # Serial RX + IOStandard("LVCMOS33")) +] +# CRG ---------------------------------------------------------------------------------------------- + +class _CRG(Module): + def __init__(self, platform, sys_clk_freq): + self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus, SoC/Wishbone main clock + self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset) + self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain + self.clock_domains.cd_por = ClockDomain() # 48 MHz native, 
reset'ed by SBus, power-on-reset timer + self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus, for USB controller + + # # # + clk48 = platform.request("clk48") + self.cd_native.clk = clk48 + clk_sbus = platform.request("SBUS_3V3_CLK") + self.cd_sbus.clk = clk_sbus + rst_sbus = platform.request("SBUS_3V3_RSTs") + + self.comb += self.cd_sbus.rst.eq(~rst_sbus) + + self.submodules.pll = pll = S7MMCM(speedgrade=-1) + pll.register_clkin(clk48, 48e6) + pll.create_clkout(self.cd_sys, sys_clk_freq) + + platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) + platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) + + # Power on reset, reset propagate from SBus to SYS + por_count = Signal(16, reset=2**16-1) + por_done = Signal() + self.comb += self.cd_por.clk.eq(clk48) + self.comb += por_done.eq(por_count == 0) + self.sync.por += If(~por_done, por_count.eq(por_count - 1)) + self.comb += pll.reset.eq(~por_done | ~rst_sbus) + + # USB + self.submodules.usb_pll = usb_pll = S7MMCM(speedgrade=-1) + self.comb += usb_pll.reset.eq(~por_done | ~rst_sbus) + usb_pll.register_clkin(clk48, 48e6) + usb_pll.create_clkout(self.cd_usb, 48e6, margin = 0) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_usb.clk) + +class SBusFPGA(SoCCore): + def __init__(self, **kwargs): + + kwargs["cpu_type"] = "None" + kwargs["integrated_sram_size"] = 0 + kwargs["with_uart"] = False + kwargs["with_timer"] = False + + self.sys_clk_freq = sys_clk_freq = 100e6 + + self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") + self.platform.add_extension(_sbus_sbus) + self.platform.add_extension(_usb_io) + SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, clk_freq=sys_clk_freq, **kwargs) + wb_mem_map = { + "prom": 0x00000000, + "csr" : 0x00040000, + 
"usb_host": 0x00080000, + } + self.mem_map.update(wb_mem_map) + self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) + self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max + + self.submodules.leds = LedChaser( + pads = platform.request_all("user_led"), + sys_clk_freq = sys_clk_freq) + self.add_csr("leds") + + self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) + #self.comb += self.cpu.interrupt[16].eq(self.usb_host.interrupt) #fixme: need to deal with interrupts + + prom_file = "prom_migen.fc" + prom_data = soc_core.get_mem_data(prom_file, "big") + prom = Array(prom_data) + #print("\n****************************************\n") + #for i in range(len(prom)): + # print(hex(prom[i])) + #print("\n****************************************\n") + self.add_ram("prom", origin=self.mem_map["prom"], size=2**16, contents=prom_data, mode="r") + #getattr(self,"prom").mem.init = prom_data + #getattr(self,"prom").mem.depth = 2**14 + + # don't enable anything on the SBus side for 20 seconds after power up + # this avoids FPGA initialization messing with the cold boot process + # requires us to reset the SPARCstation afterward so the FPGA board + # is properly identified + # This is in the 'native' ClockDomain that is never reset + hold_reset_ctr = Signal(30, reset=960000000) + self.sync.native += If(hold_reset_ctr>0, hold_reset_ctr.eq(hold_reset_ctr - 1)) + hold_reset = Signal(reset=1) + self.comb += hold_reset.eq(~(hold_reset_ctr == 0)) + + + # FIFO to send data & address from SBus to the Wishbone + sbus_to_wishbone_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) + sbus_to_wishbone_wr_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_wr_fifo) + self.submodules += sbus_to_wishbone_wr_fifo + + # FIFOs to send address / receive data from SBus to the Wishbone + sbus_to_wishbone_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=16) + 
sbus_to_wishbone_rd_fifo_addr = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_rd_fifo_addr) + self.submodules += sbus_to_wishbone_rd_fifo_addr + sbus_to_wishbone_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=16) + sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) + self.submodules += sbus_to_wishbone_rd_fifo_data + + # SBus to Wishbone FSM, 'Slave' on the SBus side, 'Master' on the Wishbone side + self.submodules.sbus_to_wishbone = SBusToWishbone(platform=self.platform, + wr_fifo=sbus_to_wishbone_wr_fifo, + rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + rd_fifo_data=sbus_to_wishbone_rd_fifo_data, + wishbone=wishbone.Interface(data_width=self.bus.data_width)) + + _sbus_bus = SBusFPGABus(platform=self.platform, + prom=prom, + hold_reset=hold_reset, + wr_fifo=sbus_to_wishbone_wr_fifo, + rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + rd_fifo_data=sbus_to_wishbone_rd_fifo_data,) + self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) + + self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) + + # self.soc = Module() + # self.soc.mem_regions = self.mem_regions = {} + # region = litex.soc.integration.soc.SoCRegion(origin=0x0, size=0x0) + # region.length = 0 + # self.mem_regions['csr'] = region + # self.soc.constants = self.constants = {} + # self.soc.csr_regions = self.csr_regions = {} + # self.soc.cpu_type = self.cpu_type = None + +# def do_finalize(self): +# self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) + +def main(): + parser = argparse.ArgumentParser(description="SbusFPGA") + parser.add_argument("--build", action="store_true", help="Build bitstream") + builder_args(parser) + vivado_build_args(parser) + args = parser.parse_args() + + soc = SBusFPGA(**soc_core_argdict(args)) + #soc.add_uart(name="uart", baudrate=115200, fifo_depth=16) + + builder = Builder(soc, 
**builder_argdict(args)) + builder.build(**vivado_build_argdict(args), run=args.build) + +if __name__ == "__main__": + main() diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py index 33cb1ef..73132bf 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py @@ -3,62 +3,85 @@ from migen import * from litex.soc.interconnect import wishbone class SBusToWishbone(Module): - def __init__(self, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): + def __init__(self, platform, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): + self.platform = platform self.wr_fifo = wr_fifo self.rd_fifo_addr = rd_fifo_addr self.rd_fifo_data = rd_fifo_data self.wishbone = wishbone + + pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") + SBUS_DATA_OE_LED_2_o = Signal() + self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) data = Signal(32) adr = Signal(30) + timeout = Signal(7) # ##### FSM: write to WB ##### self.submodules.fsm = fsm = FSM(reset_state="Reset") fsm.act("Reset", + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ) + fsm.act("Idle", + If (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, + rd_fifo_addr.re.eq(1), + NextValue(adr, self.rd_fifo_addr.dout[0:30]), + NextValue(timeout, 127), + NextState("Read") + ).Elif(self.wr_fifo.readable & ~self.wishbone.cyc, + self.wr_fifo.re.eq(1), + NextValue(adr, self.wr_fifo.dout[0:30]), + NextValue(data, self.wr_fifo.dout[30:62]), + NextValue(timeout, 127), + NextState("Write") + ) + ) + fsm.act("Write", + SBUS_DATA_OE_LED_2_o.eq(1), + self.wishbone.adr.eq(adr), + self.wishbone.dat_w.eq(data), + self.wishbone.we.eq(1), + self.wishbone.cyc.eq(1), + self.wishbone.stb.eq(1), + self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), + NextValue(timeout, timeout - 1), + If(self.wishbone.ack, self.wishbone.we.eq(0), 
self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), NextState("Idle") - ) - fsm.act("Idle", - If(self.wr_fifo.readable & ~self.wishbone.cyc, - self.wr_fifo.re.eq(1), - NextValue(adr, self.wr_fifo.dout[0:30]), - NextValue(data, self.wr_fifo.dout[30:62]), - NextState("Write") - ), - If (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, - rd_fifo_addr.re.eq(1), - NextValue(adr, self.rd_fifo_addr.dout[0:30]), - NextState("Read") - ) - ) - fsm.act("Write", - self.wishbone.adr.eq(adr), - self.wishbone.dat_w.eq(data), - self.wishbone.we.eq(1), - self.wishbone.cyc.eq(1), - self.wishbone.stb.eq(1), - self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), - If(self.wishbone.ack, - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) + ).Elif(timeout == 0, # fixme, what to do to signal a problem ? + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ) ) fsm.act("Read", - self.wishbone.adr.eq(adr), + SBUS_DATA_OE_LED_2_o.eq(1), + self.wishbone.adr.eq(adr), + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(1), + self.wishbone.stb.eq(1), + self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), + NextValue(timeout, timeout - 1), + If(self.wishbone.ack, + self.rd_fifo_data.we.eq(1), + self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r, Signal(reset = 0))), self.wishbone.we.eq(0), - self.wishbone.cyc.eq(1), - self.wishbone.stb.eq(1), - self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), - If(self.wishbone.ack, - self.rd_fifo_data.we.eq(1), - self.rd_fifo_data.din.eq(self.wishbone.dat_r), - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + NextState("Idle") + ).Elif(timeout == 0, + self.rd_fifo_data.we.eq(1), + self.rd_fifo_data.din.eq(Cat(Signal(32, reset = 0xDEADBEEF), Signal(reset = 1))), + self.wishbone.we.eq(0), + self.wishbone.cyc.eq(0), + self.wishbone.stb.eq(0), + 
NextState("Idle") + ) ) From 3c7fefadb94ec811f6cd261f20f6111ffcb613c6 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 19 Jun 2021 02:10:54 -0400 Subject: [PATCH 11/78] still some issues on registers, master mode draft --- .../sbus_to_fpga_fsm.py | 309 ++++++++++++++---- .../sbus_to_fpga_soc.py | 71 +++- .../sbus_to_fpga_wishbone.py | 125 ++++++- 3 files changed, 412 insertions(+), 93 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index cac43c0..79b9471 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -64,20 +64,21 @@ def siz_to_burst_size_m1(siz): return 1 class LedDisplay(Module): - def __init__(self, pads): - n = len(pads) + def __init__(self): #, pads + #n = len(pads) + n = 8 self.value = Signal(32, reset = 0x18244281) old_value = Signal(32) - display = Signal(8) + self.display = Signal(8) + #self.comb += pads.eq(self.display) self.submodules.fsm = fsm = FSM(reset_state="Reset") time_counter = Signal(32, reset = 0) blink_counter = Signal(4, reset = 0) - self.comb += pads.eq(display) fsm.act("Reset", NextValue(time_counter, 25000000//10), NextValue(blink_counter, 10), - NextValue(display, 0x00), + NextValue(self.display, 0x00), NextValue(old_value, self.value), NextState("Quick")) fsm.act("Quick", @@ -86,10 +87,10 @@ class LedDisplay(Module): ).Elif(time_counter == 0, If (blink_counter == 0, NextValue(time_counter, 25000000//2), - NextValue(display, self.value[0:8]), + NextValue(self.display, self.value[0:8]), NextState("Byte0") ).Else( - NextValue(display, ~display), + NextValue(self.display, ~self.display), NextValue(time_counter, 25000000//10), NextValue(blink_counter, blink_counter - 1) ) @@ -102,7 +103,7 @@ class LedDisplay(Module): NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), - NextValue(display, self.value[8:16]), + NextValue(self.display, self.value[8:16]), 
NextState("Byte1") ).Else( NextValue(time_counter, time_counter - 1) @@ -113,7 +114,7 @@ class LedDisplay(Module): NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), - NextValue(display, self.value[16:24]), + NextValue(self.display, self.value[16:24]), NextState("Byte2") ).Else( NextValue(time_counter, time_counter - 1) @@ -124,7 +125,7 @@ class LedDisplay(Module): NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), - NextValue(display, self.value[24:32]), + NextValue(self.display, self.value[24:32]), NextState("Byte3") ).Else( NextValue(time_counter, time_counter - 1) @@ -136,7 +137,7 @@ class LedDisplay(Module): ).Elif(time_counter == 0, NextValue(time_counter, 25000000//10), NextValue(blink_counter, 10), - NextValue(display, 0x00), + NextValue(self.display, 0x00), NextState("Quick") ).Else( NextValue(time_counter, time_counter - 1) @@ -144,26 +145,36 @@ class LedDisplay(Module): ) class SBusFPGABus(Module): - def __init__(self, platform, prom, hold_reset, wr_fifo, rd_fifo_addr, rd_fifo_data): + def __init__(self, platform, prom, hold_reset, wr_fifo, rd_fifo_addr, rd_fifo_data, master_wr_fifo, master_rd_fifo_addr, master_rd_fifo_data): self.platform = platform self.hold_reset = hold_reset self.wr_fifo = wr_fifo self.rd_fifo_addr = rd_fifo_addr self.rd_fifo_data = rd_fifo_data - #self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) + self.master_wr_fifo = master_wr_fifo + self.master_rd_fifo_addr = master_rd_fifo_addr + self.master_rd_fifo_data = master_rd_fifo_data + + ##pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + ##SBUS_DATA_OE_LED_o = Signal() + ##self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + ##pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") + ##SBUS_DATA_OE_LED_2_o = Signal() + ##self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) + + #self.comb += SBUS_DATA_OE_LED_o.eq(~rd_fifo_addr.writable) + #self.comb += 
SBUS_DATA_OE_LED_2_o.eq(rd_fifo_data.readable) #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") pad_SBUS_3V3_BGs = platform.request("SBUS_3V3_BGs") pad_SBUS_3V3_BRs = platform.request("SBUS_3V3_BRs") pad_SBUS_3V3_ERRs = platform.request("SBUS_3V3_ERRs") - pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - ###pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") #pad_SBUS_3V3_RSTs = platform.request("SBUS_3V3_RSTs") pad_SBUS_3V3_SELs = platform.request("SBUS_3V3_SELs") #pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") - #pad_SBUS_3V3_INT7s = platform.request("SBUS_3V3_INT7s") + pad_SBUS_3V3_INT7s = platform.request("SBUS_3V3_INT7s") pad_SBUS_3V3_PPRD = platform.request("SBUS_3V3_PPRD") pad_SBUS_OE = platform.request("SBUS_OE") pad_SBUS_3V3_ACKs = platform.request("SBUS_3V3_ACKs") @@ -176,9 +187,9 @@ class SBusFPGABus(Module): sbus_oe_data = Signal(reset=0) sbus_oe_slave_in = Signal(reset=0) sbus_oe_master_in = Signal(reset=0) - sbus_oe_int1 = Signal(reset=0) + #sbus_oe_int1 = Signal(reset=0) sbus_oe_int7 = Signal(reset=0) - sbus_oe_master_br = Signal(reset=0) + #sbus_oe_master_br = Signal(reset=0) sbus_last_pa = Signal(28) burst_index = Signal(4) @@ -191,21 +202,18 @@ class SBusFPGABus(Module): SBUS_3V3_BGs_i = Signal() self.comb += SBUS_3V3_BGs_i.eq(pad_SBUS_3V3_BGs) SBUS_3V3_BRs_o = Signal(reset=1) - self.specials += Tristate(pad_SBUS_3V3_BRs, SBUS_3V3_BRs_o, sbus_oe_master_br, None) + #self.specials += Tristate(pad_SBUS_3V3_BRs, SBUS_3V3_BRs_o, sbus_oe_master_br, None) + self.comb += pad_SBUS_3V3_BRs.eq(SBUS_3V3_BRs_o) SBUS_3V3_ERRs_i = Signal() SBUS_3V3_ERRs_o = Signal() self.specials += Tristate(pad_SBUS_3V3_ERRs, SBUS_3V3_ERRs_o, sbus_oe_master_in, SBUS_3V3_ERRs_i) - SBUS_DATA_OE_LED_o = Signal() - self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) - ###SBUS_DATA_OE_LED_2_o = Signal() - ###self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) #SBUS_3V3_RSTs = 
Signal() SBUS_3V3_SELs_i = Signal() self.comb += SBUS_3V3_SELs_i.eq(pad_SBUS_3V3_SELs) #SBUS_3V3_INT1s_o = Signal(reset=1) #self.specials += Tristate(pad_SBUS_3V3_INT1s, SBUS_3V3_INT1s_o, sbus_oe_int1, None) - #SBUS_3V3_INT7s_o = Signal(reset=1) - #self.specials += Tristate(pad_SBUS_3V3_INT7s, SBUS_3V3_INT7s_o, sbus_oe_int7, None) + SBUS_3V3_INT7s_o = Signal(reset=1) + self.specials += Tristate(pad_SBUS_3V3_INT7s, SBUS_3V3_INT7s_o, sbus_oe_int7, None) SBUS_3V3_PPRD_i = Signal() SBUS_3V3_PPRD_o = Signal() self.specials += Tristate(pad_SBUS_3V3_PPRD, SBUS_3V3_PPRD_o, sbus_oe_slave_in, SBUS_3V3_PPRD_i) @@ -230,35 +238,51 @@ class SBusFPGABus(Module): data_read_timeout = Signal(7) data_read_stale = Signal(5, reset = 0) + master_data = Signal(32) # could be merged with p_data + master_addr = Signal(30) # could be meged with data_read_addr + + master_we = Signal(); + +# self.submodules.led_display = LedDisplay() +# #self.comb += self.led_display.value.eq(Cat(Signal(2, reset=0), master_addr)) +# self.comb += self.led_display.value.eq(p_data) +# old_display = Signal(8) +# self.sync += old_display.eq(self.led_display.display) +# self.submodules.display_fsm = display_fsm = FSM(reset_state="Reset") +# display_fsm.act("Reset", +# NextState("Idle")) +# display_fsm.act("Idle", +# If(old_display != self.led_display.display, +# NextState("Update"))) +# display_fsm.act("Update", +# If(self.wr_fifo.writable & SBUS_3V3_ASs_i, ## available space and not in a slave cycle +# self.wr_fifo.we.eq(1), +# self.wr_fifo.din.eq(Cat(Signal(30, reset=0x00040000), self.led_display.display, Signal(24, reset=0))), +# NextState("Idle"))) + # clean the read FIFO from stale data self.submodules.cleaning_fsm = cleaning_fsm = FSM(reset_state="Reset") cleaning_fsm.act("Reset", NextState("Idle")) cleaning_fsm.act("Idle", - If(rd_fifo_data.readable & (data_read_stale != 0), - rd_fifo_data.re.eq(1), + If(self.rd_fifo_data.readable & (data_read_stale != 0), + self.rd_fifo_data.re.eq(1), 
NextValue(data_read_stale, data_read_stale - 1))) - self.comb += SBUS_DATA_OE_LED_o.eq(data_read_stale != 0) + #self.comb += SBUS_DATA_OE_LED_o.eq(data_read_stale != 0) self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") slave_fsm.act("Reset", - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), NextValue(p_data, 0), NextState("Start") ) slave_fsm.act("Start", - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), NextValue(p_data, 0), If((self.hold_reset == 0), NextState("Idle")) ) @@ -296,7 +320,7 @@ class SBusFPGABus(Module): NextValue(p_data, 0xDEADBEEF), NextValue(data_read_addr, (Cat(SBUS_3V3_PA_i[2:], Signal(4, reset=0)))), # enqueue all the request to the wishbone NextValue(data_read_enable, 1), # enqueue all the request to the wishbone - NextValue(data_read_timeout, 0xFF), + NextValue(data_read_timeout, 0x7F), NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") ).Else( NextValue(SBUS_3V3_ACKs_o, ACK_ERR), @@ -315,7 +339,6 @@ class SBusFPGABus(Module): NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), NextState("Slave_Ack_Read_Prom_Byte") ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 2), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -345,9 +368,39 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) + ).Elif(SBUS_3V3_BGs_i & + (self.master_wr_fifo.readable | self.master_rd_fifo_addr.readable), + NextValue(SBUS_3V3_BRs_o, 0) + ).Elif(~SBUS_3V3_BGs_i & + (self.master_wr_fifo.readable | self.master_rd_fifo_addr.readable), + NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request + NextValue(sbus_oe_data, 1), 
## output data (at least for @ during translation) + NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output + NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input + NextValue(burst_counter, 0), + NextValue(burst_limit_m1, 0), ## only single word for now + If(self.master_wr_fifo.readable, + NextValue(master_addr, self.master_wr_fifo.dout[0:30]), + NextValue(master_data, self.master_wr_fifo.dout[30:32]), + self.master_wr_fifo.re.eq(1), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.master_wr_fifo.dout[0:30])), + NextValue(SBUS_3V3_PPRD_o, 0), + NextValue(master_we, 1), + NextState("Master_Translation") + ).Elif(self.master_rd_fifo_addr.readable, + NextValue(master_addr, self.master_rd_fifo_addr.dout), + self.master_rd_fifo_addr.re.eq(1), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.master_rd_fifo_addr.dout[0:30])), + NextValue(SBUS_3V3_PPRD_o, 1), + NextValue(master_we, 0), + NextState("Master_Translation") + ).Else( + # FIXME: handle error + ) + ) ) - # ##### READ ##### + # ##### SLAVE READ ##### slave_fsm.act("Slave_Ack_Read_Prom_Burst", NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, p_data), @@ -374,12 +427,9 @@ class SBusFPGABus(Module): NextState("Slave_Do_Read") ) slave_fsm.act("Slave_Do_Read", - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), If((SBUS_3V3_ASs_i == 1), NextState("Idle") ) @@ -392,13 +442,14 @@ class SBusFPGABus(Module): NextState("Slave_Do_Read") ).Else( NextValue(burst_counter, burst_counter + 1), - If(rd_fifo_data.readable, - If(rd_fifo_data.dout[32] == 0, - NextValue(p_data, rd_fifo_data.dout), - rd_fifo_data.re.eq(1), + If(self.rd_fifo_data.readable, + If(self.rd_fifo_data.dout[32] == 0, + NextValue(p_data, self.rd_fifo_data.dout), + self.rd_fifo_data.re.eq(1), NextValue(SBUS_3V3_ACKs_o, ACK_WORD) ).Else( - rd_fifo_data.re.eq(1), + self.rd_fifo_data.re.eq(1), + 
NextValue(p_data, self.rd_fifo_data.dout), NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), NextValue(data_read_stale, burst_limit_m1 - burst_counter), NextState("Slave_Do_Read"), @@ -411,18 +462,27 @@ class SBusFPGABus(Module): ) slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Data", NextValue(data_read_timeout, data_read_timeout - 1), - If(rd_fifo_data.readable, - NextValue(p_data, rd_fifo_data.dout), - rd_fifo_data.re.eq(1), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextState("Slave_Ack_Read_Reg_Burst") + If(self.rd_fifo_data.readable, + If(self.rd_fifo_data.dout[32] == 0, + NextValue(p_data, self.rd_fifo_data.dout), + self.rd_fifo_data.re.eq(1), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextState("Slave_Ack_Read_Reg_Burst") + ).Else( + self.rd_fifo_data.re.eq(1), + NextValue(p_data, self.rd_fifo_data.dout), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(data_read_stale, burst_limit_m1 - burst_counter), + NextState("Slave_Do_Read"), + ) ).Elif(data_read_timeout == 0, - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), - NextValue(data_read_stale, 1 + burst_limit_m1 - burst_counter), - NextState("Slave_Do_Read") + NextValue(p_data, 0x00C0FFEE), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(data_read_stale, 1 + burst_limit_m1 - burst_counter), + NextState("Slave_Do_Read") ) ) - # ##### WRITE ##### + # ##### SLAVE WRITE ##### slave_fsm.act("Slave_Ack_Reg_Write_Burst", self.wr_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr @@ -439,30 +499,148 @@ class SBusFPGABus(Module): ) ) slave_fsm.act("Slave_Ack_Reg_Write_Final", - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), If((SBUS_3V3_ASs_i == 1), NextState("Idle") ) ) - # ##### ERROR ##### + # ##### SLAVE ERROR ##### slave_fsm.act("Slave_Error", - 
NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), If((SBUS_3V3_ASs_i == 1), NextState("Idle") ) ) + # ##### MASTER ##### + slave_fsm.act("Master_Translation", + If(master_we, + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, master_data) + ).Else( + NextValue(sbus_oe_data, 0) + ), + Case(SBUS_3V3_ACKs_i, { + ACK_ERR: ## ouch + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle")], + ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle")], + ACK_IDLE: + [If(master_we, + NextState("Master_Write"), + ## FIXME: in burst mode, should update master_data with the next value + ## FIXME: we don't do burst mode yet + ).Else( + NextState("Master_Read"), + )], + "default": + [If(SBUS_3V3_BGs_i, ## oups, we lost our bus access without error ?!? + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + )], + }) + ) + slave_fsm.act("Master_Read", + Case(SBUS_3V3_ACKs_i, { + ACK_WORD: + [NextState("Master_Read_Ack") + ], + ACK_IDLE: + [NextState("Master_Read") ## redundant + ], + ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? 
+ [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ], + "default": ## ACK_ERRS or other + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ], + }) + ) + slave_fsm.act("Master_Read_Ack", + self.master_rd_fifo_data.we.eq(1), + NextValue(self.master_rd_fifo_data.din, SBUS_3V3_D_i), + NextValue(burst_counter, burst_counter + 1), + If(burst_counter == burst_limit_m1, + NextState("Master_Read_Finish") + ).Else( + Case(SBUS_3V3_ACKs_i, { + ACK_WORD: NextState("Master_Read_Ack"), ## redundant + ACK_IDLE: NextState("Master_Read"), + ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ], + "default": + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ], + }), + ) + ) + slave_fsm.act("Master_Read_Finish", ## missing the handling of late error + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ) + slave_fsm.act("Master_Write", + Case(SBUS_3V3_ACKs_i, { + ACK_WORD: + [If(burst_counter == burst_limit_m1, + NextState("Master_Write_Final"), + ).Else( + NextValue(SBUS_3V3_D_o, master_data), ## FIXME: we're not updating master_data for burst mode yet + NextValue(burst_counter, burst_counter + 1), + )], + ACK_IDLE: + [NextState("Master_Write") ## redundant + ], + ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? 
+ [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ], + "default": ## ACK_ERRS or other + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ], + }) + ) + slave_fsm.act("Master_Write_Final", + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextState("Idle") + ) + # ##### FINISHED ##### self.submodules.request_fsm = request_fsm = FSM(reset_state="Reset") + req_counter = Signal(4) + req_limit_m1 = Signal(4) request_fsm.act("Reset", NextState("Idle") ) @@ -472,16 +650,17 @@ class SBusFPGABus(Module): self.rd_fifo_addr.we.eq(1), self.rd_fifo_addr.din.eq(data_read_addr), If (burst_limit_m1 != burst_counter, # 0 the first time - NextValue(burst_counter, burst_counter + 1), + NextValue(req_counter, burst_counter + 1), + NextValue(req_limit_m1, burst_limit_m1), NextState("Queue") ) ) ) request_fsm.act("Queue", self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, data_read_addr[0:4]), data_read_addr[4:])), - If (burst_limit_m1 != burst_counter, - NextValue(burst_counter, burst_counter + 1), + self.rd_fifo_addr.din.eq(Cat(index_with_wrap(req_counter, req_limit_m1, data_read_addr[0:4]), data_read_addr[4:])), + If(req_limit_m1 != req_counter, + NextValue(req_counter, req_counter + 1), ).Else( NextState("Idle") ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 89323e8..85d5ea2 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -37,8 +37,8 @@ _sbus_sbus = [ _usb_io = [ ("usb", 0, - Subsignal("dp", Pins("E3")), # Serial TX - Subsignal("dm", Pins("F3")), # Serial RX + Subsignal("dp", Pins("V9")), # Serial TX + Subsignal("dm", Pins("U9")), # Serial RX IOStandard("LVCMOS33")) ] # CRG 
---------------------------------------------------------------------------------------------- @@ -99,9 +99,10 @@ class SBusFPGA(SoCCore): self.platform.add_extension(_usb_io) SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, clk_freq=sys_clk_freq, **kwargs) wb_mem_map = { - "prom": 0x00000000, - "csr" : 0x00040000, - "usb_host": 0x00080000, + "prom": 0x00000000, + "csr" : 0x00040000, + "usb_host": 0x00080000, + "usb_fake_dma": 0x000c0000, } self.mem_map.update(wb_mem_map) self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) @@ -114,6 +115,24 @@ class SBusFPGA(SoCCore): self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) #self.comb += self.cpu.interrupt[16].eq(self.usb_host.interrupt) #fixme: need to deal with interrupts + + pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") + SBUS_3V3_INT1s_o = Signal(reset=1) + # the 74LVC2G07 takes care of the Z state: 1 -> Z on the bus, 0 -> 0 on the bus (asserted interrupt) + self.comb += pad_SBUS_3V3_INT1s.eq(SBUS_3V3_INT1s_o) + self.comb += SBUS_3V3_INT1s_o.eq(~self.usb_host.interrupt) + + + pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + SBUS_DATA_OE_LED_o = Signal() + self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + #pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") + #SBUS_DATA_OE_LED_2_o = Signal() + #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) + interrupt_memory = Signal() + self.sync += interrupt_memory.eq(interrupt_memory | ~SBUS_3V3_INT1s_o) + self.comb += SBUS_DATA_OE_LED_o.eq(interrupt_memory) + #self.comb += SBUS_DATA_OE_LED_2_o.eq(~SBUS_3V3_INT1s_o) prom_file = "prom_migen.fc" prom_data = soc_core.get_mem_data(prom_file, "big") @@ -150,24 +169,50 @@ class SBusFPGA(SoCCore): sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) self.submodules += sbus_to_wishbone_rd_fifo_data - # SBus to Wishbone FSM, 'Slave' on the SBus side, 'Master' on 
the Wishbone side + # SBus to Wishbone, 'Slave' on the SBus side, 'Master' on the Wishbone side self.submodules.sbus_to_wishbone = SBusToWishbone(platform=self.platform, wr_fifo=sbus_to_wishbone_wr_fifo, rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, rd_fifo_data=sbus_to_wishbone_rd_fifo_data, wishbone=wishbone.Interface(data_width=self.bus.data_width)) + + # FIFO to send data & address from Wishbone to the SBus + wishbone_to_sbus_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) + wishbone_to_sbus_wr_fifo = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_wr_fifo) + self.submodules += wishbone_to_sbus_wr_fifo + + # FIFOs to send address / receive data from Wishbone to the SBus + wishbone_to_sbus_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=4) + wishbone_to_sbus_rd_fifo_addr = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_rd_fifo_addr) + self.submodules += wishbone_to_sbus_rd_fifo_addr + wishbone_to_sbus_rd_fifo_data = AsyncFIFOBuffered(width=32, depth=4) + wishbone_to_sbus_rd_fifo_data = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(wishbone_to_sbus_rd_fifo_data) + self.submodules += wishbone_to_sbus_rd_fifo_data + + # Wishbone to SBus, 'Master' on the SBus side, 'Slave' on the Wishbone side + self.submodules.wishbone_to_sbus = WishboneToSBus(platform=self.platform, + soc=self, + wr_fifo=wishbone_to_sbus_wr_fifo, + rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, + rd_fifo_data=wishbone_to_sbus_rd_fifo_data, + wishbone=wishbone.Interface(data_width=self.bus.data_width)) + _sbus_bus = SBusFPGABus(platform=self.platform, - prom=prom, - hold_reset=hold_reset, - wr_fifo=sbus_to_wishbone_wr_fifo, - rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - rd_fifo_data=sbus_to_wishbone_rd_fifo_data,) + prom=prom, + hold_reset=hold_reset, + wr_fifo=sbus_to_wishbone_wr_fifo, + rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + rd_fifo_data=sbus_to_wishbone_rd_fifo_data, + master_wr_fifo=wishbone_to_sbus_wr_fifo, + 
master_rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, + master_rd_fifo_data=wishbone_to_sbus_rd_fifo_data) self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) - - # self.soc = Module() + self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x10000, cached=False)) + +# self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} # region = litex.soc.integration.soc.SoCRegion(origin=0x0, size=0x0) # region.length = 0 diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py index 73132bf..063a0c5 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py @@ -2,6 +2,7 @@ from migen import * from litex.soc.interconnect import wishbone +# ******************************************************************************************************** class SBusToWishbone(Module): def __init__(self, platform, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): self.platform = platform @@ -10,6 +11,9 @@ class SBusToWishbone(Module): self.rd_fifo_data = rd_fifo_data self.wishbone = wishbone + #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + #SBUS_DATA_OE_LED_o = Signal() + #self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") SBUS_DATA_OE_LED_2_o = Signal() self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) @@ -17,8 +21,10 @@ class SBusToWishbone(Module): data = Signal(32) adr = Signal(30) timeout = Signal(7) + + self.real_hcca = Signal(32) - # ##### FSM: write to WB ##### + # ##### FSM: read/write from/to WB ##### self.submodules.fsm = fsm = FSM(reset_state="Reset") fsm.act("Reset", self.wishbone.we.eq(0), @@ -27,21 +33,30 @@ class SBusToWishbone(Module): NextState("Idle") 
) fsm.act("Idle", - If (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, - rd_fifo_addr.re.eq(1), - NextValue(adr, self.rd_fifo_addr.dout[0:30]), - NextValue(timeout, 127), - NextState("Read") - ).Elif(self.wr_fifo.readable & ~self.wishbone.cyc, - self.wr_fifo.re.eq(1), - NextValue(adr, self.wr_fifo.dout[0:30]), - NextValue(data, self.wr_fifo.dout[30:62]), - NextValue(timeout, 127), - NextState("Write") + # write first, we don't want a read to pass before a previous write + If(self.wr_fifo.readable & ~self.wishbone.cyc, + self.wr_fifo.re.eq(1), + NextValue(adr, self.wr_fifo.dout[0:30]), + ## need to cheat with the USB HCCA registers + If((self.wr_fifo.dout[0:30] == 0x00020006), ## 80018 >> 2 == HCCA register for USB + NextValue(SBUS_DATA_OE_LED_2_o, 1), + NextValue(self.real_hcca, self.wr_fifo.dout[30:62]), + NextValue(data, Cat(self.wr_fifo.dout[30:46], Signal(16, reset=0x000c))) ## 0x000c: are reserved for DMA bridging + ).Elif((self.wr_fifo.dout[0:30] >= 0x00020007) & (self.wr_fifo.dout[0:30] <= 0x0002000c) & (self.wr_fifo.dout[30:62] != 0), + NextValue(data, Cat(self.wr_fifo.dout[30:46], Signal(16, reset=0x000c))) + ).Else( + NextValue(data, self.wr_fifo.dout[30:62]) + ), + NextValue(timeout, 127), + NextState("Write") + ).Elif (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, + rd_fifo_addr.re.eq(1), + NextValue(adr, self.rd_fifo_addr.dout[0:30]), + NextValue(timeout, 127), + NextState("Read") ) ) fsm.act("Write", - SBUS_DATA_OE_LED_2_o.eq(1), self.wishbone.adr.eq(adr), self.wishbone.dat_w.eq(data), self.wishbone.we.eq(1), @@ -62,7 +77,6 @@ class SBusToWishbone(Module): ) ) fsm.act("Read", - SBUS_DATA_OE_LED_2_o.eq(1), self.wishbone.adr.eq(adr), self.wishbone.we.eq(0), self.wishbone.cyc.eq(1), @@ -71,7 +85,11 @@ class SBusToWishbone(Module): NextValue(timeout, timeout - 1), If(self.wishbone.ack, self.rd_fifo_data.we.eq(1), - self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r, Signal(reset = 0))), + If((adr >= 
0x00020006) & (adr <= 0x0002000c) & (self.wishbone.dat_r != 0), ## 80018 >> 2 == HCCA register for USB + self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r[0:16], self.real_hcca[16:32], Signal(reset = 0))) + ).Else( + self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r, Signal(reset = 0))) + ), self.wishbone.we.eq(0), self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), @@ -85,3 +103,80 @@ class SBusToWishbone(Module): NextState("Idle") ) ) + +# ******************************************************************************************************** +class WishboneToSBus(Module): + def __init__(self, platform, soc, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): + self.platform = platform + self.wr_fifo = wr_fifo + self.rd_fifo_addr = rd_fifo_addr + self.rd_fifo_data = rd_fifo_data + self.wishbone = wishbone + self.soc = soc + + #pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") + #SBUS_DATA_OE_LED_2_o = Signal() + #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) + + data = Signal(32) + adr = Signal(30) + + self.real_hcca = self.soc.sbus_to_wishbone.real_hcca + + # ##### FSM: read/write from/to SBus ##### + self.submodules.fsm = fsm = FSM(reset_state="Reset") + fsm.act("Reset", + NextState("Idle") + ) + fsm.act("Idle", + If(self.wishbone.stb & self.wishbone.cyc & self.wishbone.we & self.wr_fifo.writable, + If((self.wishbone.adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + self.wr_fifo.we.eq(1), + self.wr_fifo.din.eq(Cat(self.wishbone.adr[0:14], self.real_hcca[16:32], self.wishbone.dat_w[30:62])) + ), + NextState("WriteWait") + ).Elif(self.wishbone.stb & self.wishbone.cyc & ~self.wishbone.we & self.rd_fifo_addr.writable, + If((self.wishbone.adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + NextValue(adr, self.wishbone.adr), + self.rd_fifo_addr.we.eq(1), + self.rd_fifo_addr.din.eq(Cat(self.wishbone.adr[0:14], self.real_hcca[16:32])) + ), + NextState("ReadWait"), + ) + ) + fsm.act("WriteWait", + 
#SBUS_DATA_OE_LED_2_o.eq(1), + If((self.wishbone.adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + self.wishbone.ack.eq(1), + ).Else( + self.wishbone.err.eq(1) + ), + If(~self.wishbone.stb, + NextState("Idle") + ) + ) + fsm.act("ReadWait", + #SBUS_DATA_OE_LED_2_o.eq(1), + If((adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + If(self.rd_fifo_data.readable, + self.wishbone.ack.eq(1), + self.rd_fifo_data.re.eq(1), + NextValue(data, self.rd_fifo_data.dout), + self.wishbone.dat_r.eq(self.rd_fifo_data.dout), + NextState("ReadWait2") + ) + ).Else( + self.wishbone.err.eq(1), + If(~self.wishbone.stb, + NextState("Idle") + ) + ) + ) + fsm.act("ReadWait2", + #SBUS_DATA_OE_LED_2_o.eq(1), + self.wishbone.ack.eq(1), + self.wishbone.dat_r.eq(data), + If(~self.wishbone.stb, + NextState("Idle") + ) + ) From c649cc4440895028700d2af22fdbac7689fc4ace Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 19 Jun 2021 04:00:06 -0400 Subject: [PATCH 12/78] drop address mapping, simply map the NetBSd DVMA virtual space (?) 
as Wishbone physical address --- .../sbus_to_fpga_soc.py | 4 +- .../sbus_to_fpga_wishbone.py | 44 +++++-------------- 2 files changed, 14 insertions(+), 34 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 85d5ea2..26a4941 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -102,7 +102,7 @@ class SBusFPGA(SoCCore): "prom": 0x00000000, "csr" : 0x00040000, "usb_host": 0x00080000, - "usb_fake_dma": 0x000c0000, + "usb_fake_dma": 0xfc000000, } self.mem_map.update(wb_mem_map) self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) @@ -210,7 +210,7 @@ class SBusFPGA(SoCCore): self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) - self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x10000, cached=False)) + self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py index 063a0c5..54766b8 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py @@ -14,15 +14,13 @@ class SBusToWishbone(Module): #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") #SBUS_DATA_OE_LED_o = Signal() #self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) - pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") - SBUS_DATA_OE_LED_2_o = Signal() - self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) + #pad_SBUS_DATA_OE_LED_2 = 
platform.request("SBUS_DATA_OE_LED_2") + #SBUS_DATA_OE_LED_2_o = Signal() + #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) data = Signal(32) adr = Signal(30) timeout = Signal(7) - - self.real_hcca = Signal(32) # ##### FSM: read/write from/to WB ##### self.submodules.fsm = fsm = FSM(reset_state="Reset") @@ -37,16 +35,7 @@ class SBusToWishbone(Module): If(self.wr_fifo.readable & ~self.wishbone.cyc, self.wr_fifo.re.eq(1), NextValue(adr, self.wr_fifo.dout[0:30]), - ## need to cheat with the USB HCCA registers - If((self.wr_fifo.dout[0:30] == 0x00020006), ## 80018 >> 2 == HCCA register for USB - NextValue(SBUS_DATA_OE_LED_2_o, 1), - NextValue(self.real_hcca, self.wr_fifo.dout[30:62]), - NextValue(data, Cat(self.wr_fifo.dout[30:46], Signal(16, reset=0x000c))) ## 0x000c: are reserved for DMA bridging - ).Elif((self.wr_fifo.dout[0:30] >= 0x00020007) & (self.wr_fifo.dout[0:30] <= 0x0002000c) & (self.wr_fifo.dout[30:62] != 0), - NextValue(data, Cat(self.wr_fifo.dout[30:46], Signal(16, reset=0x000c))) - ).Else( - NextValue(data, self.wr_fifo.dout[30:62]) - ), + NextValue(data, self.wr_fifo.dout[30:62]), NextValue(timeout, 127), NextState("Write") ).Elif (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, @@ -85,11 +74,7 @@ class SBusToWishbone(Module): NextValue(timeout, timeout - 1), If(self.wishbone.ack, self.rd_fifo_data.we.eq(1), - If((adr >= 0x00020006) & (adr <= 0x0002000c) & (self.wishbone.dat_r != 0), ## 80018 >> 2 == HCCA register for USB - self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r[0:16], self.real_hcca[16:32], Signal(reset = 0))) - ).Else( - self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r, Signal(reset = 0))) - ), + self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r, Signal(reset = 0))), self.wishbone.we.eq(0), self.wishbone.cyc.eq(0), self.wishbone.stb.eq(0), @@ -120,8 +105,6 @@ class WishboneToSBus(Module): data = Signal(32) adr = Signal(30) - - self.real_hcca = self.soc.sbus_to_wishbone.real_hcca # ##### FSM: read/write 
from/to SBus ##### self.submodules.fsm = fsm = FSM(reset_state="Reset") @@ -130,24 +113,23 @@ class WishboneToSBus(Module): ) fsm.act("Idle", If(self.wishbone.stb & self.wishbone.cyc & self.wishbone.we & self.wr_fifo.writable, - If((self.wishbone.adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range (3f == fc>>2) self.wr_fifo.we.eq(1), - self.wr_fifo.din.eq(Cat(self.wishbone.adr[0:14], self.real_hcca[16:32], self.wishbone.dat_w[30:62])) + self.wr_fifo.din.eq(Cat(self.wishbone.adr[0:30], self.wishbone.dat_w[0:32])) ), NextState("WriteWait") ).Elif(self.wishbone.stb & self.wishbone.cyc & ~self.wishbone.we & self.rd_fifo_addr.writable, - If((self.wishbone.adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range NextValue(adr, self.wishbone.adr), self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(Cat(self.wishbone.adr[0:14], self.real_hcca[16:32])) + self.rd_fifo_addr.din.eq(self.wishbone.adr[0:30]) ), NextState("ReadWait"), ) ) fsm.act("WriteWait", - #SBUS_DATA_OE_LED_2_o.eq(1), - If((self.wishbone.adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range - self.wishbone.ack.eq(1), + If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range + self.wishbone.ack.eq(1) ).Else( self.wishbone.err.eq(1) ), @@ -156,8 +138,7 @@ class WishboneToSBus(Module): ) ) fsm.act("ReadWait", - #SBUS_DATA_OE_LED_2_o.eq(1), - If((adr[14:30] == 0x000c) & (self.real_hcca != 0), ## in our DMA range + If(adr[24:30] == 0x3f, ## in our DMA range If(self.rd_fifo_data.readable, self.wishbone.ack.eq(1), self.rd_fifo_data.re.eq(1), @@ -173,7 +154,6 @@ class WishboneToSBus(Module): ) ) fsm.act("ReadWait2", - #SBUS_DATA_OE_LED_2_o.eq(1), self.wishbone.ack.eq(1), self.wishbone.dat_r.eq(data), If(~self.wishbone.stb, From 0297df0131e582f1d44bf272301658a9632ec35f Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 19 Jun 2021 11:40:36 -0400 Subject: 
[PATCH 13/78] not working, the DMA seems to fails at frame 3... --- .../sbus_to_fpga_fsm.py | 43 ++++++++++++++----- .../sbus_to_fpga_soc.py | 28 ++++++------ .../sbus_to_fpga_wishbone.py | 43 +++++++++++++++---- 3 files changed, 82 insertions(+), 32 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 79b9471..26f55c8 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -64,13 +64,12 @@ def siz_to_burst_size_m1(siz): return 1 class LedDisplay(Module): - def __init__(self): #, pads - #n = len(pads) - n = 8 + def __init__(self, pads): + n = len(pads) self.value = Signal(32, reset = 0x18244281) old_value = Signal(32) self.display = Signal(8) - #self.comb += pads.eq(self.display) + self.comb += pads.eq(self.display) self.submodules.fsm = fsm = FSM(reset_state="Reset") time_counter = Signal(32, reset = 0) @@ -165,6 +164,24 @@ class SBusFPGABus(Module): #self.comb += SBUS_DATA_OE_LED_o.eq(~rd_fifo_addr.writable) #self.comb += SBUS_DATA_OE_LED_2_o.eq(rd_fifo_data.readable) + + #leds = Signal(7, reset=0x00) + #self.comb += platform.request("user_led", 0).eq(leds[0]) + #self.comb += platform.request("user_led", 1).eq(leds[1]) + #self.comb += platform.request("user_led", 2).eq(leds[2]) + #self.comb += platform.request("user_led", 3).eq(leds[3]) + #self.comb += platform.request("user_led", 4).eq(leds[4]) + #self.comb += platform.request("user_led", 5).eq(leds[5]) + #self.comb += platform.request("user_led", 6).eq(leds[6]) + ##self.comb += platform.request("user_led", 7).eq(leds[7]) + + #self.comb += leds[0].eq(self.wr_fifo.writable) + #self.comb += leds[1].eq(~self.rd_fifo_data.readable) + #self.comb += leds[2].eq(self.rd_fifo_addr.writable) + + #self.comb += leds[4].eq(~self.master_wr_fifo.readable) + #self.comb += leds[5].eq(self.master_rd_fifo_data.writable) + #self.comb += leds[6].eq(~self.master_rd_fifo_addr.readable) 
#pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") @@ -243,7 +260,7 @@ class SBusFPGABus(Module): master_we = Signal(); -# self.submodules.led_display = LedDisplay() + self.submodules.led_display = LedDisplay(platform.request_all("user_led")) # #self.comb += self.led_display.value.eq(Cat(Signal(2, reset=0), master_addr)) # self.comb += self.led_display.value.eq(p_data) # old_display = Signal(8) @@ -517,6 +534,8 @@ class SBusFPGABus(Module): ) # ##### MASTER ##### slave_fsm.act("Master_Translation", + If(master_addr[22:30] == 0xFC, + NextValue(self.led_display.value, Cat(master_we, Signal(1, reset = 0), master_addr))), If(master_we, NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, master_data) @@ -559,14 +578,18 @@ class SBusFPGABus(Module): ACK_IDLE: [NextState("Master_Read") ## redundant ], - ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? - [NextValue(sbus_oe_data, 0), + ACK_RERUN: ### burst not handled + [self.master_rd_fifo_data.we.eq(1), + NextValue(self.master_rd_fifo_data.din, Cat(0xDEADBEEF, Signal(1, reset = 1))), + NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle") ], - "default": ## ACK_ERRS or other - [NextValue(sbus_oe_data, 0), + "default": ## ACK_ERRS or other ### burst not handled + [self.master_rd_fifo_data.we.eq(1), + NextValue(self.master_rd_fifo_data.din, Cat(0xDEADBEEF, Signal(1, reset = 1))), + NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle") @@ -575,7 +598,7 @@ class SBusFPGABus(Module): ) slave_fsm.act("Master_Read_Ack", self.master_rd_fifo_data.we.eq(1), - NextValue(self.master_rd_fifo_data.din, SBUS_3V3_D_i), + NextValue(self.master_rd_fifo_data.din, Cat(SBUS_3V3_D_i, Signal(1, reset = 0))), NextValue(burst_counter, burst_counter + 1), If(burst_counter == burst_limit_m1, NextState("Master_Read_Finish") diff --git 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 26a4941..ad0592d 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -45,11 +45,11 @@ _usb_io = [ class _CRG(Module): def __init__(self, platform, sys_clk_freq): - self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus, SoC/Wishbone main clock - self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset) + self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock + self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset, we don't want the delay after a warm reset) self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain - self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer - self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus, for USB controller +# self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer + self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller # # # clk48 = platform.request("clk48") @@ -57,7 +57,6 @@ class _CRG(Module): clk_sbus = platform.request("SBUS_3V3_CLK") self.cd_sbus.clk = clk_sbus rst_sbus = platform.request("SBUS_3V3_RSTs") - self.comb += self.cd_sbus.rst.eq(~rst_sbus) self.submodules.pll = pll = S7MMCM(speedgrade=-1) @@ -70,18 +69,19 @@ class _CRG(Module): platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) # Power on reset, reset propagate from SBus to SYS - por_count = Signal(16, reset=2**16-1) - por_done = Signal() - self.comb += self.cd_por.clk.eq(clk48) - self.comb += por_done.eq(por_count == 0) - self.sync.por += If(~por_done, 
por_count.eq(por_count - 1)) - self.comb += pll.reset.eq(~por_done | ~rst_sbus) +# por_count = Signal(16, reset=2**16-1) +# por_done = Signal() +# self.comb += self.cd_por.clk.eq(clk48) +# self.comb += por_done.eq(por_count == 0) +# self.sync.por += If(~por_done, por_count.eq(por_count - 1)) +# self.comb += self.cd_por.rst.eq(~rst_sbus) +# self.comb += pll.reset.eq(~por_done | ~rst_sbus) # USB self.submodules.usb_pll = usb_pll = S7MMCM(speedgrade=-1) - self.comb += usb_pll.reset.eq(~por_done | ~rst_sbus) usb_pll.register_clkin(clk48, 48e6) usb_pll.create_clkout(self.cd_usb, 48e6, margin = 0) + self.comb += usb_pll.reset.eq(~rst_sbus) # | ~por_done platform.add_false_path_constraints(self.cd_sys.clk, self.cd_usb.clk) class SBusFPGA(SoCCore): @@ -109,7 +109,7 @@ class SBusFPGA(SoCCore): self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max self.submodules.leds = LedChaser( - pads = platform.request_all("user_led"), + pads = platform.request("SBUS_DATA_OE_LED_2"), #platform.request("user_led", 7), sys_clk_freq = sys_clk_freq) self.add_csr("leds") @@ -186,7 +186,7 @@ class SBusFPGA(SoCCore): wishbone_to_sbus_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=4) wishbone_to_sbus_rd_fifo_addr = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_rd_fifo_addr) self.submodules += wishbone_to_sbus_rd_fifo_addr - wishbone_to_sbus_rd_fifo_data = AsyncFIFOBuffered(width=32, depth=4) + wishbone_to_sbus_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=4) wishbone_to_sbus_rd_fifo_data = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(wishbone_to_sbus_rd_fifo_data) self.submodules += wishbone_to_sbus_rd_fifo_data diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py index 54766b8..9f5b50b 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py @@ -20,7 +20,7 @@ 
class SBusToWishbone(Module): data = Signal(32) adr = Signal(30) - timeout = Signal(7) + timeout = Signal(9) # ##### FSM: read/write from/to WB ##### self.submodules.fsm = fsm = FSM(reset_state="Reset") @@ -36,12 +36,12 @@ class SBusToWishbone(Module): self.wr_fifo.re.eq(1), NextValue(adr, self.wr_fifo.dout[0:30]), NextValue(data, self.wr_fifo.dout[30:62]), - NextValue(timeout, 127), + NextValue(timeout, 511), NextState("Write") ).Elif (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, rd_fifo_addr.re.eq(1), NextValue(adr, self.rd_fifo_addr.dout[0:30]), - NextValue(timeout, 127), + NextValue(timeout, 511), NextState("Read") ) ) @@ -105,6 +105,7 @@ class WishboneToSBus(Module): data = Signal(32) adr = Signal(30) + timeout = Signal(9) # ##### FSM: read/write from/to SBus ##### self.submodules.fsm = fsm = FSM(reset_state="Reset") @@ -117,6 +118,7 @@ class WishboneToSBus(Module): self.wr_fifo.we.eq(1), self.wr_fifo.din.eq(Cat(self.wishbone.adr[0:30], self.wishbone.dat_w[0:32])) ), + NextValue(timeout, 511), NextState("WriteWait") ).Elif(self.wishbone.stb & self.wishbone.cyc & ~self.wishbone.we & self.rd_fifo_addr.writable, If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range @@ -124,6 +126,7 @@ class WishboneToSBus(Module): self.rd_fifo_addr.we.eq(1), self.rd_fifo_addr.din.eq(self.wishbone.adr[0:30]) ), + NextValue(timeout, 511), NextState("ReadWait"), ) ) @@ -133,18 +136,30 @@ class WishboneToSBus(Module): ).Else( self.wishbone.err.eq(1) ), + NextValue(timeout, timeout - 1), If(~self.wishbone.stb, NextState("Idle") + ).Elif(timeout == 0, # fixme, what to do to signal a problem ? 
+ NextState("Idle") ) ) fsm.act("ReadWait", + NextValue(timeout, timeout - 1), If(adr[24:30] == 0x3f, ## in our DMA range If(self.rd_fifo_data.readable, - self.wishbone.ack.eq(1), - self.rd_fifo_data.re.eq(1), - NextValue(data, self.rd_fifo_data.dout), - self.wishbone.dat_r.eq(self.rd_fifo_data.dout), - NextState("ReadWait2") + If(self.rd_fifo_data.dout[32] == 0, + self.wishbone.ack.eq(1), + self.rd_fifo_data.re.eq(1), + NextValue(data, self.rd_fifo_data.dout), + self.wishbone.dat_r.eq(self.rd_fifo_data.dout[0:32]), + NextState("ReadWait2") + ).Else( + self.wishbone.err.eq(1), + self.rd_fifo_data.re.eq(1), + NextState("ReadWaitErr") + ) + ).Elif(timeout == 0, # fixme, what to do to signal a problem ? + NextState("Idle") ) ).Else( self.wishbone.err.eq(1), @@ -154,9 +169,21 @@ class WishboneToSBus(Module): ) ) fsm.act("ReadWait2", + NextValue(timeout, timeout - 1), self.wishbone.ack.eq(1), self.wishbone.dat_r.eq(data), If(~self.wishbone.stb, NextState("Idle") + ).Elif(timeout == 0, # fixme, what to do to signal a problem ? + NextState("Idle") + ) + ) + fsm.act("ReadWaitErr", + NextValue(timeout, timeout - 1), + self.wishbone.err.eq(1), + If(~self.wishbone.stb, + NextState("Idle") + ).Elif(timeout == 0, # fixme, what to do to signal a problem ? + NextState("Idle") ) ) From c9269b9f2edd04293a1847282d342f8ebed88516 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 20 Jun 2021 11:22:15 -0400 Subject: [PATCH 14/78] not yet working , DMA very suspicious, at least interrupt seems to propagate... 
--- .../sbus_to_fpga_fsm.py | 474 +++++++++++------- .../sbus_to_fpga_soc.py | 124 +++-- 2 files changed, 369 insertions(+), 229 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 26f55c8..05add64 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -29,6 +29,9 @@ ROM_ADDR_PFX = Signal(12, reset = 0) WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) USBOHCI_ADDR_PFX = Signal(12, reset = 8) +wishbone_default_timeout = 63 +sbus_default_timeout = 63 + def siz_is_word(siz): return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) @@ -66,8 +69,8 @@ def siz_to_burst_size_m1(siz): class LedDisplay(Module): def __init__(self, pads): n = len(pads) - self.value = Signal(32, reset = 0x18244281) - old_value = Signal(32) + self.value = Signal(40, reset = 0x0018244281) + old_value = Signal(40) self.display = Signal(8) self.comb += pads.eq(self.display) @@ -75,11 +78,11 @@ class LedDisplay(Module): time_counter = Signal(32, reset = 0) blink_counter = Signal(4, reset = 0) fsm.act("Reset", - NextValue(time_counter, 25000000//10), - NextValue(blink_counter, 10), - NextValue(self.display, 0x00), + NextValue(time_counter, 25000000//2), + NextValue(blink_counter, 0), + NextValue(self.display, self.value[0:8]), NextValue(old_value, self.value), - NextState("Quick")) + NextState("Byte0")) fsm.act("Quick", If (old_value != self.value, NextState("Reset") @@ -101,9 +104,9 @@ class LedDisplay(Module): If (old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(self.display, self.value[8:16]), - NextState("Byte1") + NextValue(time_counter, 25000000//2), + NextValue(self.display, self.value[8:16]), + NextState("Byte1") ).Else( NextValue(time_counter, time_counter - 1) ) @@ -112,9 +115,9 @@ class LedDisplay(Module): If (old_value != 
self.value, NextState("Reset") ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(self.display, self.value[16:24]), - NextState("Byte2") + NextValue(time_counter, 25000000//2), + NextValue(self.display, self.value[16:24]), + NextState("Byte2") ).Else( NextValue(time_counter, time_counter - 1) ) @@ -123,47 +126,51 @@ class LedDisplay(Module): If (old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(self.display, self.value[24:32]), - NextState("Byte3") + NextValue(time_counter, 25000000//2), + NextValue(self.display, self.value[24:32]), + NextState("Byte3") ).Else( NextValue(time_counter, time_counter - 1) ) ) fsm.act("Byte3", + If (old_value != self.value, + NextState("Reset") + ).Elif(time_counter == 0, + NextValue(time_counter, 25000000//2), + NextValue(self.display, self.value[32:40]), + NextState("Byte4") + ).Else( + NextValue(time_counter, time_counter - 1) + ) + ) + fsm.act("Byte4", If (old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//10), NextValue(blink_counter, 10), NextValue(self.display, 0x00), - NextState("Quick") + NextState("Quick") ).Else( NextValue(time_counter, time_counter - 1) ) ) class SBusFPGABus(Module): - def __init__(self, platform, prom, hold_reset, wr_fifo, rd_fifo_addr, rd_fifo_data, master_wr_fifo, master_rd_fifo_addr, master_rd_fifo_data): + def __init__(self, platform, prom, hold_reset, wishbone_slave, wishbone_master): self.platform = platform self.hold_reset = hold_reset - self.wr_fifo = wr_fifo - self.rd_fifo_addr = rd_fifo_addr - self.rd_fifo_data = rd_fifo_data - self.master_wr_fifo = master_wr_fifo - self.master_rd_fifo_addr = master_rd_fifo_addr - self.master_rd_fifo_data = master_rd_fifo_data + self.wishbone_slave = wishbone_slave + self.wishbone_master = wishbone_master - ##pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - ##SBUS_DATA_OE_LED_o = Signal() - 
##self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + #SBUS_DATA_OE_LED_o = Signal() + #self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) ##pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") ##SBUS_DATA_OE_LED_2_o = Signal() ##self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) - - #self.comb += SBUS_DATA_OE_LED_o.eq(~rd_fifo_addr.writable) - #self.comb += SBUS_DATA_OE_LED_2_o.eq(rd_fifo_data.readable) #leds = Signal(7, reset=0x00) #self.comb += platform.request("user_led", 0).eq(leds[0]) @@ -175,14 +182,6 @@ class SBusFPGABus(Module): #self.comb += platform.request("user_led", 6).eq(leds[6]) ##self.comb += platform.request("user_led", 7).eq(leds[7]) - #self.comb += leds[0].eq(self.wr_fifo.writable) - #self.comb += leds[1].eq(~self.rd_fifo_data.readable) - #self.comb += leds[2].eq(self.rd_fifo_addr.writable) - - #self.comb += leds[4].eq(~self.master_wr_fifo.readable) - #self.comb += leds[5].eq(self.master_rd_fifo_data.writable) - #self.comb += leds[6].eq(~self.master_rd_fifo_addr.readable) - #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") pad_SBUS_3V3_BGs = platform.request("SBUS_3V3_BGs") @@ -252,51 +251,49 @@ class SBusFPGABus(Module): data_read_addr = Signal(30) # first addr of req. when reading from WB data_read_enable = Signal() # start enqueuing req. 
to read from WB - data_read_timeout = Signal(7) - data_read_stale = Signal(5, reset = 0) master_data = Signal(32) # could be merged with p_data master_addr = Signal(30) # could be meged with data_read_addr master_we = Signal(); - self.submodules.led_display = LedDisplay(platform.request_all("user_led")) -# #self.comb += self.led_display.value.eq(Cat(Signal(2, reset=0), master_addr)) -# self.comb += self.led_display.value.eq(p_data) -# old_display = Signal(8) -# self.sync += old_display.eq(self.led_display.display) -# self.submodules.display_fsm = display_fsm = FSM(reset_state="Reset") -# display_fsm.act("Reset", -# NextState("Idle")) -# display_fsm.act("Idle", -# If(old_display != self.led_display.display, -# NextState("Update"))) -# display_fsm.act("Update", -# If(self.wr_fifo.writable & SBUS_3V3_ASs_i, ## available space and not in a slave cycle -# self.wr_fifo.we.eq(1), -# self.wr_fifo.din.eq(Cat(Signal(30, reset=0x00040000), self.led_display.display, Signal(24, reset=0))), -# NextState("Idle"))) + wishbone_master_timeout = Signal(6) + wishbone_slave_timeout = Signal(6) + sbus_slave_timeout = Signal(6) - # clean the read FIFO from stale data - self.submodules.cleaning_fsm = cleaning_fsm = FSM(reset_state="Reset") - cleaning_fsm.act("Reset", - NextState("Idle")) - cleaning_fsm.act("Idle", - If(self.rd_fifo_data.readable & (data_read_stale != 0), - self.rd_fifo_data.re.eq(1), - NextValue(data_read_stale, data_read_stale - 1))) - #self.comb += SBUS_DATA_OE_LED_o.eq(data_read_stale != 0) + sbus_master_throttle = Signal(4) + + #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) + + self.comb += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) + self.comb += platform.request("user_led", 1).eq(self.wishbone_slave.stb) + self.comb += platform.request("user_led", 2).eq(self.wishbone_slave.we) + self.comb += platform.request("user_led", 3).eq(self.wishbone_slave.ack) + self.comb += platform.request("user_led", 
4).eq(self.wishbone_slave.err) + led5 = platform.request("user_led", 5) + self.comb += platform.request("user_led", 6).eq(~SBUS_3V3_BGs_i) + self.comb += platform.request("user_led", 7).eq(~SBUS_3V3_BRs_o) self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") slave_fsm.act("Reset", + #NextValue(self.led_display.value, 0x0000000000), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(p_data, 0), - NextState("Start") + NextState("Start"), + NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.cyc, 0), + NextValue(self.wishbone_master.stb, 0), + NextValue(self.wishbone_slave.ack, 0), + NextValue(self.wishbone_slave.err, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(wishbone_slave_timeout, 0), + NextValue(sbus_slave_timeout, 0) ) slave_fsm.act("Start", + #NextValue(self.led_display.value, 0x0FF0000000), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -304,14 +301,17 @@ class SBusFPGABus(Module): If((self.hold_reset == 0), NextState("Idle")) ) slave_fsm.act("Idle", + #NextValue(self.led_display.value, 0x0000000010 | self.led_display.value), +# If(((SBUS_3V3_SELs_i == 0) & +# (SBUS_3V3_ASs_i == 0) & +# self.wishbone_master.cyc), ## refuse access until we've cleaned up the mess +# NextValue(self.led_display.value, 0x00000010 | 0x00000001), +# NextValue(sbus_oe_master_in, 1), +# NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), +# NextValue(SBUS_3V3_ERRs_o, 1), +# NextState("Slave_Error") +# ).Eli If(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - (data_read_stale != 0)), ## refuse access until we've cleaned up the mess - NextValue(sbus_oe_master_in, 1), - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 1) & @@ -329,17 +329,28 @@ class SBusFPGABus(Module): 
NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 40))), NextState("Slave_Ack_Read_Prom_Burst") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX)), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, 0xDEADBEEF), - NextValue(data_read_addr, (Cat(SBUS_3V3_PA_i[2:], Signal(4, reset=0)))), # enqueue all the request to the wishbone - NextValue(data_read_enable, 1), # enqueue all the request to the wishbone - NextValue(data_read_timeout, 0x7F), - NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet + NextValue(SBUS_3V3_ERRs_o, 1), + If(self.wishbone_master.cyc == 0, + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.adr, Cat(SBUS_3V3_PA_i[2:28], Signal(4, reset = 0))), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_default_timeout), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") + ).Else( + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Wishbone") + ) ).Else( + #NextValue(self.led_display.value, 0x0000000020 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -354,8 +365,10 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, 
ACK_BYTE), NextValue(SBUS_3V3_ERRs_o, 1), NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 80))), NextState("Slave_Ack_Read_Prom_Byte") ).Else( + #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") @@ -365,7 +378,7 @@ class SBusFPGABus(Module): (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 0) & (SBUS_3V3_PA_i[0:2] == 0) & - (self.wr_fifo.writable)), # maybe we should check for enough space? not that we'll encounter write burst... + (~self.wishbone_master.cyc)), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), @@ -379,46 +392,88 @@ class SBusFPGABus(Module): (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), NextState("Slave_Ack_Reg_Write_Burst") ).Else( + #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) ).Elif(SBUS_3V3_BGs_i & - (self.master_wr_fifo.readable | self.master_rd_fifo_addr.readable), + self.wishbone_slave.cyc & + self.wishbone_slave.stb & + ~self.wishbone_slave.ack & + ~self.wishbone_slave.err & + (sbus_master_throttle == 0), NextValue(SBUS_3V3_BRs_o, 0) ).Elif(~SBUS_3V3_BGs_i & - (self.master_wr_fifo.readable | self.master_rd_fifo_addr.readable), + self.wishbone_slave.cyc & + self.wishbone_slave.stb & + ~self.wishbone_slave.ack & + ~self.wishbone_slave.err, NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes 
output NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input NextValue(burst_counter, 0), NextValue(burst_limit_m1, 0), ## only single word for now - If(self.master_wr_fifo.readable, - NextValue(master_addr, self.master_wr_fifo.dout[0:30]), - NextValue(master_data, self.master_wr_fifo.dout[30:32]), - self.master_wr_fifo.re.eq(1), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.master_wr_fifo.dout[0:30])), + If(self.wishbone_slave.we, + NextValue(master_addr, self.wishbone_slave.adr), + NextValue(master_data, self.wishbone_slave.dat_w), + NextValue(self.wishbone_slave.ack, 1), + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), NextValue(SBUS_3V3_PPRD_o, 0), NextValue(master_we, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), NextState("Master_Translation") - ).Elif(self.master_rd_fifo_addr.readable, - NextValue(master_addr, self.master_rd_fifo_addr.dout), - self.master_rd_fifo_addr.re.eq(1), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.master_rd_fifo_addr.dout[0:30])), - NextValue(SBUS_3V3_PPRD_o, 1), - NextValue(master_we, 0), - NextState("Master_Translation") - ).Else( - # FIXME: handle error - ) - + ).Else(NextValue(master_addr, self.wishbone_slave.adr), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(SBUS_3V3_PPRD_o, 1), + NextValue(master_we, 0), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), + NextState("Master_Translation") + ) + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + ((SIZ_HWORD == SBUS_3V3_SIZ_i) | (SIZ_BYTE == SBUS_3V3_SIZ_i))), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + #NextValue(self.led_display.value, 0x00000000a0 | SBUS_3V3_PPRD_i | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + 
NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (~self.wishbone_master.cyc)), + NextValue(sbus_oe_master_in, 1), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x000000000F | Cat(Signal(8, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), + NextState("Slave_Error") + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (self.wishbone_master.cyc)), ## we need to answer, set ACK_RERUN + NextValue(sbus_oe_master_in, 1), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(self.led_display.value, 0x00000000C0 | Cat(self.wishbone_master.cyc, self.wishbone_master.stb, self.wishbone_master.we, self.wishbone_master.ack, Signal(4, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), + NextState("Slave_Error") + ).Elif(((SBUS_3V3_SELs_i != 0) & + (SBUS_3V3_ASs_i != 0) & + (self.wishbone_master.cyc)), + NextValue(sbus_oe_master_in, 0), + ).Elif(~SBUS_3V3_BGs_i, + ### ouch we got the bus but nothing more to do ?!? 
+ NextValue(SBUS_3V3_BRs_o, 1), + NextValue(led5, 1) + ).Else( + # FIXME: handle error ) ) # ##### SLAVE READ ##### slave_fsm.act("Slave_Ack_Read_Prom_Burst", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x02), self.led_display.value[8:40])), NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, p_data), NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), @@ -431,6 +486,7 @@ class SBusFPGABus(Module): ) ) slave_fsm.act("Slave_Ack_Read_Prom_Byte", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x03), self.led_display.value[8:40])), NextValue(sbus_oe_data, 1), If((sbus_last_pa[0:2] == 0x0), NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[24:32])) @@ -444,6 +500,7 @@ class SBusFPGABus(Module): NextState("Slave_Do_Read") ) slave_fsm.act("Slave_Do_Read", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x04), self.led_display.value[8:40])), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -452,6 +509,7 @@ class SBusFPGABus(Module): ) ) slave_fsm.act("Slave_Ack_Read_Reg_Burst", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x05), self.led_display.value[8:40])), NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, p_data), If((burst_counter == burst_limit_m1), @@ -459,54 +517,67 @@ class SBusFPGABus(Module): NextState("Slave_Do_Read") ).Else( NextValue(burst_counter, burst_counter + 1), - If(self.rd_fifo_data.readable, - If(self.rd_fifo_data.dout[32] == 0, - NextValue(p_data, self.rd_fifo_data.dout), - self.rd_fifo_data.re.eq(1), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD) - ).Else( - self.rd_fifo_data.re.eq(1), - NextValue(p_data, self.rd_fifo_data.dout), - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), - NextValue(data_read_stale, burst_limit_m1 - burst_counter), - NextState("Slave_Do_Read"), - ) - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - 
NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") - ) + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.we, 0), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextValue(self.wishbone_master.adr, Cat(index_with_wrap(burst_counter+1, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr + sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr + Signal(4, reset = 0))), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") ) ) slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Data", - NextValue(data_read_timeout, data_read_timeout - 1), - If(self.rd_fifo_data.readable, - If(self.rd_fifo_data.dout[32] == 0, - NextValue(p_data, self.rd_fifo_data.dout), - self.rd_fifo_data.re.eq(1), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextState("Slave_Ack_Read_Reg_Burst") - ).Else( - self.rd_fifo_data.re.eq(1), - NextValue(p_data, self.rd_fifo_data.dout), - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), - NextValue(data_read_stale, burst_limit_m1 - burst_counter), - NextState("Slave_Do_Read"), - ) - ).Elif(data_read_timeout == 0, - NextValue(p_data, 0x00C0FFEE), - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), - NextValue(data_read_stale, 1 + burst_limit_m1 - burst_counter), - NextState("Slave_Do_Read") + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x06), self.led_display.value[8:40])), + If(self.wishbone_master.ack, + NextValue(p_data, self.wishbone_master.dat_r), + NextValue(self.wishbone_master.cyc, 0), + NextValue(self.wishbone_master.stb, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextState("Slave_Ack_Read_Reg_Burst") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long 
+ NextValue(self.wishbone_master.cyc, 0), ## abort transaction + NextValue(self.wishbone_master.stb, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) + slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Wishbone", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x68), self.led_display.value[8:40])), + If(self.wishbone_master.cyc == 0, + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[2:28], Signal(4, reset = 0))), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_slave_timeout), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") ) ) # ##### SLAVE WRITE ##### slave_fsm.act("Slave_Ack_Reg_Write_Burst", - self.wr_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME - sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr - sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr - Signal(4, reset = 0), # 4 bits, adr (could be removed) - SBUS_3V3_D_i)), # 32 bits, data - self.wr_fifo.we.eq(1), + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x07), self.led_display.value[8:40])), + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.adr, Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr 
FIXME + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr + sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr + Signal(4, reset = 0))), + NextValue(self.wishbone_master.dat_w, SBUS_3V3_D_i), + NextValue(self.wishbone_master.we, 1), + NextValue(wishbone_master_timeout, wishbone_default_timeout), If((burst_counter == burst_limit_m1), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Ack_Reg_Write_Final") @@ -516,6 +587,7 @@ class SBusFPGABus(Module): ) ) slave_fsm.act("Slave_Ack_Reg_Write_Final", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x08), self.led_display.value[8:40])), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -525,6 +597,7 @@ class SBusFPGABus(Module): ) # ##### SLAVE ERROR ##### slave_fsm.act("Slave_Error", + #NextValue(self.led_display.value, 0x0000000080 | self.led_display.value), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -534,8 +607,7 @@ class SBusFPGABus(Module): ) # ##### MASTER ##### slave_fsm.act("Master_Translation", - If(master_addr[22:30] == 0xFC, - NextValue(self.led_display.value, Cat(master_we, Signal(1, reset = 0), master_addr))), + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x09), self.led_display.value[8:40])), If(master_we, NextValue(sbus_oe_data, 1), NextValue(SBUS_3V3_D_o, master_data) @@ -548,18 +620,20 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle")], - ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? 
- [NextValue(sbus_oe_data, 0), + ACK_RERUN: ### dunno how to handle that yet, + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle")], ACK_IDLE: [If(master_we, - NextState("Master_Write"), + NextState("Master_Write") ## FIXME: in burst mode, should update master_data with the next value ## FIXME: we don't do burst mode yet ).Else( - NextState("Master_Read"), + NextState("Master_Read") )], "default": [If(SBUS_3V3_BGs_i, ## oups, we lost our bus access without error ?!? @@ -571,6 +645,7 @@ class SBusFPGABus(Module): }) ) slave_fsm.act("Master_Read", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0a), self.led_display.value[8:40])), Case(SBUS_3V3_ACKs_i, { ACK_WORD: [NextState("Master_Read_Ack") @@ -579,16 +654,16 @@ class SBusFPGABus(Module): [NextState("Master_Read") ## redundant ], ACK_RERUN: ### burst not handled - [self.master_rd_fifo_data.we.eq(1), - NextValue(self.master_rd_fifo_data.din, Cat(0xDEADBEEF, Signal(1, reset = 1))), + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle") ], "default": ## ACK_ERRS or other ### burst not handled - [self.master_rd_fifo_data.we.eq(1), - NextValue(self.master_rd_fifo_data.din, Cat(0xDEADBEEF, Signal(1, reset = 1))), + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -597,8 +672,10 @@ class SBusFPGABus(Module): }) ) slave_fsm.act("Master_Read_Ack", - self.master_rd_fifo_data.we.eq(1), - NextValue(self.master_rd_fifo_data.din, Cat(SBUS_3V3_D_i, Signal(1, reset = 0))), + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0b), 
self.led_display.value[8:40])), + NextValue(self.wishbone_slave.ack, 1), + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.dat_r, SBUS_3V3_D_i), NextValue(burst_counter, burst_counter + 1), If(burst_counter == burst_limit_m1, NextState("Master_Read_Finish") @@ -606,7 +683,7 @@ class SBusFPGABus(Module): Case(SBUS_3V3_ACKs_i, { ACK_WORD: NextState("Master_Read_Ack"), ## redundant ACK_IDLE: NextState("Master_Read"), - ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? + ACK_RERUN: ### dunno how to handle that yet [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -622,12 +699,14 @@ class SBusFPGABus(Module): ) ) slave_fsm.act("Master_Read_Finish", ## missing the handling of late error + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0c), self.led_display.value[8:40])), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle") ) slave_fsm.act("Master_Write", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0d), self.led_display.value[8:40])), Case(SBUS_3V3_ACKs_i, { ACK_WORD: [If(burst_counter == burst_limit_m1, @@ -639,7 +718,7 @@ class SBusFPGABus(Module): ACK_IDLE: [NextState("Master_Write") ## redundant ], - ACK_RERUN: ### dunno how to handle that yet, maybe delay the fifo re(1)? 
+ ACK_RERUN: ### dunno how to handle that yet [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -654,37 +733,84 @@ class SBusFPGABus(Module): }) ) slave_fsm.act("Master_Write_Final", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0e), self.led_display.value[8:40])), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(sbus_master_throttle, 7), NextState("Idle") ) # ##### FINISHED ##### - self.submodules.request_fsm = request_fsm = FSM(reset_state="Reset") - req_counter = Signal(4) - req_limit_m1 = Signal(4) - request_fsm.act("Reset", - NextState("Idle") + + # ##### FSMs to finish wishbone transactions asynchronously + + self.submodules.wishbone_master_wait_fsm = wishbone_master_wait_fsm = FSM(reset_state="Reset") + wishbone_master_wait_fsm.act("Reset", + NextState("Idle") ) - request_fsm.act("Idle", - If(data_read_enable, - NextValue(data_read_enable, 0), - self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(data_read_addr), - If (burst_limit_m1 != burst_counter, # 0 the first time - NextValue(req_counter, burst_counter + 1), - NextValue(req_limit_m1, burst_limit_m1), - NextState("Queue") + wishbone_master_wait_fsm.act("Idle", + If (wishbone_master_timeout != 0, + NextValue(wishbone_master_timeout, wishbone_master_timeout -1) + ), + If(self.wishbone_master.cyc & self.wishbone_master.stb & self.wishbone_master.we, + If(self.wishbone_master.ack | (wishbone_master_timeout == 0), + NextValue(self.wishbone_master.cyc, 0), + NextValue(self.wishbone_master.stb, 0), + NextValue(self.wishbone_master.we, 0), + NextValue(wishbone_master_timeout, 0) ) ) ) - request_fsm.act("Queue", - self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(Cat(index_with_wrap(req_counter, req_limit_m1, data_read_addr[0:4]), data_read_addr[4:])), - If(req_limit_m1 != req_counter, - NextValue(req_counter, req_counter + 1), - ).Else( - NextState("Idle") + + + 
self.submodules.wishbone_slave_wait_fsm = wishbone_slave_wait_fsm = FSM(reset_state="Reset") + wishbone_slave_wait_fsm.act("Reset", + NextState("Idle") + ) + wishbone_slave_wait_fsm.act("Idle", + If (wishbone_slave_timeout != 0, + NextValue(wishbone_slave_timeout, wishbone_slave_timeout -1) + ), + If(self.wishbone_slave.ack & self.wishbone_slave.we, + If((~self.wishbone_slave.stb) | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & + NextValue(self.wishbone_slave.ack, 0), + NextValue(wishbone_slave_timeout, 0) + ) + ), + If(self.wishbone_slave.ack & ~self.wishbone_slave.we, + If((~self.wishbone_slave.stb) | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & + NextValue(self.wishbone_slave.ack, 0), + NextValue(wishbone_slave_timeout, 0) + ) + ), + If(self.wishbone_slave.err, + If((~self.wishbone_slave.stb) | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & + NextValue(self.wishbone_slave.err, 0), + NextValue(wishbone_slave_timeout, 0) + ) ) ) + + self.submodules.sbus_slave_wait_fsm = sbus_slave_wait_fsm = FSM(reset_state="Reset") + sbus_slave_wait_fsm.act("Reset", + NextState("Idle") + ) + sbus_slave_wait_fsm.act("Idle", + If (sbus_slave_timeout != 0, + NextValue(sbus_slave_timeout, sbus_slave_timeout -1) + ), + ) + + # ##### FIXME: debug only? 
+ self.submodules.sbus_master_throttle_fsm = sbus_master_throttle_fsm = FSM(reset_state="Reset") + sbus_master_throttle_fsm.act("Reset", + NextState("Idle") + ) + sbus_master_throttle_fsm.act("Idle", + If (sbus_master_throttle != 0, + NextValue(sbus_master_throttle, sbus_master_throttle -1) + ), + ) + + sbus_master_throttle diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index ad0592d..18cb782 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -45,9 +45,10 @@ _usb_io = [ class _CRG(Module): def __init__(self, platform, sys_clk_freq): - self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock +## self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock + self.clock_domains.cd_sys = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus & SYS clock domain self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset, we don't want the delay after a warm reset) - self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain +## self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain # self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller @@ -55,18 +56,21 @@ class _CRG(Module): clk48 = platform.request("clk48") self.cd_native.clk = clk48 clk_sbus = platform.request("SBUS_3V3_CLK") - self.cd_sbus.clk = clk_sbus + ##self.cd_sbus.clk = clk_sbus rst_sbus = platform.request("SBUS_3V3_RSTs") - self.comb += self.cd_sbus.rst.eq(~rst_sbus) + ##self.comb += self.cd_sbus.rst.eq(~rst_sbus) + self.cd_sys.clk = clk_sbus + self.comb += 
self.cd_sys.rst.eq(~rst_sbus) - self.submodules.pll = pll = S7MMCM(speedgrade=-1) - pll.register_clkin(clk48, 48e6) - pll.create_clkout(self.cd_sys, sys_clk_freq) + ##self.submodules.pll = pll = S7MMCM(speedgrade=-1) + ##pll.register_clkin(clk48, 48e6) + ##pll.create_clkout(self.cd_sys, sys_clk_freq) - platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) - platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) - platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) + ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) + ##platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) + ##platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) + ##platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) + platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) # Power on reset, reset propagate from SBus to SYS # por_count = Signal(16, reset=2**16-1) @@ -92,7 +96,7 @@ class SBusFPGA(SoCCore): kwargs["with_uart"] = False kwargs["with_timer"] = False - self.sys_clk_freq = sys_clk_freq = 100e6 + self.sys_clk_freq = sys_clk_freq = 25e6 ## 100e6 self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") self.platform.add_extension(_sbus_sbus) @@ -120,7 +124,7 @@ class SBusFPGA(SoCCore): SBUS_3V3_INT1s_o = Signal(reset=1) # the 74LVC2G07 takes care of the Z state: 1 -> Z on the bus, 0 -> 0 on the bus (asserted interrupt) self.comb += pad_SBUS_3V3_INT1s.eq(SBUS_3V3_INT1s_o) - self.comb += SBUS_3V3_INT1s_o.eq(~self.usb_host.interrupt) + self.comb += SBUS_3V3_INT1s_o.eq(~self.usb_host.interrupt) ## pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") @@ -130,7 +134,7 @@ class SBusFPGA(SoCCore): #SBUS_DATA_OE_LED_2_o = Signal() #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) interrupt_memory = Signal() - self.sync += 
interrupt_memory.eq(interrupt_memory | ~SBUS_3V3_INT1s_o) + self.sync += interrupt_memory.eq(interrupt_memory | self.usb_host.interrupt) self.comb += SBUS_DATA_OE_LED_o.eq(interrupt_memory) #self.comb += SBUS_DATA_OE_LED_2_o.eq(~SBUS_3V3_INT1s_o) @@ -157,60 +161,70 @@ class SBusFPGA(SoCCore): # FIFO to send data & address from SBus to the Wishbone - sbus_to_wishbone_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) - sbus_to_wishbone_wr_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_wr_fifo) - self.submodules += sbus_to_wishbone_wr_fifo + ##sbus_to_wishbone_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) + ##sbus_to_wishbone_wr_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_wr_fifo) + ##self.submodules += sbus_to_wishbone_wr_fifo # FIFOs to send address / receive data from SBus to the Wishbone - sbus_to_wishbone_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=16) - sbus_to_wishbone_rd_fifo_addr = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_rd_fifo_addr) - self.submodules += sbus_to_wishbone_rd_fifo_addr - sbus_to_wishbone_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=16) - sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) - self.submodules += sbus_to_wishbone_rd_fifo_data + ##sbus_to_wishbone_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=16) + ##sbus_to_wishbone_rd_fifo_addr = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_rd_fifo_addr) + ##self.submodules += sbus_to_wishbone_rd_fifo_addr + ##sbus_to_wishbone_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=16) + ##sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) + ##self.submodules += sbus_to_wishbone_rd_fifo_data # SBus to Wishbone, 'Slave' on the SBus side, 'Master' on the Wishbone side - self.submodules.sbus_to_wishbone = 
SBusToWishbone(platform=self.platform, - wr_fifo=sbus_to_wishbone_wr_fifo, - rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - rd_fifo_data=sbus_to_wishbone_rd_fifo_data, - wishbone=wishbone.Interface(data_width=self.bus.data_width)) + ##self.submodules.sbus_to_wishbone = SBusToWishbone(platform=self.platform, + ## wr_fifo=sbus_to_wishbone_wr_fifo, + ## rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + ## rd_fifo_data=sbus_to_wishbone_rd_fifo_data, + ## wishbone=wishbone.Interface(data_width=self.bus.data_width)) # FIFO to send data & address from Wishbone to the SBus - wishbone_to_sbus_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) - wishbone_to_sbus_wr_fifo = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_wr_fifo) - self.submodules += wishbone_to_sbus_wr_fifo + ##wishbone_to_sbus_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) + ##wishbone_to_sbus_wr_fifo = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_wr_fifo) + ##self.submodules += wishbone_to_sbus_wr_fifo # FIFOs to send address / receive data from Wishbone to the SBus - wishbone_to_sbus_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=4) - wishbone_to_sbus_rd_fifo_addr = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_rd_fifo_addr) - self.submodules += wishbone_to_sbus_rd_fifo_addr - wishbone_to_sbus_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=4) - wishbone_to_sbus_rd_fifo_data = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(wishbone_to_sbus_rd_fifo_data) - self.submodules += wishbone_to_sbus_rd_fifo_data + ##wishbone_to_sbus_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=4) + ##wishbone_to_sbus_rd_fifo_addr = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_rd_fifo_addr) + ##self.submodules += wishbone_to_sbus_rd_fifo_addr + ##wishbone_to_sbus_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=4) + ##wishbone_to_sbus_rd_fifo_data = ClockDomainsRenamer({"write": "sbus", "read": 
"sys"})(wishbone_to_sbus_rd_fifo_data) + ##self.submodules += wishbone_to_sbus_rd_fifo_data # Wishbone to SBus, 'Master' on the SBus side, 'Slave' on the Wishbone side - self.submodules.wishbone_to_sbus = WishboneToSBus(platform=self.platform, - soc=self, - wr_fifo=wishbone_to_sbus_wr_fifo, - rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, - rd_fifo_data=wishbone_to_sbus_rd_fifo_data, - wishbone=wishbone.Interface(data_width=self.bus.data_width)) + ##self.submodules.wishbone_to_sbus = WishboneToSBus(platform=self.platform, + ## soc=self, + ## wr_fifo=wishbone_to_sbus_wr_fifo, + ## rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, + ## rd_fifo_data=wishbone_to_sbus_rd_fifo_data, + ## wishbone=wishbone.Interface(data_width=self.bus.data_width)) - _sbus_bus = SBusFPGABus(platform=self.platform, - prom=prom, - hold_reset=hold_reset, - wr_fifo=sbus_to_wishbone_wr_fifo, - rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - rd_fifo_data=sbus_to_wishbone_rd_fifo_data, - master_wr_fifo=wishbone_to_sbus_wr_fifo, - master_rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, - master_rd_fifo_data=wishbone_to_sbus_rd_fifo_data) - self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) + ##_sbus_bus = SBusFPGABus(platform=self.platform, + ## prom=prom, + ## hold_reset=hold_reset, + ## wr_fifo=sbus_to_wishbone_wr_fifo, + ## rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, + ## rd_fifo_data=sbus_to_wishbone_rd_fifo_data, + ## master_wr_fifo=wishbone_to_sbus_wr_fifo, + ## master_rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, + ## master_rd_fifo_data=wishbone_to_sbus_rd_fifo_data) + ##self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) + + wishbone_slave = wishbone.Interface(data_width=self.bus.data_width) + wishbone_master = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.sbus_bus = SBusFPGABus(platform=self.platform, + prom=prom, + hold_reset=hold_reset, + wishbone_slave=wishbone_slave, + wishbone_master=wishbone_master) - 
self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) - self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + ##self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) + ##self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) + self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} From b2e4a450e155a7eaea2c53bddafe33408117e9d5 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 21 Jun 2021 03:04:51 -0400 Subject: [PATCH 15/78] Still very wonkey, DMA burning cycles --- .../sbus_to_fpga_fsm.py | 160 ++++++++++++++---- .../sbus_to_fpga_soc.py | 5 +- 2 files changed, 128 insertions(+), 37 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 05add64..a5c140d 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -213,9 +213,9 @@ class SBusFPGABus(Module): burst_limit_m1 = Signal(4) #SBUS_3V3_CLK = Signal() - SBUS_3V3_ASs_i = Signal() + SBUS_3V3_ASs_i = Signal(reset=1) self.comb += SBUS_3V3_ASs_i.eq(pad_SBUS_3V3_ASs) - SBUS_3V3_BGs_i = Signal() + SBUS_3V3_BGs_i = Signal(reset=1) self.comb += SBUS_3V3_BGs_i.eq(pad_SBUS_3V3_BGs) SBUS_3V3_BRs_o = Signal(reset=1) #self.specials += Tristate(pad_SBUS_3V3_BRs, SBUS_3V3_BRs_o, sbus_oe_master_br, None) @@ -224,7 +224,7 @@ class SBusFPGABus(Module): SBUS_3V3_ERRs_o = Signal() self.specials += 
Tristate(pad_SBUS_3V3_ERRs, SBUS_3V3_ERRs_o, sbus_oe_master_in, SBUS_3V3_ERRs_i) #SBUS_3V3_RSTs = Signal() - SBUS_3V3_SELs_i = Signal() + SBUS_3V3_SELs_i = Signal(reset=1) self.comb += SBUS_3V3_SELs_i.eq(pad_SBUS_3V3_SELs) #SBUS_3V3_INT1s_o = Signal(reset=1) #self.specials += Tristate(pad_SBUS_3V3_INT1s, SBUS_3V3_INT1s_o, sbus_oe_int1, None) @@ -265,14 +265,36 @@ class SBusFPGABus(Module): #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) - self.comb += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) - self.comb += platform.request("user_led", 1).eq(self.wishbone_slave.stb) - self.comb += platform.request("user_led", 2).eq(self.wishbone_slave.we) - self.comb += platform.request("user_led", 3).eq(self.wishbone_slave.ack) - self.comb += platform.request("user_led", 4).eq(self.wishbone_slave.err) + self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) + #self.sync += platform.request("user_led", 1).eq(self.wishbone_slave.stb) + #self.sync += platform.request("user_led", 2).eq(self.wishbone_slave.we) + #self.sync += platform.request("user_led", 3).eq(self.wishbone_slave.ack) + #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.err) + led1 = platform.request("user_led", 1) + led2 = platform.request("user_led", 2) + led3 = platform.request("user_led", 3) + led4 = platform.request("user_led", 4) led5 = platform.request("user_led", 5) - self.comb += platform.request("user_led", 6).eq(~SBUS_3V3_BGs_i) - self.comb += platform.request("user_led", 7).eq(~SBUS_3V3_BRs_o) + self.sync += platform.request("user_led", 6).eq(~SBUS_3V3_BRs_o) + self.sync += platform.request("user_led", 7).eq(~SBUS_3V3_BGs_i) + + #cycle_counter = Signal(8, reset = 0) + #self.sync += cycle_counter.eq(cycle_counter + 1) + #cycle_busmaster = Signal(8, reset = 0) + #self.sync += If(cycle_counter != 0, + # cycle_busmaster.eq(cycle_busmaster + ~SBUS_3V3_BGs_i)).Else( + # cycle_busmaster.eq(0)) + #self.sync += If(cycle_counter == 0, + 
# platform.request("user_led", 0).eq(cycle_busmaster[4]), + # platform.request("user_led", 1).eq(cycle_busmaster[5]), + # platform.request("user_led", 2).eq(cycle_busmaster[6]), + # platform.request("user_led", 3).eq(cycle_busmaster[7])) + + self.master_read_buffer_data = Array(Signal(32) for a in range(4)) + self.master_read_buffer_addr = Signal(28) + self.master_read_buffer_done = Array(Signal() for a in range(4)) + self.master_read_buffer_read = Array(Signal() for a in range(4)) + self.master_read_buffer_start = Signal() self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") @@ -405,36 +427,51 @@ class SBusFPGABus(Module): self.wishbone_slave.stb & ~self.wishbone_slave.ack & ~self.wishbone_slave.err & - (sbus_master_throttle == 0), + self.wishbone_slave.we & + (sbus_master_throttle == 0) & + (wishbone_slave_timeout == 0), NextValue(SBUS_3V3_BRs_o, 0) ).Elif(~SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & ~self.wishbone_slave.ack & - ~self.wishbone_slave.err, + ~self.wishbone_slave.err & + self.wishbone_slave.we, NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input NextValue(burst_counter, 0), NextValue(burst_limit_m1, 0), ## only single word for now - If(self.wishbone_slave.we, - NextValue(master_addr, self.wishbone_slave.adr), - NextValue(master_data, self.wishbone_slave.dat_w), - NextValue(self.wishbone_slave.ack, 1), - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), - NextValue(SBUS_3V3_PPRD_o, 0), - NextValue(master_we, 1), - #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), - NextState("Master_Translation") - ).Else(NextValue(master_addr, self.wishbone_slave.adr), - 
NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), - NextValue(SBUS_3V3_PPRD_o, 1), - NextValue(master_we, 0), - #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), - NextState("Master_Translation") - ) + NextValue(master_addr, self.wishbone_slave.adr), + NextValue(master_data, self.wishbone_slave.dat_w), + NextValue(self.wishbone_slave.ack, 1), + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(SBUS_3V3_PPRD_o, 0), + NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), + NextValue(master_we, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), + NextState("Master_Translation") + ).Elif(SBUS_3V3_BGs_i & + self.master_read_buffer_start & + (sbus_master_throttle == 0) & + (wishbone_slave_timeout == 0), + NextValue(SBUS_3V3_BRs_o, 0) + ).Elif(~SBUS_3V3_BGs_i & + self.master_read_buffer_start, + NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request + NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) + NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output + NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input + NextValue(burst_counter, 0), + NextValue(burst_limit_m1, 3), ## only quadword word for now + NextValue(SBUS_3V3_D_o, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), + NextValue(SBUS_3V3_PPRD_o, 1), + NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), + NextValue(master_we, 0), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), + NextState("Master_Translation") ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & ((SIZ_HWORD == SBUS_3V3_SIZ_i) | (SIZ_BYTE == SBUS_3V3_SIZ_i))), @@ -607,6 +644,7 @@ class SBusFPGABus(Module): ) # ##### MASTER ##### slave_fsm.act("Master_Translation", + led1.eq(1), #NextValue(self.led_display.value, Cat(Signal(8, 
reset = 0x09), self.led_display.value[8:40])), If(master_we, NextValue(sbus_oe_data, 1), @@ -673,11 +711,11 @@ class SBusFPGABus(Module): ) slave_fsm.act("Master_Read_Ack", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0b), self.led_display.value[8:40])), - NextValue(self.wishbone_slave.ack, 1), - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.dat_r, SBUS_3V3_D_i), + NextValue(self.master_read_buffer_data[burst_counter[0:2]], SBUS_3V3_D_i), + NextValue(self.master_read_buffer_done[burst_counter[0:2]], 1), NextValue(burst_counter, burst_counter + 1), If(burst_counter == burst_limit_m1, + NextValue(self.master_read_buffer_start, 0), NextState("Master_Read_Finish") ).Else( Case(SBUS_3V3_ACKs_i, { @@ -813,4 +851,60 @@ class SBusFPGABus(Module): ), ) - sbus_master_throttle + # ##### Slave read buffering FSM #### + last_word_idx = Signal(2) + self.submodules.wishbone_slave_buffering_fsm = wishbone_slave_buffering_fsm = FSM(reset_state="Reset") + self.sync += led4.eq(self.master_read_buffer_start) + wishbone_slave_buffering_fsm.act("Reset", + led1.eq(0), + led2.eq(0), + led3.eq(0), + NextState("Idle") + ) + wishbone_slave_buffering_fsm.act("Idle", + If(self.wishbone_slave.cyc & + self.wishbone_slave.stb & + ~self.wishbone_slave.ack & + ~self.wishbone_slave.err & + ~self.wishbone_slave.we & + (wishbone_slave_timeout == 0), + led3.eq(1), + If((self.master_read_buffer_addr == self.wishbone_slave.adr[2:30]) & + (self.master_read_buffer_done[self.wishbone_slave.adr[0:2]]) & + (~self.master_read_buffer_read[self.wishbone_slave.adr[0:2]]), + ## use cache + NextValue(self.wishbone_slave.ack, 1), + NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[self.wishbone_slave.adr[0:2]]), + NextValue(self.master_read_buffer_read[self.wishbone_slave.adr[0:2]], 1), + NextValue(wishbone_slave_timeout, wishbone_default_timeout) + ).Elif(~self.master_read_buffer_start, + led2.eq(1), + 
NextValue(self.master_read_buffer_addr, self.wishbone_slave.adr[2:30]), + NextValue(self.master_read_buffer_done[0], 0), + NextValue(self.master_read_buffer_done[1], 0), + NextValue(self.master_read_buffer_done[2], 0), + NextValue(self.master_read_buffer_done[3], 0), + NextValue(self.master_read_buffer_read[0], 0), + NextValue(self.master_read_buffer_read[1], 0), + NextValue(self.master_read_buffer_read[2], 0), + NextValue(self.master_read_buffer_read[3], 0), + NextValue(last_word_idx, self.wishbone_slave.adr[0:2]), + NextValue(self.master_read_buffer_start, 1), + NextState("WaitForData") + ) + ) + ) + wishbone_slave_buffering_fsm.act("WaitForData", + led2.eq(1), + If(self.master_read_buffer_done[last_word_idx], + NextValue(self.wishbone_slave.ack, 1), + NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[last_word_idx]), + NextValue(self.master_read_buffer_read[last_word_idx], 1), + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextState("Idle") + ), + If(self.wishbone_slave.err, + NextState("Idle") + ) + ) + diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 18cb782..ecf33f7 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -133,10 +133,7 @@ class SBusFPGA(SoCCore): #pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") #SBUS_DATA_OE_LED_2_o = Signal() #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) - interrupt_memory = Signal() - self.sync += interrupt_memory.eq(interrupt_memory | self.usb_host.interrupt) - self.comb += SBUS_DATA_OE_LED_o.eq(interrupt_memory) - #self.comb += SBUS_DATA_OE_LED_2_o.eq(~SBUS_3V3_INT1s_o) + self.comb += SBUS_DATA_OE_LED_o.eq(~SBUS_3V3_INT1s_o) prom_file = "prom_migen.fc" prom_data = soc_core.get_mem_data(prom_file, "big") From 5cdec193d276f11168be8bfd6b43d353c31fb56f Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 21 Jun 2021 03:15:39 
-0400 Subject: [PATCH 16/78] oups --- sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index a5c140d..ab96820 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -856,7 +856,6 @@ class SBusFPGABus(Module): self.submodules.wishbone_slave_buffering_fsm = wishbone_slave_buffering_fsm = FSM(reset_state="Reset") self.sync += led4.eq(self.master_read_buffer_start) wishbone_slave_buffering_fsm.act("Reset", - led1.eq(0), led2.eq(0), led3.eq(0), NextState("Idle") From df88e17e9dfec10faf3bcaba9221f4449d3294c3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 21 Jun 2021 11:40:17 -0400 Subject: [PATCH 17/78] upd --- .../sbus_to_fpga_fsm.py | 92 ++++++++++++------- .../sbus_to_fpga_soc.py | 11 ++- 2 files changed, 64 insertions(+), 39 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index ab96820..e9bf9b9 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -28,6 +28,7 @@ ADDR_PFX_LENGTH = 12 #(1 + ADDR_PFX_HIGH - ADDR_PFX_LOW) ROM_ADDR_PFX = Signal(12, reset = 0) WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) USBOHCI_ADDR_PFX = Signal(12, reset = 8) +SRAM_ADDR_PFX = Signal(12, reset = 9) wishbone_default_timeout = 63 sbus_default_timeout = 63 @@ -84,10 +85,10 @@ class LedDisplay(Module): NextValue(old_value, self.value), NextState("Byte0")) fsm.act("Quick", - If (old_value != self.value, + If(old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, - If (blink_counter == 0, + If(blink_counter == 0, NextValue(time_counter, 25000000//2), NextValue(self.display, self.value[0:8]), NextState("Byte0") @@ -101,7 +102,7 @@ class LedDisplay(Module): ) ) fsm.act("Byte0", - If (old_value != self.value, + If(old_value != 
self.value, NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), @@ -112,7 +113,7 @@ class LedDisplay(Module): ) ) fsm.act("Byte1", - If (old_value != self.value, + If(old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), @@ -123,7 +124,7 @@ class LedDisplay(Module): ) ) fsm.act("Byte2", - If (old_value != self.value, + If(old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), @@ -134,7 +135,7 @@ class LedDisplay(Module): ) ) fsm.act("Byte3", - If (old_value != self.value, + If(old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//2), @@ -145,7 +146,7 @@ class LedDisplay(Module): ) ) fsm.act("Byte4", - If (old_value != self.value, + If(old_value != self.value, NextState("Reset") ).Elif(time_counter == 0, NextValue(time_counter, 25000000//10), @@ -265,18 +266,23 @@ class SBusFPGABus(Module): #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) - self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) + #self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) #self.sync += platform.request("user_led", 1).eq(self.wishbone_slave.stb) #self.sync += platform.request("user_led", 2).eq(self.wishbone_slave.we) #self.sync += platform.request("user_led", 3).eq(self.wishbone_slave.ack) #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.err) - led1 = platform.request("user_led", 1) - led2 = platform.request("user_led", 2) - led3 = platform.request("user_led", 3) - led4 = platform.request("user_led", 4) - led5 = platform.request("user_led", 5) - self.sync += platform.request("user_led", 6).eq(~SBUS_3V3_BRs_o) - self.sync += platform.request("user_led", 7).eq(~SBUS_3V3_BGs_i) + #led1 = platform.request("user_led", 0) + #led1 = platform.request("user_led", 1) + #led2 = platform.request("user_led", 2) + #led3 = 
platform.request("user_led", 3) + #led4 = platform.request("user_led", 4) + + self.sync += platform.request("user_led", 0).eq(self.wishbone_master.cyc) + self.sync += platform.request("user_led", 1).eq(~SBUS_3V3_SELs_i) + + #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.cyc) + #self.sync += platform.request("user_led", 6).eq(~SBUS_3V3_BRs_o) + #self.sync += platform.request("user_led", 7).eq(~SBUS_3V3_BGs_i) #cycle_counter = Signal(8, reset = 0) #self.sync += cycle_counter.eq(cycle_counter + 1) @@ -354,7 +360,8 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 40))), NextState("Slave_Ack_Read_Prom_Burst") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), If(self.wishbone_master.cyc == 0, @@ -399,8 +406,7 @@ class SBusFPGABus(Module): (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & (SBUS_3V3_PPRD_i == 0) & - (SBUS_3V3_PA_i[0:2] == 0) & - (~self.wishbone_master.cyc)), + (SBUS_3V3_PA_i[0:2] == 0)), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), @@ -411,11 +417,19 @@ class SBusFPGABus(Module): SIZ_BURST8: NextValue(burst_limit_m1, 7), SIZ_BURST16: NextValue(burst_limit_m1, 15)}), If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX)), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(SBUS_3V3_ERRs_o, 1), - #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), 
SBUS_3V3_PA_i, Signal(4, reset = 0))), - NextState("Slave_Ack_Reg_Write_Burst") + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + If(~self.wishbone_master.cyc, + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Reg_Write_Burst") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone") + ) ).Else( #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), @@ -503,7 +517,6 @@ class SBusFPGABus(Module): ).Elif(~SBUS_3V3_BGs_i, ### ouch we got the bus but nothing more to do ?!? NextValue(SBUS_3V3_BRs_o, 1), - NextValue(led5, 1) ).Else( # FIXME: handle error ) @@ -632,6 +645,16 @@ class SBusFPGABus(Module): NextState("Idle") ) ) + slave_fsm.act("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x68), self.led_display.value[8:40])), + If(self.wishbone_master.cyc == 0, + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextState("Slave_Ack_Reg_Write_Burst") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) # ##### SLAVE ERROR ##### slave_fsm.act("Slave_Error", #NextValue(self.led_display.value, 0x0000000080 | self.led_display.value), @@ -644,7 +667,6 @@ class SBusFPGABus(Module): ) # ##### MASTER ##### slave_fsm.act("Master_Translation", - led1.eq(1), #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x09), self.led_display.value[8:40])), If(master_we, NextValue(sbus_oe_data, 1), @@ -788,7 +810,7 @@ class SBusFPGABus(Module): NextState("Idle") ) wishbone_master_wait_fsm.act("Idle", 
- If (wishbone_master_timeout != 0, + If(wishbone_master_timeout != 0, NextValue(wishbone_master_timeout, wishbone_master_timeout -1) ), If(self.wishbone_master.cyc & self.wishbone_master.stb & self.wishbone_master.we, @@ -807,7 +829,7 @@ class SBusFPGABus(Module): NextState("Idle") ) wishbone_slave_wait_fsm.act("Idle", - If (wishbone_slave_timeout != 0, + If(wishbone_slave_timeout != 0, NextValue(wishbone_slave_timeout, wishbone_slave_timeout -1) ), If(self.wishbone_slave.ack & self.wishbone_slave.we, @@ -835,7 +857,7 @@ class SBusFPGABus(Module): NextState("Idle") ) sbus_slave_wait_fsm.act("Idle", - If (sbus_slave_timeout != 0, + If(sbus_slave_timeout != 0, NextValue(sbus_slave_timeout, sbus_slave_timeout -1) ), ) @@ -846,7 +868,7 @@ class SBusFPGABus(Module): NextState("Idle") ) sbus_master_throttle_fsm.act("Idle", - If (sbus_master_throttle != 0, + If(sbus_master_throttle != 0, NextValue(sbus_master_throttle, sbus_master_throttle -1) ), ) @@ -854,10 +876,8 @@ class SBusFPGABus(Module): # ##### Slave read buffering FSM #### last_word_idx = Signal(2) self.submodules.wishbone_slave_buffering_fsm = wishbone_slave_buffering_fsm = FSM(reset_state="Reset") - self.sync += led4.eq(self.master_read_buffer_start) + #self.sync += led4.eq(self.master_read_buffer_start) wishbone_slave_buffering_fsm.act("Reset", - led2.eq(0), - led3.eq(0), NextState("Idle") ) wishbone_slave_buffering_fsm.act("Idle", @@ -867,7 +887,7 @@ class SBusFPGABus(Module): ~self.wishbone_slave.err & ~self.wishbone_slave.we & (wishbone_slave_timeout == 0), - led3.eq(1), + #led3.eq(1), If((self.master_read_buffer_addr == self.wishbone_slave.adr[2:30]) & (self.master_read_buffer_done[self.wishbone_slave.adr[0:2]]) & (~self.master_read_buffer_read[self.wishbone_slave.adr[0:2]]), @@ -877,7 +897,7 @@ class SBusFPGABus(Module): NextValue(self.master_read_buffer_read[self.wishbone_slave.adr[0:2]], 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout) ).Elif(~self.master_read_buffer_start, - 
led2.eq(1), + #led2.eq(1), NextValue(self.master_read_buffer_addr, self.wishbone_slave.adr[2:30]), NextValue(self.master_read_buffer_done[0], 0), NextValue(self.master_read_buffer_done[1], 0), @@ -890,11 +910,13 @@ class SBusFPGABus(Module): NextValue(last_word_idx, self.wishbone_slave.adr[0:2]), NextValue(self.master_read_buffer_start, 1), NextState("WaitForData") + ).Else( + #led1.eq(self.master_read_buffer_start) ) ) ) wishbone_slave_buffering_fsm.act("WaitForData", - led2.eq(1), + #led2.eq(1), If(self.master_read_buffer_done[last_word_idx], NextValue(self.wishbone_slave.ack, 1), NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[last_word_idx]), diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index ecf33f7..495f0fe 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -103,10 +103,11 @@ class SBusFPGA(SoCCore): self.platform.add_extension(_usb_io) SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, clk_freq=sys_clk_freq, **kwargs) wb_mem_map = { - "prom": 0x00000000, - "csr" : 0x00040000, - "usb_host": 0x00080000, - "usb_fake_dma": 0xfc000000, + "prom": 0x00000000, + "csr" : 0x00040000, + "usb_host": 0x00080000, + "usb_shared_mem": 0x00090000, + "usb_fake_dma": 0xfc000000, } self.mem_map.update(wb_mem_map) self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) @@ -119,6 +120,8 @@ class SBusFPGA(SoCCore): self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) #self.comb += self.cpu.interrupt[16].eq(self.usb_host.interrupt) #fixme: need to deal with interrupts + + self.add_ram(name="usb_shared_mem", origin=self.mem_map["usb_shared_mem"], size=2**16) pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") SBUS_3V3_INT1s_o = Signal(reset=1) From c8baa1fb6ef2f55990b34b829224f06b3aade6a5 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 21 Jun 2021 12:54:10 -0400 Subject: 
[PATCH 18/78] Byte support, needed; still need HWord... --- .../sbus_to_fpga_fsm.py | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index e9bf9b9..b6c0dc0 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -396,6 +396,23 @@ class SBusFPGABus(Module): NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 80))), NextState("Slave_Ack_Read_Prom_Byte") + ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet + NextValue(SBUS_3V3_ERRs_o, 1), + If(self.wishbone_master.cyc == 0, + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.adr, Cat(SBUS_3V3_PA_i[2:28], Signal(4, reset = 0))), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_default_timeout), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Read_Reg_Byte_Wait_For_Data") + ).Else( + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Read_Reg_Byte_Wait_For_Wishbone") + ) ).Else( #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), @@ -436,6 +453,30 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (SIZ_BYTE == SBUS_3V3_SIZ_i) & + (SBUS_3V3_PPRD_i == 0)), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, 
SBUS_3V3_PA_i), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + If(~self.wishbone_master.cyc, + NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Reg_Write_Byte") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone") + ) + ).Else( + #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) ).Elif(SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & @@ -522,6 +563,7 @@ class SBusFPGABus(Module): ) ) # ##### SLAVE READ ##### + # ## BURST (1->16 words) ## slave_fsm.act("Slave_Ack_Read_Prom_Burst", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x02), self.led_display.value[8:40])), NextValue(sbus_oe_data, 1), @@ -615,7 +657,55 @@ class SBusFPGABus(Module): NextState("Slave_Error") ) ) + # ## BYTE + slave_fsm.act("Slave_Ack_Read_Reg_Byte", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x05), self.led_display.value[8:40])), + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, p_data), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Do_Read") + ) + slave_fsm.act("Slave_Ack_Read_Reg_Byte_Wait_For_Data", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x06), self.led_display.value[8:40])), + If(self.wishbone_master.ack, + Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { + 0: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[24:32])), + 1: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[16:24])), + 2: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 8:16])), + 3: NextValue(p_data, Cat(Signal(24, 
reset = 0), self.wishbone_master.dat_r[ 0: 8])), + }), + NextValue(self.wishbone_master.cyc, 0), + NextValue(self.wishbone_master.stb, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextState("Slave_Ack_Read_Reg_Byte") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(self.wishbone_master.cyc, 0), ## abort transaction + NextValue(self.wishbone_master.stb, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) + slave_fsm.act("Slave_Ack_Read_Reg_Byte_Wait_For_Wishbone", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x68), self.led_display.value[8:40])), + If(self.wishbone_master.cyc == 0, + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[2:28], Signal(4, reset = 0))), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_slave_timeout), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Read_Reg_Byte_Wait_For_Data") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) # ##### SLAVE WRITE ##### + # ## BURST (1->16 words) ## slave_fsm.act("Slave_Ack_Reg_Write_Burst", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x07), self.led_display.value[8:40])), NextValue(self.wishbone_master.cyc, 1), @@ -655,6 +745,36 @@ class SBusFPGABus(Module): NextState("Slave_Error") ) ) + # ## BYTE + slave_fsm.act("Slave_Ack_Reg_Write_Byte", + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { + 0: NextValue(self.wishbone_master.sel, 
0x8), + 1: NextValue(self.wishbone_master.sel, 0x4), + 2: NextValue(self.wishbone_master.sel, 0x2), + 3: NextValue(self.wishbone_master.sel, 0x1), + }), + NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6], # 4 bits, adr FIXME + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr + sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr + Signal(4, reset = 0))), + NextValue(self.wishbone_master.dat_w, + Cat(SBUS_3V3_D_i[24:32], SBUS_3V3_D_i[24:32], SBUS_3V3_D_i[24:32], SBUS_3V3_D_i[24:32])), + NextValue(self.wishbone_master.we, 1), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Reg_Write_Final") + ) + slave_fsm.act("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone", + If(self.wishbone_master.cyc == 0, + NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), + NextState("Slave_Ack_Reg_Write_Byte") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) # ##### SLAVE ERROR ##### slave_fsm.act("Slave_Error", #NextValue(self.led_display.value, 0x0000000080 | self.led_display.value), From b6bbbd05e87de03ab2c6b84d830ce47af40a7ebb Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 21 Jun 2021 13:17:08 -0400 Subject: [PATCH 19/78] HWord support, needed --- .../sbus_to_fpga_fsm.py | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index b6c0dc0..66bde47 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -419,6 +419,35 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (SIZ_HWORD == SBUS_3V3_SIZ_i) & + (SBUS_3V3_PPRD_i == 1)), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, 
SBUS_3V3_PA_i), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet + NextValue(SBUS_3V3_ERRs_o, 1), + If(self.wishbone_master.cyc == 0, + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.adr, Cat(SBUS_3V3_PA_i[2:28], Signal(4, reset = 0))), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_default_timeout), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Read_Reg_HWord_Wait_For_Data") + ).Else( + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Read_Reg_HWord_Wait_For_Wishbone") + ) + ).Else( + #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & @@ -477,6 +506,30 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), NextState("Slave_Error") ) + ).Elif(((SBUS_3V3_SELs_i == 0) & + (SBUS_3V3_ASs_i == 0) & + (SIZ_HWORD == SBUS_3V3_SIZ_i) & + (SBUS_3V3_PPRD_i == 0)), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + If(~self.wishbone_master.cyc, + NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Reg_Write_HWord") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_slave_timeout, 
sbus_default_timeout), + NextState("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") + ) + ).Else( + #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) ).Elif(SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & @@ -657,6 +710,51 @@ class SBusFPGABus(Module): NextState("Slave_Error") ) ) + # ## HWORD + slave_fsm.act("Slave_Ack_Read_Reg_HWord", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x05), self.led_display.value[8:40])), + NextValue(sbus_oe_data, 1), + NextValue(SBUS_3V3_D_o, p_data), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Do_Read") + ) + slave_fsm.act("Slave_Ack_Read_Reg_HWord_Wait_For_Data", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x06), self.led_display.value[8:40])), + If(self.wishbone_master.ack, + Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { + 0: NextValue(p_data, Cat(Signal(16, reset = 0), self.wishbone_master.dat_r[16:32])), + 1: NextValue(p_data, Cat(Signal(16, reset = 0), self.wishbone_master.dat_r[ 0:16])), + }), + NextValue(self.wishbone_master.cyc, 0), + NextValue(self.wishbone_master.stb, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), + NextState("Slave_Ack_Read_Reg_HWord") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(self.wishbone_master.cyc, 0), ## abort transaction + NextValue(self.wishbone_master.stb, 0), + NextValue(wishbone_master_timeout, 0), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) + slave_fsm.act("Slave_Ack_Read_Reg_HWord_Wait_For_Wishbone", + #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x68), self.led_display.value[8:40])), + If(self.wishbone_master.cyc == 0, + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), + 
NextValue(self.wishbone_master.we, 0), + NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[2:28], Signal(4, reset = 0))), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(sbus_slave_timeout, sbus_slave_timeout), + #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Read_Reg_HWord_Wait_For_Data") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) # ## BYTE slave_fsm.act("Slave_Ack_Read_Reg_Byte", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x05), self.led_display.value[8:40])), @@ -745,6 +843,34 @@ class SBusFPGABus(Module): NextState("Slave_Error") ) ) + # ## HWORD + slave_fsm.act("Slave_Ack_Reg_Write_HWord", + NextValue(self.wishbone_master.cyc, 1), + NextValue(self.wishbone_master.stb, 1), + Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { + 0: NextValue(self.wishbone_master.sel, 0xc), + 1: NextValue(self.wishbone_master.sel, 0x3), + }), + NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6], # 4 bits, adr FIXME + sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr + sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr + Signal(4, reset = 0))), + NextValue(self.wishbone_master.dat_w, + Cat(SBUS_3V3_D_i[16:32], SBUS_3V3_D_i[16:32])), + NextValue(self.wishbone_master.we, 1), + NextValue(wishbone_master_timeout, wishbone_default_timeout), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextState("Slave_Ack_Reg_Write_Final") + ) + slave_fsm.act("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone", + If(self.wishbone_master.cyc == 0, + NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), + NextState("Slave_Ack_Reg_Write_HWord") + ).Elif(sbus_slave_timeout == 0, ### this is taking too long + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextState("Slave_Error") + ) + ) # ## BYTE slave_fsm.act("Slave_Ack_Reg_Write_Byte", 
NextValue(self.wishbone_master.cyc, 1), From da0c49062dfde8fcd550d6303968e23cf818f3a3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Tue, 22 Jun 2021 14:56:51 -0400 Subject: [PATCH 20/78] deleted --- sbus-to-ztex-gateware-migen/sbus-to-fpga.py | 174 ------ .../sbus_to_fpga_slave.py | 501 ------------------ 2 files changed, 675 deletions(-) delete mode 100644 sbus-to-ztex-gateware-migen/sbus-to-fpga.py delete mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py diff --git a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py b/sbus-to-ztex-gateware-migen/sbus-to-fpga.py deleted file mode 100644 index 28b2c3e..0000000 --- a/sbus-to-ztex-gateware-migen/sbus-to-fpga.py +++ /dev/null @@ -1,174 +0,0 @@ -import os -import argparse -from migen import * -import litex -from litex.build.generic_platform import * -from litex.build.xilinx.vivado import vivado_build_args, vivado_build_argdict -from litex.soc.integration.soc import * -from litex.soc.integration.soc_core import * -from litex.soc.integration.builder import * -from litex.soc.cores.clock import * -from litex.soc.cores.led import LedChaser -from litex_boards.platforms import ztex213 -from migen.genlib.fifo import * - -from sbus_to_fpga_slave import *; -from sbus_to_fpga_wishbone import *; - -_sbus_sbus = [ - ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), - ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), - ("SBUS_3V3_BGs", 0, Pins("T6"), IOStandard("lvttl")), - ("SBUS_3V3_BRs", 0, Pins("R6"), IOStandard("lvttl")), - ("SBUS_3V3_ERRs", 0, Pins("V2"), IOStandard("lvttl")), - ("SBUS_DATA_OE_LED", 0, Pins("U1"), IOStandard("lvttl")), - ("SBUS_DATA_OE_LED_2", 0, Pins("T3"), IOStandard("lvttl")), - ("SBUS_3V3_RSTs", 0, Pins("U2"), IOStandard("lvttl")), - ("SBUS_3V3_SELs", 0, Pins("K6"), IOStandard("lvttl")), - ("SBUS_3V3_INT1s", 0, Pins("R3"), IOStandard("lvttl")), - ("SBUS_3V3_INT7s", 0, Pins("N5"), IOStandard("lvttl")), - ("SBUS_3V3_PPRD", 0, Pins("N6"), IOStandard("lvttl")), - ("SBUS_OE", 0, Pins("P5"), 
IOStandard("lvttl")), - ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), - ("SBUS_3V3_SIZ", 0, Pins("R7 U3 V1"), IOStandard("lvttl")), - ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), - ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), -] -# CRG ---------------------------------------------------------------------------------------------- - -class _CRG(Module): - def __init__(self, platform, sys_clk_freq): - self.clock_domains.cd_sys = ClockDomain() - self.clock_domains.cd_native = ClockDomain(reset_less=True) - self.clock_domains.cd_sbus = ClockDomain() - self.clock_domains.cd_por = ClockDomain() - - # # # - clk48 = platform.request("clk48") - self.cd_native.clk = clk48 - clk_sbus = platform.request("SBUS_3V3_CLK") - self.cd_sbus.clk = clk_sbus - rst_sbus = platform.request("SBUS_3V3_RSTs") - - self.comb += self.cd_sbus.rst.eq(~rst_sbus) - - self.submodules.pll = pll = S7MMCM(speedgrade=-1) - pll.register_clkin(clk48, 48e6) - pll.create_clkout(self.cd_sys, sys_clk_freq) - - platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) - platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) - platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) - - # Power on reset, reset propagate from SBus to SYS - por_count = Signal(16, reset=2**16-1) - por_done = Signal() - self.comb += self.cd_por.clk.eq(clk48) - self.comb += por_done.eq(por_count == 0) - self.sync.por += If(~por_done, por_count.eq(por_count - 1)) - self.comb += pll.reset.eq(~por_done | ~rst_sbus) - -class SBusFPGA(SoCCore): - def __init__(self, **kwargs): - - kwargs["cpu_type"] = "None" - kwargs["integrated_sram_size"] = 0 - kwargs["with_uart"] = True 
- kwargs["with_timer"] = False - - self.sys_clk_freq = sys_clk_freq = 100e6 - - self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") - self.platform.add_extension(_sbus_sbus) - SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, clk_freq=sys_clk_freq, **kwargs) - wb_mem_map = { - "prom": 0x00000000, - "csr" : 0x00040000, - } - self.mem_map.update(wb_mem_map) - self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) - self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max - - self.submodules.leds = LedChaser( - pads = platform.request_all("user_led"), - sys_clk_freq = sys_clk_freq) - self.add_csr("leds") - - prom_file = "prom_mini.fc" - prom_data = soc_core.get_mem_data(prom_file, "big") - prom = Array(prom_data) - #print("\n****************************************\n") - #for i in range(len(prom)): - # print(hex(prom[i])) - #print("\n****************************************\n") - self.add_ram("prom", origin=self.mem_map["prom"], size=2**16, contents=prom_data, mode="r") # for show - #getattr(self,"prom").mem.init = prom_data - #getattr(self,"prom").mem.depth = 2**14 - - # don't enable anything on the SBus side for 20 seconds after power up - # this avoids FPGA initialization messing with the cold boot process - # requires us to reset the SPARCstation afterward so the FPGA board - # is properly identified - # This is in the 'native' ClockDomain that is never reset - hold_reset_ctr = Signal(30, reset=960000000) - self.sync.native += If(hold_reset_ctr>0, hold_reset_ctr.eq(hold_reset_ctr - 1)) - hold_reset = Signal(reset=1) - self.comb += hold_reset.eq(~(hold_reset_ctr == 0)) - - - - - sbus_to_wishbone_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) - sbus_to_wishbone_wr_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_wr_fifo) - self.submodules += sbus_to_wishbone_wr_fifo - - sbus_to_wishbone_rd_fifo_addr = 
AsyncFIFOBuffered(width=30, depth=16) - sbus_to_wishbone_rd_fifo_addr = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_rd_fifo_addr) - self.submodules += sbus_to_wishbone_rd_fifo_addr - sbus_to_wishbone_rd_fifo_data = AsyncFIFOBuffered(width=32, depth=16) - sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) - self.submodules += sbus_to_wishbone_rd_fifo_data - - self.submodules.sbus_to_wishbone = SBusToWishbone(wr_fifo=sbus_to_wishbone_wr_fifo, - rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - rd_fifo_data=sbus_to_wishbone_rd_fifo_data, - wishbone=wishbone.Interface(data_width=self.bus.data_width)) - - _sbus_slave = SBusFPGASlave(platform=self.platform, - prom=prom, - hold_reset=hold_reset, - wr_fifo=sbus_to_wishbone_wr_fifo, - rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - rd_fifo_data=sbus_to_wishbone_rd_fifo_data,) - self.submodules.sbus_slave = ClockDomainsRenamer("sbus")(_sbus_slave) - - self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) - - # self.soc = Module() - # self.soc.mem_regions = self.mem_regions = {} - # region = litex.soc.integration.soc.SoCRegion(origin=0x0, size=0x0) - # region.length = 0 - # self.mem_regions['csr'] = region - # self.soc.constants = self.constants = {} - # self.soc.csr_regions = self.csr_regions = {} - # self.soc.cpu_type = self.cpu_type = None - -# def do_finalize(self): -# self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) - -def main(): - parser = argparse.ArgumentParser(description="SbusFPGA") - parser.add_argument("--build", action="store_true", help="Build bitstream") - builder_args(parser) - vivado_build_args(parser) - args = parser.parse_args() - - soc = SBusFPGA(**soc_core_argdict(args)) - #soc.add_uart(name="uart", baudrate=115200, fifo_depth=16) - - builder = Builder(soc, **builder_argdict(args)) - builder.build(**vivado_build_argdict(args), 
run=args.build) - -if __name__ == "__main__": - main() diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py deleted file mode 100644 index e83b6a5..0000000 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_slave.py +++ /dev/null @@ -1,501 +0,0 @@ - -from migen import * -from migen.fhdl.specials import Tristate - -SIZ_WORD = 0x0 -SIZ_BYTE = 0x1 -SIZ_HWORD = 0x2 -SIZ_EXT = 0x3 -SIZ_BURST4 = 0x4 -SIZ_BURST8 = 0x5 -SIZ_BURST16 = 0x6 -SIZ_BURST2 = 0x7 - -ACK_IDLE = 0x7 -ACK_ERR = 0x6 -ACK_BYTE = 0x5 -ACK_RERUN = 0x4 -ACK_WORD = 0x3 -ACK_DWORD = 0x2 -ACK_HWORD = 0x1 -ACK_RECV = 0x0 - -ADDR_PHYS_HIGH = 27 -ADDR_PHYS_LOW = 0 -ADDR_PFX_HIGH = ADDR_PHYS_HIGH -ADDR_PFX_LOW = 16 ## 64 KiB per prefix -ADDR_PFX_LENGTH = 12 #(1 + ADDR_PFX_HIGH - ADDR_PFX_LOW) -ROM_ADDR_PFX = Signal(12, reset = 0) -WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) - -def siz_is_word(siz): - return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) - -# FIXME: this doesn't work. Verilog aways use value[0:4] -#def _index_with_wrap(counter, limit_m1, value): -# if (limit_m1 == 0): -# return value[0:4] -# elif (limit_m1 == 1): -# return Cat((value + counter)[0:1], value[1:4]) -# elif (limit_m1 == 3): -# return Cat((value + counter)[0:2], value[2:4]) -# elif (limit_m1 == 7): -# return Cat((value + counter)[0:3], value[3:4]) -# elif (limit_m1 == 15): -# return (value + counter)[0:4] -# return value[0:4] - -def index_with_wrap(counter, limit_m1, value): - return ((value+counter) & limit_m1)[0:4] | (value&(~limit_m1))[0:4] - -# FIXME: this doesn't work. 
Verilog aways use 1 -def siz_to_burst_size_m1(siz): - if (SIZ_WORD == siz): - return 0 - elif (SIZ_BURST2 == siz): - return 1 - elif (SIZ_BURST4 == siz): - return 3 - elif (SIZ_BURST8 == siz): - return 7 - elif (SIZ_BURST16 == siz): - return 15 - return 1 - -class LedDisplay(Module): - def __init__(self, pads): - n = len(pads) - self.value = Signal(32, reset = 0x18244281) - old_value = Signal(32) - display = Signal(8) - - self.submodules.fsm = fsm = FSM(reset_state="Reset") - time_counter = Signal(32, reset = 0) - blink_counter = Signal(4, reset = 0) - self.comb += pads.eq(display) - fsm.act("Reset", - NextValue(time_counter, 25000000//10), - NextValue(blink_counter, 10), - NextValue(display, 0x00), - NextValue(old_value, self.value), - NextState("Quick")) - fsm.act("Quick", - If (old_value != self.value, - NextState("Reset") - ).Elif(time_counter == 0, - If (blink_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(display, self.value[0:8]), - NextState("Byte0") - ).Else( - NextValue(display, ~display), - NextValue(time_counter, 25000000//10), - NextValue(blink_counter, blink_counter - 1) - ) - ).Else( - NextValue(time_counter, time_counter - 1) - ) - ) - fsm.act("Byte0", - If (old_value != self.value, - NextState("Reset") - ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(display, self.value[8:16]), - NextState("Byte1") - ).Else( - NextValue(time_counter, time_counter - 1) - ) - ) - fsm.act("Byte1", - If (old_value != self.value, - NextState("Reset") - ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(display, self.value[16:24]), - NextState("Byte2") - ).Else( - NextValue(time_counter, time_counter - 1) - ) - ) - fsm.act("Byte2", - If (old_value != self.value, - NextState("Reset") - ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//2), - NextValue(display, self.value[24:32]), - NextState("Byte3") - ).Else( - NextValue(time_counter, time_counter - 1) - ) - ) - fsm.act("Byte3", - If 
(old_value != self.value, - NextState("Reset") - ).Elif(time_counter == 0, - NextValue(time_counter, 25000000//10), - NextValue(blink_counter, 10), - NextValue(display, 0x00), - NextState("Quick") - ).Else( - NextValue(time_counter, time_counter - 1) - ) - ) - -class SBusFPGASlave(Module): - def __init__(self, platform, prom, hold_reset, wr_fifo, rd_fifo_addr, rd_fifo_data): - self.platform = platform - self.hold_reset = hold_reset - self.wr_fifo = wr_fifo - self.rd_fifo_addr = rd_fifo_addr - self.rd_fifo_data = rd_fifo_data - - #self.submodules.led_display = LedDisplay(pads=platform.request_all("user_led")) - - #pad_SBUS_3V3_CLK = platform.request("SBUS_3V3_CLK") - pad_SBUS_3V3_ASs = platform.request("SBUS_3V3_ASs") - pad_SBUS_3V3_BGs = platform.request("SBUS_3V3_BGs") - pad_SBUS_3V3_BRs = platform.request("SBUS_3V3_BRs") - pad_SBUS_3V3_ERRs = platform.request("SBUS_3V3_ERRs") - pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") - #pad_SBUS_3V3_RSTs = platform.request("SBUS_3V3_RSTs") - pad_SBUS_3V3_SELs = platform.request("SBUS_3V3_SELs") - pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") - pad_SBUS_3V3_INT7s = platform.request("SBUS_3V3_INT7s") - pad_SBUS_3V3_PPRD = platform.request("SBUS_3V3_PPRD") - pad_SBUS_OE = platform.request("SBUS_OE") - pad_SBUS_3V3_ACKs = platform.request("SBUS_3V3_ACKs") - pad_SBUS_3V3_SIZ = platform.request("SBUS_3V3_SIZ") - pad_SBUS_3V3_D = platform.request("SBUS_3V3_D") - pad_SBUS_3V3_PA = platform.request("SBUS_3V3_PA") - assert len(pad_SBUS_3V3_D) == 32, "len(pad_SBUS_3V3_D) should be 32" - assert len(pad_SBUS_3V3_PA) == 28, "len(pad_SBUS_3V3_PA) should be 28" - - #leds = Signal(8, reset=0xF0) - #self.comb += platform.request("user_led", 0).eq(leds[0]) - #self.comb += platform.request("user_led", 1).eq(leds[1]) - #self.comb += platform.request("user_led", 2).eq(leds[2]) - #self.comb += platform.request("user_led", 3).eq(leds[3]) - #self.comb += 
platform.request("user_led", 4).eq(leds[4]) - #self.comb += platform.request("user_led", 5).eq(leds[5]) - #self.comb += platform.request("user_led", 6).eq(leds[6]) - #self.comb += platform.request("user_led", 7).eq(leds[7]) - - sbus_oe_data = Signal(reset=0) - sbus_oe_slave_in = Signal(reset=0) - sbus_oe_master_in = Signal(reset=0) - sbus_oe_int1 = Signal(reset=0) - sbus_oe_int7 = Signal(reset=0) - sbus_oe_master_br = Signal(reset=0) - - sbus_last_pa = Signal(28) - burst_index = Signal(4) - burst_counter = Signal(4) - burst_limit_m1 = Signal(4) - - #SBUS_3V3_CLK = Signal() - SBUS_3V3_ASs_i = Signal() - self.comb += SBUS_3V3_ASs_i.eq(pad_SBUS_3V3_ASs) - SBUS_3V3_BGs_i = Signal() - self.comb += SBUS_3V3_BGs_i.eq(pad_SBUS_3V3_BGs) - SBUS_3V3_BRs_o = Signal(reset=1) - self.specials += Tristate(pad_SBUS_3V3_BRs, SBUS_3V3_BRs_o, sbus_oe_master_br, None) - SBUS_3V3_ERRs_i = Signal() - SBUS_3V3_ERRs_o = Signal() - self.specials += Tristate(pad_SBUS_3V3_ERRs, SBUS_3V3_ERRs_o, sbus_oe_master_in, SBUS_3V3_ERRs_i) - SBUS_DATA_OE_LED_o = Signal() - self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) - SBUS_DATA_OE_LED_2_o = Signal() - self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) - #SBUS_3V3_RSTs = Signal() - SBUS_3V3_SELs_i = Signal() - self.comb += SBUS_3V3_SELs_i.eq(pad_SBUS_3V3_SELs) - SBUS_3V3_INT1s_o = Signal(reset=1) - self.specials += Tristate(pad_SBUS_3V3_INT1s, SBUS_3V3_INT1s_o, sbus_oe_int1, None) - SBUS_3V3_INT7s_o = Signal(reset=1) - self.specials += Tristate(pad_SBUS_3V3_INT7s, SBUS_3V3_INT7s_o, sbus_oe_int7, None) - SBUS_3V3_PPRD_i = Signal() - SBUS_3V3_PPRD_o = Signal() - self.specials += Tristate(pad_SBUS_3V3_PPRD, SBUS_3V3_PPRD_o, sbus_oe_slave_in, SBUS_3V3_PPRD_i) - #SBUS_OE_o = Signal() - self.comb += pad_SBUS_OE.eq(self.hold_reset) - SBUS_3V3_ACKs_i = Signal(3) - SBUS_3V3_ACKs_o = Signal(3) - self.specials += Tristate(pad_SBUS_3V3_ACKs, SBUS_3V3_ACKs_o, sbus_oe_master_in, SBUS_3V3_ACKs_i) - SBUS_3V3_SIZ_i = Signal(3) - SBUS_3V3_SIZ_o = 
Signal(3) - self.specials += Tristate(pad_SBUS_3V3_SIZ, SBUS_3V3_SIZ_o, sbus_oe_slave_in, SBUS_3V3_SIZ_i) - SBUS_3V3_D_i = Signal(32) - SBUS_3V3_D_o = Signal(32) - self.specials += Tristate(pad_SBUS_3V3_D, SBUS_3V3_D_o, sbus_oe_data, SBUS_3V3_D_i) - SBUS_3V3_PA_i = Signal(28) - self.comb += SBUS_3V3_PA_i.eq(pad_SBUS_3V3_PA) - - p_data = Signal(32) # data to read/write - - data_read_addr = Signal(30) # first addr of req. when reading from WB - data_read_enable = Signal() # start enqueuing req. to read from WB - - self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") - - slave_fsm.act("Reset", - #NextValue(SBUS_DATA_OE_LED_o, 0), - #NextValue(SBUS_DATA_OE_LED_2_o, 0), - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), - NextValue(sbus_oe_data, 0), - NextValue(sbus_oe_slave_in, 0), - NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), - NextValue(p_data, 0), - #NextValue(leds, 0x0F), - NextState("Start") - ) - slave_fsm.act("Start", - #NextValue(SBUS_DATA_OE_LED_o, 0), - #NextValue(SBUS_DATA_OE_LED_2_o, 0), - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), - NextValue(sbus_oe_data, 0), - NextValue(sbus_oe_slave_in, 0), - NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), - NextValue(p_data, 0), - #NextValue(leds, 0x01), - If((self.hold_reset == 0), NextState("Idle")) - ) - slave_fsm.act("Idle", - #NextValue(leds, 0x11), - If(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - (siz_is_word(SBUS_3V3_SIZ_i)) & - (SBUS_3V3_PPRD_i == 1) & - (SBUS_3V3_PA_i[0:2] == 0)), - #NextValue(SBUS_DATA_OE_LED_o, 1), - #NextValue(SBUS_DATA_OE_LED_2_o, 0), - NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - NextValue(burst_counter, 0), - Case(SBUS_3V3_SIZ_i, { - SIZ_WORD: NextValue(burst_limit_m1, 0), - SIZ_BURST2: NextValue(burst_limit_m1, 1), - SIZ_BURST4: NextValue(burst_limit_m1, 3), - SIZ_BURST8: NextValue(burst_limit_m1, 7), - SIZ_BURST16: NextValue(burst_limit_m1, 15)}), - 
If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), - NextValue(SBUS_DATA_OE_LED_o, 1), - NextState("Slave_Ack_Read_Prom_Burst") - ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, 0xDEADBEEF), - NextValue(data_read_addr, (Cat(SBUS_3V3_PA_i[2:], Signal(4, reset=0)))), # enqueue all the request to the wishbone - NextValue(data_read_enable, 1), # enqueue all the request to the wishbone - NextValue(SBUS_DATA_OE_LED_2_o, 1), - NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") - ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 1), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ) - ).Elif(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - (SIZ_BYTE == SBUS_3V3_SIZ_i) & - (SBUS_3V3_PPRD_i == 1)), - #NextValue(SBUS_DATA_OE_LED_o, 0), - #NextValue(SBUS_DATA_OE_LED_2_o, 1), - NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), - NextState("Slave_Ack_Read_Prom_Byte") - ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 2), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ) - ).Elif(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - (siz_is_word(SBUS_3V3_SIZ_i)) & - (SBUS_3V3_PPRD_i == 0) & - (SBUS_3V3_PA_i[0:2] == 0) & - 
(self.wr_fifo.writable)), # maybe we should check for enough space? not that we'll encounter write burst... - #NextValue(SBUS_DATA_OE_LED_o, 1), - #NextValue(SBUS_DATA_OE_LED_2_o, 1), - NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - NextValue(burst_counter, 0), - Case(SBUS_3V3_SIZ_i, { - SIZ_WORD: NextValue(burst_limit_m1, 0), - SIZ_BURST2: NextValue(burst_limit_m1, 1), - SIZ_BURST4: NextValue(burst_limit_m1, 3), - SIZ_BURST8: NextValue(burst_limit_m1, 7), - SIZ_BURST16: NextValue(burst_limit_m1, 15)}), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Ack_Reg_Write_Burst") - ).Else( - #NextValue(self.led_display.value, Cat(SBUS_3V3_PA_i, Signal(2, reset = 3), SBUS_3V3_PA_i[1:2], SBUS_3V3_PPRD_i)), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ) - ) - ) - # ##### READ ##### - slave_fsm.act("Slave_Ack_Read_Prom_Burst", - #NextValue(leds, 0x03), - NextValue(sbus_oe_data, 1), - NextValue(SBUS_3V3_D_o, p_data), - #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), - NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), - If((burst_counter == burst_limit_m1), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextState("Slave_Do_Read") - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(burst_counter, burst_counter + 1) - ) - ) - slave_fsm.act("Slave_Ack_Read_Prom_Byte", - #NextValue(leds, 0x0c), - NextValue(sbus_oe_data, 1), - #NextValue(self.led_display.value, sbus_last_pa[0:2]), - If((sbus_last_pa[0:2] == 0x0), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[24:32])) - ).Elif((sbus_last_pa[0:2] == 0x1), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[16:24])) - 
).Elif((sbus_last_pa[0:2] == 0x2), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 8:16])) - ).Elif((sbus_last_pa[0:2] == 0x3), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 0: 8])) - ), - NextState("Slave_Do_Read") - ) - slave_fsm.act("Slave_Do_Read", - #NextValue(leds, 0x30), - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), - NextValue(sbus_oe_data, 0), - NextValue(sbus_oe_slave_in, 0), - NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), - If((SBUS_3V3_ASs_i == 1), - NextState("Idle") - ) - ) - slave_fsm.act("Slave_Ack_Read_Reg_Burst", - NextValue(sbus_oe_data, 1), - NextValue(SBUS_3V3_D_o, p_data), - If((burst_counter == burst_limit_m1), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextState("Slave_Do_Read") - ).Else( - NextValue(burst_counter, burst_counter + 1), - If(rd_fifo_data.readable, - NextValue(p_data, rd_fifo_data.dout), - rd_fifo_data.re.eq(1), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD) - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") - ) - ) - ) - slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Data", - If(rd_fifo_data.readable, - NextValue(p_data, rd_fifo_data.dout), - rd_fifo_data.re.eq(1), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextState("Slave_Ack_Read_Reg_Burst") - ) - ) - # ##### WRITE ##### - slave_fsm.act("Slave_Ack_Reg_Write_Burst", - #NextValue(SBUS_DATA_OE_LED_o, 1), - #NextValue(SBUS_DATA_OE_LED_2_o, 1), - #NextValue(leds, 0x03), - #NextValue(burst_index, index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6])), - #NextValue(csr_data_w_addr, Cat(Signal(2, reset = 0), - # index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), - # sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], - # WISHBONE_CSR_ADDR_PFX)), - #NextValue(csr_data_w_data, SBUS_3V3_D_i), - #NextValue(csr_data_w_addr, 0x00040000), - #NextValue(csr_data_w_we, 1), - 
self.wr_fifo.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME - sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr - WISHBONE_CSR_ADDR_PFX, # 12 bits, adr - Signal(4, reset = 0), # 4 bits, adr (could be removed) - SBUS_3V3_D_i)), # 32 bits, data - self.wr_fifo.we.eq(1), - If((burst_counter == burst_limit_m1), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextState("Slave_Ack_Reg_Write_Final") - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(burst_counter, burst_counter + 1) - ) - ) - slave_fsm.act("Slave_Ack_Reg_Write_Final", - #NextValue(SBUS_DATA_OE_LED_o, 1), - #NextValue(SBUS_DATA_OE_LED_2_o, 0), - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), - NextValue(sbus_oe_data, 0), - NextValue(sbus_oe_slave_in, 0), - NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), - If((SBUS_3V3_ASs_i == 1), - NextState("Idle") - ) - ) - # ##### ERROR ##### - slave_fsm.act("Slave_Error", - #NextValue(leds, 0xc0), - #NextValue(SBUS_DATA_OE_LED_o, 1), - #NextValue(SBUS_DATA_OE_LED_2_o, 1), - NextValue(sbus_oe_int1, 0), - NextValue(sbus_oe_int7, 0), - NextValue(sbus_oe_data, 0), - NextValue(sbus_oe_slave_in, 0), - NextValue(sbus_oe_master_in, 0), - NextValue(sbus_oe_master_br, 0), - If((SBUS_3V3_ASs_i == 1), - NextState("Idle") - ) - ) - - self.submodules.request_fsm = request_fsm = FSM(reset_state="Reset") - request_fsm.act("Reset", - NextState("Idle") - ) - request_fsm.act("Idle", - If(data_read_enable, - NextValue(data_read_enable, 0), - self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(data_read_addr), - If (burst_limit_m1 != burst_counter, # 0 the first time - NextValue(burst_counter, burst_counter + 1), - NextState("Queue") - ) - ) - ) - request_fsm.act("Queue", - self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(Cat(index_with_wrap(burst_counter, burst_limit_m1, data_read_addr[0:4]), data_read_addr[4:])), - If (burst_limit_m1 != burst_counter, - 
NextValue(burst_counter, burst_counter + 1), - ).Else( - NextState("Idle") - ) - ) From 4ad0a648e30f902837cffb075e847e90d3233a13 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 23 Jun 2021 03:48:25 -0400 Subject: [PATCH 21/78] swap endianness on flash prefix --- .../sbus_to_fpga_fsm.py | 127 ++++++++++++++---- 1 file changed, 101 insertions(+), 26 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 66bde47..29bb688 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -256,7 +256,9 @@ class SBusFPGABus(Module): master_data = Signal(32) # could be merged with p_data master_addr = Signal(30) # could be meged with data_read_addr - master_we = Signal(); + master_we = Signal() + + sbus_wishbone_le = Signal() wishbone_master_timeout = Signal(6) wishbone_slave_timeout = Signal(6) @@ -357,6 +359,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), + NextValue(sbus_wishbone_le, 0), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 40))), NextState("Slave_Ack_Read_Prom_Burst") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | @@ -364,6 +367,7 @@ class SBusFPGABus(Module): (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(self.wishbone_master.cyc == 0, NextValue(self.wishbone_master.cyc, 1), NextValue(self.wishbone_master.stb, 1), @@ -393,12 +397,14 @@ class SBusFPGABus(Module): If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), 
NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_wishbone_le, 0), NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 80))), NextState("Slave_Ack_Read_Prom_Byte") ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(self.wishbone_master.cyc == 0, NextValue(self.wishbone_master.cyc, 1), NextValue(self.wishbone_master.stb, 1), @@ -428,6 +434,7 @@ class SBusFPGABus(Module): If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(self.wishbone_master.cyc == 0, NextValue(self.wishbone_master.cyc, 1), NextValue(self.wishbone_master.stb, 1), @@ -465,6 +472,7 @@ class SBusFPGABus(Module): If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), @@ -489,6 +497,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == 
SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), NextValue(SBUS_3V3_ERRs_o, 1), @@ -513,6 +522,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), NextValue(SBUS_3V3_ERRs_o, 1), @@ -545,6 +555,7 @@ class SBusFPGABus(Module): ~self.wishbone_slave.ack & ~self.wishbone_slave.err & self.wishbone_slave.we, + NextValue(sbus_wishbone_le, 1), # checkme NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output @@ -552,7 +563,10 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, 0), ## only single word for now NextValue(master_addr, self.wishbone_slave.adr), - NextValue(master_data, self.wishbone_slave.dat_w), + NextValue(master_data, Cat(self.wishbone_slave.dat_w[24:32], ## LE + self.wishbone_slave.dat_w[16:24], + self.wishbone_slave.dat_w[ 8:16], + self.wishbone_slave.dat_w[ 0: 8])), NextValue(self.wishbone_slave.ack, 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), @@ -568,6 +582,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_BRs_o, 0) ).Elif(~SBUS_3V3_BGs_i & self.master_read_buffer_start, + NextValue(sbus_wishbone_le, 1), # checkme NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output @@ -679,7 +694,13 @@ class SBusFPGABus(Module): slave_fsm.act("Slave_Ack_Read_Reg_Burst_Wait_For_Data", #NextValue(self.led_display.value, 
Cat(Signal(8, reset = 0x06), self.led_display.value[8:40])), If(self.wishbone_master.ack, - NextValue(p_data, self.wishbone_master.dat_r), + Case(sbus_wishbone_le, { + 0: NextValue(p_data,self.wishbone_master.dat_r), + 1: NextValue(p_data, Cat(self.wishbone_master.dat_r[24:32], + self.wishbone_master.dat_r[16:24], + self.wishbone_master.dat_r[ 8:16], + self.wishbone_master.dat_r[ 0: 8])) + }), NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), @@ -721,9 +742,21 @@ class SBusFPGABus(Module): slave_fsm.act("Slave_Ack_Read_Reg_HWord_Wait_For_Data", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x06), self.led_display.value[8:40])), If(self.wishbone_master.ack, - Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { - 0: NextValue(p_data, Cat(Signal(16, reset = 0), self.wishbone_master.dat_r[16:32])), - 1: NextValue(p_data, Cat(Signal(16, reset = 0), self.wishbone_master.dat_r[ 0:16])), + Case(sbus_wishbone_le, { + 0: Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { + 0: NextValue(p_data, Cat(Signal(16, reset = 0), + self.wishbone_master.dat_r[16:32])), + 1: NextValue(p_data, Cat(Signal(16, reset = 0), + self.wishbone_master.dat_r[ 0:16])), + }), + 1: Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { + 1: NextValue(p_data, Cat(Signal(16, reset = 0), + self.wishbone_master.dat_r[24:32], + self.wishbone_master.dat_r[16:24])), + 0: NextValue(p_data, Cat(Signal(16, reset = 0), + self.wishbone_master.dat_r[ 8:16], + self.wishbone_master.dat_r[ 0: 8])), + }) }), NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), @@ -766,11 +799,19 @@ class SBusFPGABus(Module): slave_fsm.act("Slave_Ack_Read_Reg_Byte_Wait_For_Data", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x06), self.led_display.value[8:40])), If(self.wishbone_master.ack, - Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { - 0: NextValue(p_data, Cat(Signal(24, reset = 0), 
self.wishbone_master.dat_r[24:32])), - 1: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[16:24])), - 2: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 8:16])), - 3: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 0: 8])), + Case(sbus_wishbone_le, { + 0: Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { + 0: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[24:32])), + 1: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[16:24])), + 2: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 8:16])), + 3: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 0: 8])), + }), + 1: Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { + 3: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[24:32])), + 2: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[16:24])), + 1: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 8:16])), + 0: NextValue(p_data, Cat(Signal(24, reset = 0), self.wishbone_master.dat_r[ 0: 8])), + }) }), NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), @@ -813,7 +854,13 @@ class SBusFPGABus(Module): sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr Signal(4, reset = 0))), - NextValue(self.wishbone_master.dat_w, SBUS_3V3_D_i), + Case(sbus_wishbone_le, { + 0: NextValue(self.wishbone_master.dat_w, Cat(SBUS_3V3_D_i)), + 1: NextValue(self.wishbone_master.dat_w, Cat(SBUS_3V3_D_i[24:32], + SBUS_3V3_D_i[16:24], + SBUS_3V3_D_i[ 8:16], + SBUS_3V3_D_i[ 0: 8])) + }), NextValue(self.wishbone_master.we, 1), NextValue(wishbone_master_timeout, wishbone_default_timeout), If((burst_counter == burst_limit_m1), @@ -847,16 +894,28 @@ class SBusFPGABus(Module): slave_fsm.act("Slave_Ack_Reg_Write_HWord", NextValue(self.wishbone_master.cyc, 1), 
NextValue(self.wishbone_master.stb, 1), - Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { - 0: NextValue(self.wishbone_master.sel, 0xc), - 1: NextValue(self.wishbone_master.sel, 0x3), + Case(sbus_wishbone_le, { + 0: Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { + 0: NextValue(self.wishbone_master.sel, 0xc), + 1: NextValue(self.wishbone_master.sel, 0x3), + }), + 1: Case(sbus_last_pa[ADDR_PHYS_LOW+1:ADDR_PHYS_LOW+2], { + 1: NextValue(self.wishbone_master.sel, 0xc), + 0: NextValue(self.wishbone_master.sel, 0x3), + }), }), NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6], # 4 bits, adr FIXME sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr Signal(4, reset = 0))), - NextValue(self.wishbone_master.dat_w, - Cat(SBUS_3V3_D_i[16:32], SBUS_3V3_D_i[16:32])), + Case(sbus_wishbone_le, { + 0: NextValue(self.wishbone_master.dat_w, Cat(SBUS_3V3_D_i[16:32], + SBUS_3V3_D_i[16:32])), + 1: NextValue(self.wishbone_master.dat_w, Cat(SBUS_3V3_D_i[24:32], + SBUS_3V3_D_i[16:24], + SBUS_3V3_D_i[24:32], + SBUS_3V3_D_i[16:24])), + }), NextValue(self.wishbone_master.we, 1), NextValue(wishbone_master_timeout, wishbone_default_timeout), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), @@ -875,18 +934,28 @@ class SBusFPGABus(Module): slave_fsm.act("Slave_Ack_Reg_Write_Byte", NextValue(self.wishbone_master.cyc, 1), NextValue(self.wishbone_master.stb, 1), - Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { - 0: NextValue(self.wishbone_master.sel, 0x8), - 1: NextValue(self.wishbone_master.sel, 0x4), - 2: NextValue(self.wishbone_master.sel, 0x2), - 3: NextValue(self.wishbone_master.sel, 0x1), + Case(sbus_wishbone_le, { + 0: Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { + 0: NextValue(self.wishbone_master.sel, 0x8), + 1: NextValue(self.wishbone_master.sel, 0x4), + 2: NextValue(self.wishbone_master.sel, 0x2), + 3: NextValue(self.wishbone_master.sel, 0x1), + }), + 1: 
Case(sbus_last_pa[ADDR_PHYS_LOW:ADDR_PHYS_LOW+2], { + 3: NextValue(self.wishbone_master.sel, 0x8), + 2: NextValue(self.wishbone_master.sel, 0x4), + 1: NextValue(self.wishbone_master.sel, 0x2), + 0: NextValue(self.wishbone_master.sel, 0x1), + }), }), NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6], # 4 bits, adr FIXME sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr Signal(4, reset = 0))), - NextValue(self.wishbone_master.dat_w, - Cat(SBUS_3V3_D_i[24:32], SBUS_3V3_D_i[24:32], SBUS_3V3_D_i[24:32], SBUS_3V3_D_i[24:32])), + NextValue(self.wishbone_master.dat_w, Cat(SBUS_3V3_D_i[24:32], # LE/BE identical + SBUS_3V3_D_i[24:32], + SBUS_3V3_D_i[24:32], + SBUS_3V3_D_i[24:32])), NextValue(self.wishbone_master.we, 1), NextValue(wishbone_master_timeout, wishbone_default_timeout), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), @@ -1139,7 +1208,10 @@ class SBusFPGABus(Module): (~self.master_read_buffer_read[self.wishbone_slave.adr[0:2]]), ## use cache NextValue(self.wishbone_slave.ack, 1), - NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[self.wishbone_slave.adr[0:2]]), + NextValue(self.wishbone_slave.dat_r, Cat(self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][24:32], # LE + self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][16:24], + self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][ 8:16], + self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][ 0: 8])), NextValue(self.master_read_buffer_read[self.wishbone_slave.adr[0:2]], 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout) ).Elif(~self.master_read_buffer_start, @@ -1165,7 +1237,10 @@ class SBusFPGABus(Module): #led2.eq(1), If(self.master_read_buffer_done[last_word_idx], NextValue(self.wishbone_slave.ack, 1), - NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[last_word_idx]), + NextValue(self.wishbone_slave.dat_r, 
Cat(self.master_read_buffer_data[last_word_idx][24:32], # LE + self.master_read_buffer_data[last_word_idx][16:24], + self.master_read_buffer_data[last_word_idx][ 8:16], + self.master_read_buffer_data[last_word_idx][ 0: 8])), NextValue(self.master_read_buffer_read[last_word_idx], 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout), NextState("Idle") From d3aa008b8b12464967845bcd68077e04dde7a720 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 23 Jun 2021 07:56:39 -0400 Subject: [PATCH 22/78] cleanups --- .../sbus_to_fpga_fsm.py | 166 +++++++++--------- .../sbus_to_fpga_soc.py | 2 +- 2 files changed, 82 insertions(+), 86 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 29bb688..fbaf164 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -280,8 +280,14 @@ class SBusFPGABus(Module): #led4 = platform.request("user_led", 4) self.sync += platform.request("user_led", 0).eq(self.wishbone_master.cyc) - self.sync += platform.request("user_led", 1).eq(~SBUS_3V3_SELs_i) - + self.sync += platform.request("user_led", 1).eq(self.wishbone_master.stb) + self.sync += platform.request("user_led", 2).eq(self.wishbone_master.we) + self.sync += platform.request("user_led", 3).eq(self.wishbone_master.ack) + self.sync += platform.request("user_led", 4).eq(~SBUS_3V3_SELs_i) + self.sync += platform.request("user_led", 5).eq(~SBUS_3V3_ASs_i) + self.sync += platform.request("user_led", 6).eq(wishbone_master_timeout == 0) + led7 = platform.request("user_led", 7) + #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.cyc) #self.sync += platform.request("user_led", 6).eq(~SBUS_3V3_BRs_o) #self.sync += platform.request("user_led", 7).eq(~SBUS_3V3_BGs_i) @@ -331,21 +337,10 @@ class SBusFPGABus(Module): If((self.hold_reset == 0), NextState("Idle")) ) slave_fsm.act("Idle", - #NextValue(self.led_display.value, 
0x0000000010 | self.led_display.value), -# If(((SBUS_3V3_SELs_i == 0) & -# (SBUS_3V3_ASs_i == 0) & -# self.wishbone_master.cyc), ## refuse access until we've cleaned up the mess -# NextValue(self.led_display.value, 0x00000010 | 0x00000001), -# NextValue(sbus_oe_master_in, 1), -# NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), -# NextValue(SBUS_3V3_ERRs_o, 1), -# NextState("Slave_Error") -# ).Eli If(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & - (SBUS_3V3_PPRD_i == 1) & - (SBUS_3V3_PA_i[0:2] == 0)), + (SBUS_3V3_PPRD_i == 1)), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), NextValue(burst_counter, 0), @@ -355,7 +350,11 @@ class SBusFPGABus(Module): SIZ_BURST4: NextValue(burst_limit_m1, 3), SIZ_BURST8: NextValue(burst_limit_m1, 7), SIZ_BURST16: NextValue(burst_limit_m1, 15)}), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), + If(SBUS_3V3_PA_i[0:2] != 0, + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), @@ -431,7 +430,11 @@ class SBusFPGABus(Module): (SBUS_3V3_PPRD_i == 1)), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + If(SBUS_3V3_PA_i[0:1] != 0, + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -458,38 +461,42 @@ class SBusFPGABus(Module): 
).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & - (SBUS_3V3_PPRD_i == 0) & - (SBUS_3V3_PA_i[0:2] == 0)), - NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - NextValue(burst_counter, 0), - Case(SBUS_3V3_SIZ_i, { - SIZ_WORD: NextValue(burst_limit_m1, 0), - SIZ_BURST2: NextValue(burst_limit_m1, 1), - SIZ_BURST4: NextValue(burst_limit_m1, 3), - SIZ_BURST8: NextValue(burst_limit_m1, 7), - SIZ_BURST16: NextValue(burst_limit_m1, 15)}), - If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), - NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), - If(~self.wishbone_master.cyc, - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + (SBUS_3V3_PPRD_i == 0)), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + NextValue(burst_counter, 0), + Case(SBUS_3V3_SIZ_i, { + SIZ_WORD: NextValue(burst_limit_m1, 0), + SIZ_BURST2: NextValue(burst_limit_m1, 1), + SIZ_BURST4: NextValue(burst_limit_m1, 3), + SIZ_BURST8: NextValue(burst_limit_m1, 7), + SIZ_BURST16: NextValue(burst_limit_m1, 15) + }), + If(SBUS_3V3_PA_i[0:2] != 0, + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), - #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), - NextState("Slave_Ack_Reg_Write_Burst") - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(sbus_slave_timeout, sbus_default_timeout), - NextState("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone") - ) - ).Else( - #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ) + NextState("Slave_Error") + 
).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + If(~self.wishbone_master.cyc, + NextValue(SBUS_3V3_ACKs_o, ACK_WORD), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Reg_Write_Burst") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone") + ) + ).Else( + #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (SIZ_BYTE == SBUS_3V3_SIZ_i) & @@ -519,27 +526,31 @@ class SBusFPGABus(Module): (SBUS_3V3_ASs_i == 0) & (SIZ_HWORD == SBUS_3V3_SIZ_i) & (SBUS_3V3_PPRD_i == 0)), - NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), - NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), - If(~self.wishbone_master.cyc, - NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + If(SBUS_3V3_PA_i[0:1] != 0, + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), - #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), - NextState("Slave_Ack_Reg_Write_HWord") - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextValue(SBUS_3V3_ERRs_o, 1), - 
NextValue(sbus_slave_timeout, sbus_default_timeout), - NextState("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") - ) - ).Else( - #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ) + NextState("Slave_Error") + ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + If(~self.wishbone_master.cyc, + NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextState("Slave_Ack_Reg_Write_HWord") + ).Else( + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(SBUS_3V3_ERRs_o, 1), + NextValue(sbus_slave_timeout, sbus_default_timeout), + NextState("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") + ) + ).Else( + #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + NextState("Slave_Error") + ) ).Elif(SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & @@ -596,33 +607,18 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), NextState("Master_Translation") ).Elif(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - ((SIZ_HWORD == SBUS_3V3_SIZ_i) | (SIZ_BYTE == SBUS_3V3_SIZ_i))), - NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - #NextValue(self.led_display.value, 0x00000000a0 | SBUS_3V3_PPRD_i | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - NextState("Slave_Error") - ).Elif(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - (~self.wishbone_master.cyc)), + (SBUS_3V3_ASs_i == 0)), 
NextValue(sbus_oe_master_in, 1), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(self.led_display.value, 0x000000000F | Cat(Signal(8, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), NextState("Slave_Error") ).Elif(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0) & - (self.wishbone_master.cyc)), ## we need to answer, set ACK_RERUN + (SBUS_3V3_ASs_i == 0)), ## we need to answer, set ACK_ERR NextValue(sbus_oe_master_in, 1), - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), #NextValue(self.led_display.value, 0x00000000C0 | Cat(self.wishbone_master.cyc, self.wishbone_master.stb, self.wishbone_master.we, self.wishbone_master.ack, Signal(4, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), NextState("Slave_Error") - ).Elif(((SBUS_3V3_SELs_i != 0) & - (SBUS_3V3_ASs_i != 0) & - (self.wishbone_master.cyc)), - NextValue(sbus_oe_master_in, 0), ).Elif(~SBUS_3V3_BGs_i, ### ouch we got the bus but nothing more to do ?!? 
NextValue(SBUS_3V3_BRs_o, 1), diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 495f0fe..39ef3b9 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -224,7 +224,7 @@ class SBusFPGA(SoCCore): ##self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) ##self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) - self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + #self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} From 6459b79ed1204e972e7e984db29d0091f520cd45 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 24 Jun 2021 05:00:36 -0400 Subject: [PATCH 23/78] bring back cross-domain using a wishbone adapter --- .../sbus_to_fpga_fsm.py | 103 +++++++++++++----- .../sbus_to_fpga_soc.py | 72 +++++++----- 2 files changed, 117 insertions(+), 58 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index fbaf164..dfc698c 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -30,8 +30,8 @@ WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) USBOHCI_ADDR_PFX = Signal(12, reset = 8) SRAM_ADDR_PFX = Signal(12, reset = 9) -wishbone_default_timeout = 63 -sbus_default_timeout = 63 +wishbone_default_timeout = 120 ## must be > sbus_default_timeout +sbus_default_timeout = 100 ## must be 
below 127 as we can wait twice on it inside the 255 cycles def siz_is_word(siz): return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) @@ -157,6 +157,14 @@ class LedDisplay(Module): NextValue(time_counter, time_counter - 1) ) ) + +LED_PARITY=0x11 +LED_ADDRESS=0x12 +LED_UNKNOWNREQ=0x14 +LED_RERUN=0x8 +LED_RERUN_WRITE=0x4 +LED_RERUN_WORD=0x2 +LED_RERUN_LATE=0x1 class SBusFPGABus(Module): def __init__(self, platform, prom, hold_reset, wishbone_slave, wishbone_master): @@ -166,9 +174,9 @@ class SBusFPGABus(Module): self.wishbone_slave = wishbone_slave self.wishbone_master = wishbone_master - #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - #SBUS_DATA_OE_LED_o = Signal() - #self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + SBUS_DATA_OE_LED_o = Signal() + self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) ##pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") ##SBUS_DATA_OE_LED_2_o = Signal() ##self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) @@ -266,27 +274,35 @@ class SBusFPGABus(Module): sbus_master_throttle = Signal(4) - #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) + self.submodules.led_display = LedDisplay(platform.request_all("user_led")) #self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) #self.sync += platform.request("user_led", 1).eq(self.wishbone_slave.stb) #self.sync += platform.request("user_led", 2).eq(self.wishbone_slave.we) #self.sync += platform.request("user_led", 3).eq(self.wishbone_slave.ack) #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.err) - #led1 = platform.request("user_led", 0) + #led0 = platform.request("user_led", 0) #led1 = platform.request("user_led", 1) #led2 = platform.request("user_led", 2) #led3 = platform.request("user_led", 3) #led4 = platform.request("user_led", 4) - self.sync += 
platform.request("user_led", 0).eq(self.wishbone_master.cyc) - self.sync += platform.request("user_led", 1).eq(self.wishbone_master.stb) - self.sync += platform.request("user_led", 2).eq(self.wishbone_master.we) - self.sync += platform.request("user_led", 3).eq(self.wishbone_master.ack) - self.sync += platform.request("user_led", 4).eq(~SBUS_3V3_SELs_i) - self.sync += platform.request("user_led", 5).eq(~SBUS_3V3_ASs_i) - self.sync += platform.request("user_led", 6).eq(wishbone_master_timeout == 0) - led7 = platform.request("user_led", 7) + #led0123 = Signal(4) + #self.sync += platform.request("user_led", 0).eq(led0123[0]) + #self.sync += platform.request("user_led", 1).eq(led0123[1]) + #self.sync += platform.request("user_led", 2).eq(led0123[2]) + #self.sync += platform.request("user_led", 3).eq(led0123[3]) + + #self.sync += platform.request("user_led", 0).eq(self.wishbone_master.cyc) + #self.sync += platform.request("user_led", 1).eq(self.wishbone_master.stb) + #self.sync += platform.request("user_led", 2).eq(self.wishbone_master.we) + #self.sync += platform.request("user_led", 3).eq(self.wishbone_master.ack) + #self.sync += platform.request("user_led", 4).eq(~SBUS_3V3_SELs_i) + + #self.sync += platform.request("user_led", 4).eq(self.wishbone_master.cyc) + #self.sync += platform.request("user_led", 5).eq(~SBUS_3V3_ASs_i) + #self.sync += platform.request("user_led", 6).eq(wishbone_master_timeout == 0) + #led7 = platform.request("user_led", 7) #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.cyc) #self.sync += platform.request("user_led", 6).eq(~SBUS_3V3_BRs_o) @@ -337,6 +353,7 @@ class SBusFPGABus(Module): If((self.hold_reset == 0), NextState("Idle")) ) slave_fsm.act("Idle", + SBUS_DATA_OE_LED_o.eq(1), If(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & @@ -353,6 +370,7 @@ class SBusFPGABus(Module): If(SBUS_3V3_PA_i[0:2] != 0, NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + 
#NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), @@ -385,6 +403,7 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000020 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -422,6 +441,7 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -433,6 +453,7 @@ class SBusFPGABus(Module): If(SBUS_3V3_PA_i[0:1] != 0, NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet @@ -456,6 +477,7 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -475,6 +497,7 @@ class SBusFPGABus(Module): If(SBUS_3V3_PA_i[0:2] != 0, NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | @@ -495,6 +518,7 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + 
#NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -520,6 +544,7 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -531,6 +556,7 @@ class SBusFPGABus(Module): If(SBUS_3V3_PA_i[0:1] != 0, NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -549,6 +575,7 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000060 | 0x0000000001), NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) ).Elif(SBUS_3V3_BGs_i & @@ -612,12 +639,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(self.led_display.value, 0x000000000F | Cat(Signal(8, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), - NextState("Slave_Error") - ).Elif(((SBUS_3V3_SELs_i == 0) & - (SBUS_3V3_ASs_i == 0)), ## we need to answer, set ACK_ERR - NextValue(sbus_oe_master_in, 1), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - #NextValue(self.led_display.value, 0x00000000C0 | Cat(self.wishbone_master.cyc, self.wishbone_master.stb, self.wishbone_master.we, self.wishbone_master.ack, Signal(4, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), + #NextValue(led0123, led0123 | LED_UNKNOWNREQ), NextState("Slave_Error") ).Elif(~SBUS_3V3_BGs_i, ### ouch we got the bus but nothing more to do ?!? 
@@ -653,6 +675,7 @@ class SBusFPGABus(Module): ).Elif((sbus_last_pa[0:2] == 0x3), NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 0: 8])) ), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Do_Read") ) slave_fsm.act("Slave_Do_Read", @@ -660,7 +683,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - If((SBUS_3V3_ASs_i == 1), + If(((SBUS_3V3_ASs_i == 1) | ((SBUS_3V3_ASs_i == 0) & (SBUS_3V3_SELs_i == 1))), NextState("Idle") ) ) @@ -700,13 +723,15 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), + NextValue(sbus_slave_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextState("Slave_Ack_Read_Reg_Burst") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(self.wishbone_master.cyc, 0), ## abort transaction NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN | LED_RERUN_WORD | LED_RERUN_LATE), NextState("Slave_Error") ) ) @@ -724,6 +749,7 @@ class SBusFPGABus(Module): NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN | LED_RERUN_WORD), NextState("Slave_Error") ) ) @@ -757,6 +783,7 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), + NextValue(sbus_slave_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), NextState("Slave_Ack_Read_Reg_HWord") ).Elif(sbus_slave_timeout == 0, ### this is taking too long @@ -764,6 +791,7 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN | 
LED_RERUN_LATE), NextState("Slave_Error") ) ) @@ -781,6 +809,7 @@ class SBusFPGABus(Module): NextState("Slave_Ack_Read_Reg_HWord_Wait_For_Data") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN), NextState("Slave_Error") ) ) @@ -812,6 +841,7 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), + NextValue(sbus_slave_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), NextState("Slave_Ack_Read_Reg_Byte") ).Elif(sbus_slave_timeout == 0, ### this is taking too long @@ -819,6 +849,7 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.stb, 0), NextValue(wishbone_master_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN | LED_RERUN_LATE), NextState("Slave_Error") ) ) @@ -836,6 +867,7 @@ class SBusFPGABus(Module): NextState("Slave_Ack_Read_Reg_Byte_Wait_For_Data") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN), NextState("Slave_Error") ) ) @@ -872,17 +904,20 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - If((SBUS_3V3_ASs_i == 1), + If(((SBUS_3V3_ASs_i == 1) | ((SBUS_3V3_ASs_i == 0) & (SBUS_3V3_SELs_i == 1))), NextState("Idle") ) ) slave_fsm.act("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x68), self.led_display.value[8:40])), If(self.wishbone_master.cyc == 0, + NextValue(sbus_slave_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextState("Slave_Ack_Reg_Write_Burst") ).Elif(sbus_slave_timeout == 0, ### this is taking too long - NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + NextValue(self.led_display.value, Cat(Signal(8, reset = LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), sbus_last_pa, Signal(4, 
reset = 0))), + #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), NextState("Slave_Error") ) ) @@ -919,10 +954,12 @@ class SBusFPGABus(Module): ) slave_fsm.act("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone", If(self.wishbone_master.cyc == 0, + NextValue(sbus_slave_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), NextState("Slave_Ack_Reg_Write_HWord") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE), NextState("Slave_Error") ) ) @@ -959,20 +996,23 @@ class SBusFPGABus(Module): ) slave_fsm.act("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone", If(self.wishbone_master.cyc == 0, + NextValue(sbus_slave_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), NextState("Slave_Ack_Reg_Write_Byte") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), + #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE), NextState("Slave_Error") ) ) # ##### SLAVE ERROR ##### slave_fsm.act("Slave_Error", + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), #NextValue(self.led_display.value, 0x0000000080 | self.led_display.value), - NextValue(sbus_oe_data, 0), - NextValue(sbus_oe_slave_in, 0), - NextValue(sbus_oe_master_in, 0), - If((SBUS_3V3_ASs_i == 1), + If(((SBUS_3V3_ASs_i == 1) | ((SBUS_3V3_ASs_i == 0) & (SBUS_3V3_SELs_i == 1))), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), NextState("Idle") ) ) @@ -1125,7 +1165,10 @@ class SBusFPGABus(Module): NextValue(wishbone_master_timeout, wishbone_master_timeout -1) ), If(self.wishbone_master.cyc & self.wishbone_master.stb & self.wishbone_master.we, - If(self.wishbone_master.ack | (wishbone_master_timeout == 0), + If(self.wishbone_master.ack,# | (wishbone_master_timeout == 0), + #If(~self.wishbone_master.ack, + # NextValue(led7, 1) + #), NextValue(self.wishbone_master.cyc, 0), NextValue(self.wishbone_master.stb, 0), NextValue(self.wishbone_master.we, 0), 
diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 39ef3b9..12dc72d 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -7,6 +7,7 @@ from litex.build.xilinx.vivado import vivado_build_args, vivado_build_argdict from litex.soc.integration.soc import * from litex.soc.integration.soc_core import * from litex.soc.integration.builder import * +from litex.soc.interconnect import wishbone from litex.soc.cores.clock import * from litex.soc.cores.led import LedChaser from litex_boards.platforms import ztex213 @@ -45,10 +46,10 @@ _usb_io = [ class _CRG(Module): def __init__(self, platform, sys_clk_freq): -## self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock - self.clock_domains.cd_sys = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus & SYS clock domain + self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock +## self.clock_domains.cd_sys = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus & SYS clock domain self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset, we don't want the delay after a warm reset) -## self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain + self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain # self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller @@ -56,21 +57,22 @@ class _CRG(Module): clk48 = platform.request("clk48") self.cd_native.clk = clk48 clk_sbus = platform.request("SBUS_3V3_CLK") - ##self.cd_sbus.clk = clk_sbus + self.cd_sbus.clk = clk_sbus rst_sbus 
= platform.request("SBUS_3V3_RSTs") - ##self.comb += self.cd_sbus.rst.eq(~rst_sbus) - self.cd_sys.clk = clk_sbus - self.comb += self.cd_sys.rst.eq(~rst_sbus) + self.comb += self.cd_sbus.rst.eq(~rst_sbus) + ##self.cd_sys.clk = clk_sbus + ##self.comb += self.cd_sys.rst.eq(~rst_sbus) - ##self.submodules.pll = pll = S7MMCM(speedgrade=-1) - ##pll.register_clkin(clk48, 48e6) - ##pll.create_clkout(self.cd_sys, sys_clk_freq) + self.submodules.pll = pll = S7MMCM(speedgrade=-1) + pll.register_clkin(clk48, 48e6) + pll.create_clkout(self.cd_sys, sys_clk_freq) + self.comb += pll.reset.eq(~rst_sbus) # | ~por_done - ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) - ##platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) - ##platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) - ##platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) - platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) + platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) + platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) + platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) + ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) # Power on reset, reset propagate from SBus to SYS # por_count = Signal(16, reset=2**16-1) @@ -96,7 +98,7 @@ class SBusFPGA(SoCCore): kwargs["with_uart"] = False kwargs["with_timer"] = False - self.sys_clk_freq = sys_clk_freq = 25e6 ## 100e6 + self.sys_clk_freq = sys_clk_freq = 100e6 ## 25e6 self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") self.platform.add_extension(_sbus_sbus) @@ -130,13 +132,13 @@ class SBusFPGA(SoCCore): self.comb += SBUS_3V3_INT1s_o.eq(~self.usb_host.interrupt) ## - pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - SBUS_DATA_OE_LED_o = Signal() - self.comb += 
pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) + #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + #SBUS_DATA_OE_LED_o = Signal() + #self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) #pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") #SBUS_DATA_OE_LED_2_o = Signal() #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) - self.comb += SBUS_DATA_OE_LED_o.eq(~SBUS_3V3_INT1s_o) + #self.comb += SBUS_DATA_OE_LED_o.eq(~SBUS_3V3_INT1s_o) prom_file = "prom_migen.fc" prom_data = soc_core.get_mem_data(prom_file, "big") @@ -213,18 +215,32 @@ class SBusFPGA(SoCCore): ## master_rd_fifo_data=wishbone_to_sbus_rd_fifo_data) ##self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) - wishbone_slave = wishbone.Interface(data_width=self.bus.data_width) - wishbone_master = wishbone.Interface(data_width=self.bus.data_width) - self.submodules.sbus_bus = SBusFPGABus(platform=self.platform, - prom=prom, - hold_reset=hold_reset, - wishbone_slave=wishbone_slave, - wishbone_master=wishbone_master) + #wishbone_slave = wishbone.Interface(data_width=self.bus.data_width) + #wishbone_master = wishbone.Interface(data_width=self.bus.data_width) + + #wishbone_slave = wishbone.Interface(data_width=self.bus.data_width) + #wishbone_master = wishbone.Interface(data_width=self.bus.data_width) + + wishbone_slave_sbus = wishbone.Interface(data_width=self.bus.data_width) + wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.wishbone_master_sbus = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="sbus", cd_slave="sys") + self.submodules.wishbone_slave_sys = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_sbus, cd_master="sys", cd_slave="sbus") + + _sbus_bus = SBusFPGABus(platform=self.platform, + prom=prom, + hold_reset=hold_reset, + wishbone_slave=wishbone_slave_sbus, + wishbone_master=self.wishbone_master_sbus) + 
#self.submodules.sbus_bus = _sbus_bus + self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) ##self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) ##self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) - self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) + + #self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) #self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) + #self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} From 4dd86935765e3549af599b2c73990ffc7acb3ea4 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Fri, 25 Jun 2021 06:43:19 -0400 Subject: [PATCH 24/78] DMA supports byte & hword (needed), identify devices but unstable --- .../sbus_to_fpga_fsm.py | 139 +++++++++++++++--- .../sbus_to_fpga_soc.py | 2 +- 2 files changed, 119 insertions(+), 22 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index dfc698c..cf8d539 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -165,6 +165,10 @@ LED_RERUN=0x8 LED_RERUN_WRITE=0x4 LED_RERUN_WORD=0x2 LED_RERUN_LATE=0x1 + +LED_M_WRITE = 0x10 +LED_M_READ = 0x20 +LED_M_CACHE = 0x40 class SBusFPGABus(Module): def __init__(self, platform, prom, hold_reset, wishbone_slave, wishbone_master): @@ -263,6 +267,8 @@ class SBusFPGABus(Module): master_data = Signal(32) # could be merged 
with p_data master_addr = Signal(30) # could be meged with data_read_addr + master_size = Signal(4) + master_idx = Signal(2) master_we = Signal() @@ -274,24 +280,23 @@ class SBusFPGABus(Module): sbus_master_throttle = Signal(4) - self.submodules.led_display = LedDisplay(platform.request_all("user_led")) + #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) #self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) #self.sync += platform.request("user_led", 1).eq(self.wishbone_slave.stb) #self.sync += platform.request("user_led", 2).eq(self.wishbone_slave.we) #self.sync += platform.request("user_led", 3).eq(self.wishbone_slave.ack) #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.err) - #led0 = platform.request("user_led", 0) - #led1 = platform.request("user_led", 1) - #led2 = platform.request("user_led", 2) - #led3 = platform.request("user_led", 3) - #led4 = platform.request("user_led", 4) + led4 = platform.request("user_led", 4) + led5 = platform.request("user_led", 5) + led6 = platform.request("user_led", 6) + led7 = platform.request("user_led", 7) - #led0123 = Signal(4) - #self.sync += platform.request("user_led", 0).eq(led0123[0]) - #self.sync += platform.request("user_led", 1).eq(led0123[1]) - #self.sync += platform.request("user_led", 2).eq(led0123[2]) - #self.sync += platform.request("user_led", 3).eq(led0123[3]) + led0123 = Signal(4) + self.sync += platform.request("user_led", 0).eq(led0123[0]) + self.sync += platform.request("user_led", 1).eq(led0123[1]) + self.sync += platform.request("user_led", 2).eq(led0123[2]) + self.sync += platform.request("user_led", 3).eq(led0123[3]) #self.sync += platform.request("user_led", 0).eq(self.wishbone_master.cyc) #self.sync += platform.request("user_led", 1).eq(self.wishbone_master.stb) @@ -307,6 +312,7 @@ class SBusFPGABus(Module): #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.cyc) #self.sync += platform.request("user_led", 
6).eq(~SBUS_3V3_BRs_o) #self.sync += platform.request("user_led", 7).eq(~SBUS_3V3_BGs_i) + self.sync += SBUS_DATA_OE_LED_o.eq(~SBUS_3V3_BGs_i), #cycle_counter = Signal(8, reset = 0) #self.sync += cycle_counter.eq(cycle_counter + 1) @@ -353,7 +359,6 @@ class SBusFPGABus(Module): If((self.hold_reset == 0), NextState("Idle")) ) slave_fsm.act("Idle", - SBUS_DATA_OE_LED_o.eq(1), If(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (siz_is_word(SBUS_3V3_SIZ_i)) & @@ -578,6 +583,16 @@ class SBusFPGABus(Module): #NextValue(led0123, led0123 | LED_ADDRESS), NextState("Slave_Error") ) + ).Elif(self.wishbone_slave.cyc & + self.wishbone_slave.stb & + ~self.wishbone_slave.ack & + ~self.wishbone_slave.err & + self.wishbone_slave.we & + (self.wishbone_slave.sel == 0) & + (wishbone_slave_timeout == 0), + ## sel == 0 so nothing to write, don't acquire the SBus + NextValue(self.wishbone_slave.ack, 1), + NextValue(wishbone_slave_timeout, wishbone_default_timeout), ).Elif(SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & @@ -605,13 +620,57 @@ class SBusFPGABus(Module): self.wishbone_slave.dat_w[16:24], self.wishbone_slave.dat_w[ 8:16], self.wishbone_slave.dat_w[ 0: 8])), + Case(self.wishbone_slave.sel, { + 0xf: [NextValue(burst_counter, 0), + NextValue(burst_limit_m1, 0), ## only single word for now + NextValue(master_size, SIZ_WORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + ], + 0x1: [NextValue(master_idx, 3), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + ], + 0x2: [NextValue(master_idx, 2), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), + ], + 0x4: [NextValue(master_idx, 1), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, 
Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + ], + 0x8: [NextValue(master_idx, 0), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), + ], + 0x3: [NextValue(master_idx, 2), + NextValue(master_size, SIZ_HWORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + ], + 0xc: [NextValue(master_idx, 0), + NextValue(master_size, SIZ_HWORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + ], + "default":[NextValue(burst_counter, 0), # FIXME if it happens! + NextValue(burst_limit_m1, 0), ## only single word for now + NextValue(master_size, SIZ_WORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), + NextValue(led0123, self.wishbone_slave.sel) + ] + }), +# NextValue(master_data, self.wishbone_slave.dat_w), NextValue(self.wishbone_slave.ack, 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), NextValue(SBUS_3V3_PPRD_o, 0), - NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), NextValue(master_we, 1), #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), + #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_WRITE), Signal(2, reset = 0), self.wishbone_slave.adr)), NextState("Master_Translation") ).Elif(SBUS_3V3_BGs_i & self.master_read_buffer_start & @@ -632,6 +691,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), NextValue(master_we, 0), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), + #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_READ), Signal(2, reset = 0), self.master_read_buffer_addr)), NextState("Master_Translation") ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0)), @@ -916,7 +976,7 @@ class 
SBusFPGABus(Module): NextState("Slave_Ack_Reg_Write_Burst") ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), - NextValue(self.led_display.value, Cat(Signal(8, reset = LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), sbus_last_pa, Signal(4, reset = 0))), + #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), sbus_last_pa, Signal(4, reset = 0))), #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), NextState("Slave_Error") ) @@ -1021,13 +1081,41 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x09), self.led_display.value[8:40])), If(master_we, NextValue(sbus_oe_data, 1), - NextValue(SBUS_3V3_D_o, master_data) + Case(master_size, { + SIZ_WORD: NextValue(SBUS_3V3_D_o, master_data), + SIZ_BYTE: Case(master_idx, { + 0: NextValue(SBUS_3V3_D_o, Cat(master_data[ 0: 8], + master_data[ 0: 8], + master_data[ 0: 8], + master_data[ 0: 8],)), + 1: NextValue(SBUS_3V3_D_o, Cat(master_data[ 8:16], + master_data[ 8:16], + master_data[ 8:16], + master_data[ 8:16],)), + 2: NextValue(SBUS_3V3_D_o, Cat(master_data[16:24], + master_data[16:24], + master_data[16:24], + master_data[16:24],)), + 3: NextValue(SBUS_3V3_D_o, Cat(master_data[24:32], + master_data[24:32], + master_data[24:32], + master_data[24:32],)), + }), + SIZ_HWORD: Case(master_idx, { + 0: NextValue(SBUS_3V3_D_o, Cat(master_data[ 0:16], + master_data[ 0:16],)), + 2: NextValue(SBUS_3V3_D_o, Cat(master_data[16:32], + master_data[16:32],)), + }) + }) ).Else( NextValue(sbus_oe_data, 0) ), Case(SBUS_3V3_ACKs_i, { ACK_ERR: ## ouch - [NextValue(sbus_oe_data, 0), + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle")], @@ -1119,13 +1207,19 @@ class SBusFPGABus(Module): slave_fsm.act("Master_Write", 
#NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0d), self.led_display.value[8:40])), Case(SBUS_3V3_ACKs_i, { - ACK_WORD: + ACK_WORD: # FIXME: check againt master_size ? [If(burst_counter == burst_limit_m1, NextState("Master_Write_Final"), ).Else( NextValue(SBUS_3V3_D_o, master_data), ## FIXME: we're not updating master_data for burst mode yet NextValue(burst_counter, burst_counter + 1), )], + ACK_BYTE: # FIXME: check againt master_size ? + [NextState("Master_Write_Final"), + ], + ACK_HWORD: # FIXME: check againt master_size ? + [NextState("Master_Write_Final"), + ], ACK_IDLE: [NextState("Master_Write") ## redundant ], @@ -1187,19 +1281,19 @@ class SBusFPGABus(Module): NextValue(wishbone_slave_timeout, wishbone_slave_timeout -1) ), If(self.wishbone_slave.ack & self.wishbone_slave.we, - If((~self.wishbone_slave.stb) | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & + If((~self.wishbone_slave.stb), # | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & NextValue(self.wishbone_slave.ack, 0), NextValue(wishbone_slave_timeout, 0) ) ), If(self.wishbone_slave.ack & ~self.wishbone_slave.we, - If((~self.wishbone_slave.stb) | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & + If((~self.wishbone_slave.stb), # | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & NextValue(self.wishbone_slave.ack, 0), NextValue(wishbone_slave_timeout, 0) ) ), If(self.wishbone_slave.err, - If((~self.wishbone_slave.stb) | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & + If((~self.wishbone_slave.stb), # | (wishbone_slave_timeout == 0), #~self.wishbone_slave.cyc & NextValue(self.wishbone_slave.err, 0), NextValue(wishbone_slave_timeout, 0) ) @@ -1251,6 +1345,8 @@ class SBusFPGABus(Module): self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][16:24], self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][ 8:16], self.master_read_buffer_data[self.wishbone_slave.adr[0:2]][ 0: 8])), +# NextValue(self.wishbone_slave.dat_r, 
self.master_read_buffer_data[self.wishbone_slave.adr[0:2]]), + #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_READ | LED_M_CACHE), Signal(2, reset = 0), self.wishbone_slave.adr)), NextValue(self.master_read_buffer_read[self.wishbone_slave.adr[0:2]], 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout) ).Elif(~self.master_read_buffer_start, @@ -1280,6 +1376,7 @@ class SBusFPGABus(Module): self.master_read_buffer_data[last_word_idx][16:24], self.master_read_buffer_data[last_word_idx][ 8:16], self.master_read_buffer_data[last_word_idx][ 0: 8])), +# NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[last_word_idx]), NextValue(self.master_read_buffer_read[last_word_idx], 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout), NextState("Idle") diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 12dc72d..47f18e1 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -240,7 +240,7 @@ class SBusFPGA(SoCCore): #self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) #self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) - #self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} From 1c69ec1d6f2e6f8414433b29bc3ab9079e7f0d87 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 27 Jun 2021 09:49:46 -0400 Subject: [PATCH 25/78] 
configure the sdram; prom is not initializing it properly yet --- .../sbus_to_fpga_fsm.py | 18 +++++---- .../sbus_to_fpga_soc.py | 38 +++++++++++++++++-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index cf8d539..7e9d6e5 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -282,15 +282,15 @@ class SBusFPGABus(Module): #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) - #self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.cyc) - #self.sync += platform.request("user_led", 1).eq(self.wishbone_slave.stb) - #self.sync += platform.request("user_led", 2).eq(self.wishbone_slave.we) - #self.sync += platform.request("user_led", 3).eq(self.wishbone_slave.ack) + self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.cyc) + #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.stb) + #self.sync += platform.request("user_led", 6).eq(self.wishbone_slave.we) + #self.sync += platform.request("user_led", 7).eq(self.wishbone_slave.ack) #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.err) - led4 = platform.request("user_led", 4) - led5 = platform.request("user_led", 5) - led6 = platform.request("user_led", 6) - led7 = platform.request("user_led", 7) + #led4 = platform.request("user_led", 4) + #led5 = platform.request("user_led", 5) + #led6 = platform.request("user_led", 6) + #led7 = platform.request("user_led", 7) led0123 = Signal(4) self.sync += platform.request("user_led", 0).eq(led0123[0]) @@ -334,6 +334,8 @@ class SBusFPGABus(Module): self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") + self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) + slave_fsm.act("Reset", #NextValue(self.led_display.value, 0x0000000000), NextValue(sbus_oe_data, 0), diff --git 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 47f18e1..fa9fa76 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -13,6 +13,9 @@ from litex.soc.cores.led import LedChaser from litex_boards.platforms import ztex213 from migen.genlib.fifo import * +from litedram.modules import MT41J128M16 +from litedram.phy import s7ddrphy + from sbus_to_fpga_fsm import *; from sbus_to_fpga_wishbone import *; @@ -47,6 +50,9 @@ _usb_io = [ class _CRG(Module): def __init__(self, platform, sys_clk_freq): self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock + self.clock_domains.cd_sys4x = ClockDomain(reset_less=True) + self.clock_domains.cd_sys4x_dqs = ClockDomain(reset_less=True) + self.clock_domains.cd_idelay = ClockDomain() ## self.clock_domains.cd_sys = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus & SYS clock domain self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset, we don't want the delay after a warm reset) self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain @@ -66,8 +72,9 @@ class _CRG(Module): self.submodules.pll = pll = S7MMCM(speedgrade=-1) pll.register_clkin(clk48, 48e6) pll.create_clkout(self.cd_sys, sys_clk_freq) + pll.create_clkout(self.cd_sys4x, 4*sys_clk_freq) + pll.create_clkout(self.cd_sys4x_dqs, 4*sys_clk_freq, phase=90) self.comb += pll.reset.eq(~rst_sbus) # | ~por_done - platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) @@ -89,6 +96,13 @@ class _CRG(Module): usb_pll.create_clkout(self.cd_usb, 48e6, margin = 0) self.comb += usb_pll.reset.eq(~rst_sbus) # | ~por_done 
platform.add_false_path_constraints(self.cd_sys.clk, self.cd_usb.clk) + + self.submodules.pll_idelay = pll_idelay = S7PLL(speedgrade=-1) + pll_idelay.register_clkin(clk48, 48e6) + pll_idelay.create_clkout(self.cd_idelay, 200e6) + self.comb += pll_idelay.reset.eq(~rst_sbus) # | ~por_done + + self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay) class SBusFPGA(SoCCore): def __init__(self, **kwargs): @@ -103,12 +117,18 @@ class SBusFPGA(SoCCore): self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") self.platform.add_extension(_sbus_sbus) self.platform.add_extension(_usb_io) - SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, clk_freq=sys_clk_freq, **kwargs) + SoCCore.__init__(self, + platform=platform, + sys_clk_freq=sys_clk_freq, + clk_freq=sys_clk_freq, + csr_paging=0x1000, # default is 0x800 + **kwargs) wb_mem_map = { "prom": 0x00000000, "csr" : 0x00040000, "usb_host": 0x00080000, "usb_shared_mem": 0x00090000, + "main_ram": 0x80000000, "usb_fake_dma": 0xfc000000, } self.mem_map.update(wb_mem_map) @@ -240,7 +260,19 @@ class SBusFPGA(SoCCore): #self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) #self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) - self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + + #self.add_sdcard() + + self.submodules.ddrphy = s7ddrphy.A7DDRPHY(platform.request("ddram"), + memtype = "DDR3", + nphases = 4, + sys_clk_freq = sys_clk_freq) + self.add_sdram("sdram", + phy = self.ddrphy, + 
module = MT41J128M16(sys_clk_freq, "1:4"), + l2_cache_size = 0 + ) # self.soc = Module() # self.soc.mem_regions = self.mem_regions = {} From 564c5276dc174272d8962bec0d8fd8eefbe11db3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 27 Jun 2021 09:52:07 -0400 Subject: [PATCH 26/78] drop unused file --- .../sbus_to_fpga_soc.py | 1 - .../sbus_to_fpga_wishbone.py | 189 ------------------ 2 files changed, 190 deletions(-) delete mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index fa9fa76..698020a 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -17,7 +17,6 @@ from litedram.modules import MT41J128M16 from litedram.phy import s7ddrphy from sbus_to_fpga_fsm import *; -from sbus_to_fpga_wishbone import *; _sbus_sbus = [ ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py deleted file mode 100644 index 9f5b50b..0000000 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_wishbone.py +++ /dev/null @@ -1,189 +0,0 @@ - -from migen import * -from litex.soc.interconnect import wishbone - -# ******************************************************************************************************** -class SBusToWishbone(Module): - def __init__(self, platform, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): - self.platform = platform - self.wr_fifo = wr_fifo - self.rd_fifo_addr = rd_fifo_addr - self.rd_fifo_data = rd_fifo_data - self.wishbone = wishbone - - #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - #SBUS_DATA_OE_LED_o = Signal() - #self.comb += pad_SBUS_DATA_OE_LED.eq(SBUS_DATA_OE_LED_o) - #pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") - #SBUS_DATA_OE_LED_2_o = Signal() - #self.comb += 
pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) - - data = Signal(32) - adr = Signal(30) - timeout = Signal(9) - - # ##### FSM: read/write from/to WB ##### - self.submodules.fsm = fsm = FSM(reset_state="Reset") - fsm.act("Reset", - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) - fsm.act("Idle", - # write first, we don't want a read to pass before a previous write - If(self.wr_fifo.readable & ~self.wishbone.cyc, - self.wr_fifo.re.eq(1), - NextValue(adr, self.wr_fifo.dout[0:30]), - NextValue(data, self.wr_fifo.dout[30:62]), - NextValue(timeout, 511), - NextState("Write") - ).Elif (rd_fifo_addr.readable & ~self.wishbone.cyc & self.rd_fifo_data.writable, - rd_fifo_addr.re.eq(1), - NextValue(adr, self.rd_fifo_addr.dout[0:30]), - NextValue(timeout, 511), - NextState("Read") - ) - ) - fsm.act("Write", - self.wishbone.adr.eq(adr), - self.wishbone.dat_w.eq(data), - self.wishbone.we.eq(1), - self.wishbone.cyc.eq(1), - self.wishbone.stb.eq(1), - self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), - NextValue(timeout, timeout - 1), - If(self.wishbone.ack, - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ).Elif(timeout == 0, # fixme, what to do to signal a problem ? 
- self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) - ) - fsm.act("Read", - self.wishbone.adr.eq(adr), - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(1), - self.wishbone.stb.eq(1), - self.wishbone.sel.eq(2**len(self.wishbone.sel)-1), - NextValue(timeout, timeout - 1), - If(self.wishbone.ack, - self.rd_fifo_data.we.eq(1), - self.rd_fifo_data.din.eq(Cat(self.wishbone.dat_r, Signal(reset = 0))), - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ).Elif(timeout == 0, - self.rd_fifo_data.we.eq(1), - self.rd_fifo_data.din.eq(Cat(Signal(32, reset = 0xDEADBEEF), Signal(reset = 1))), - self.wishbone.we.eq(0), - self.wishbone.cyc.eq(0), - self.wishbone.stb.eq(0), - NextState("Idle") - ) - ) - -# ******************************************************************************************************** -class WishboneToSBus(Module): - def __init__(self, platform, soc, wr_fifo, rd_fifo_addr, rd_fifo_data, wishbone): - self.platform = platform - self.wr_fifo = wr_fifo - self.rd_fifo_addr = rd_fifo_addr - self.rd_fifo_data = rd_fifo_data - self.wishbone = wishbone - self.soc = soc - - #pad_SBUS_DATA_OE_LED_2 = platform.request("SBUS_DATA_OE_LED_2") - #SBUS_DATA_OE_LED_2_o = Signal() - #self.comb += pad_SBUS_DATA_OE_LED_2.eq(SBUS_DATA_OE_LED_2_o) - - data = Signal(32) - adr = Signal(30) - timeout = Signal(9) - - # ##### FSM: read/write from/to SBus ##### - self.submodules.fsm = fsm = FSM(reset_state="Reset") - fsm.act("Reset", - NextState("Idle") - ) - fsm.act("Idle", - If(self.wishbone.stb & self.wishbone.cyc & self.wishbone.we & self.wr_fifo.writable, - If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range (3f == fc>>2) - self.wr_fifo.we.eq(1), - self.wr_fifo.din.eq(Cat(self.wishbone.adr[0:30], self.wishbone.dat_w[0:32])) - ), - NextValue(timeout, 511), - NextState("WriteWait") - ).Elif(self.wishbone.stb & self.wishbone.cyc & ~self.wishbone.we & self.rd_fifo_addr.writable, - 
If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range - NextValue(adr, self.wishbone.adr), - self.rd_fifo_addr.we.eq(1), - self.rd_fifo_addr.din.eq(self.wishbone.adr[0:30]) - ), - NextValue(timeout, 511), - NextState("ReadWait"), - ) - ) - fsm.act("WriteWait", - If(self.wishbone.adr[24:30] == 0x3f, ## in our DMA range - self.wishbone.ack.eq(1) - ).Else( - self.wishbone.err.eq(1) - ), - NextValue(timeout, timeout - 1), - If(~self.wishbone.stb, - NextState("Idle") - ).Elif(timeout == 0, # fixme, what to do to signal a problem ? - NextState("Idle") - ) - ) - fsm.act("ReadWait", - NextValue(timeout, timeout - 1), - If(adr[24:30] == 0x3f, ## in our DMA range - If(self.rd_fifo_data.readable, - If(self.rd_fifo_data.dout[32] == 0, - self.wishbone.ack.eq(1), - self.rd_fifo_data.re.eq(1), - NextValue(data, self.rd_fifo_data.dout), - self.wishbone.dat_r.eq(self.rd_fifo_data.dout[0:32]), - NextState("ReadWait2") - ).Else( - self.wishbone.err.eq(1), - self.rd_fifo_data.re.eq(1), - NextState("ReadWaitErr") - ) - ).Elif(timeout == 0, # fixme, what to do to signal a problem ? - NextState("Idle") - ) - ).Else( - self.wishbone.err.eq(1), - If(~self.wishbone.stb, - NextState("Idle") - ) - ) - ) - fsm.act("ReadWait2", - NextValue(timeout, timeout - 1), - self.wishbone.ack.eq(1), - self.wishbone.dat_r.eq(data), - If(~self.wishbone.stb, - NextState("Idle") - ).Elif(timeout == 0, # fixme, what to do to signal a problem ? - NextState("Idle") - ) - ) - fsm.act("ReadWaitErr", - NextValue(timeout, timeout - 1), - self.wishbone.err.eq(1), - If(~self.wishbone.stb, - NextState("Idle") - ).Elif(timeout == 0, # fixme, what to do to signal a problem ? 
- NextState("Idle") - ) - ) From b2ff488740e47fe500945b3fc86cd950ac8d88bf Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 05:14:19 -0400 Subject: [PATCH 27/78] prom --- .../forth_to_migen_rom.sh | 21 + sbus-to-ztex-gateware-migen/prom_migen.bth | 31 + sbus-to-ztex-gateware-migen/prom_migen.fth | 124 ++++ sbus-to-ztex-gateware-migen/sdram_csr.fth | 228 ++++++++ sbus-to-ztex-gateware-migen/sdram_init.fth | 533 ++++++++++++++++++ 5 files changed, 937 insertions(+) create mode 100755 sbus-to-ztex-gateware-migen/forth_to_migen_rom.sh create mode 100644 sbus-to-ztex-gateware-migen/prom_migen.bth create mode 100644 sbus-to-ztex-gateware-migen/prom_migen.fth create mode 100644 sbus-to-ztex-gateware-migen/sdram_csr.fth create mode 100644 sbus-to-ztex-gateware-migen/sdram_init.fth diff --git a/sbus-to-ztex-gateware-migen/forth_to_migen_rom.sh b/sbus-to-ztex-gateware-migen/forth_to_migen_rom.sh new file mode 100755 index 0000000..5bcd747 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/forth_to_migen_rom.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +PFX=prom_migen + +rm -f ${PFX}.fc + +# (export BP=~/SPARC/SBusFPGA/sbus-to-ztex/openfirmware ; toke ${PFX}.forth ) + +( export BP=`pwd`/openfirmware ; openfirmware/cpu/x86/Linux/forth openfirmware/cpu/x86/build/builder.dic prom_migen.bth ) 2>&1 | tee forth.log + +rm -f /tmp/${PFX}.hexa + +od --endian=big -w4 -x ${PFX}.fc | awk '{ print $2,$3"," }' >| /tmp/${PFX}.hexa + +rm -f /tmp/${PFX}.txt_hexa + +cat /tmp/${PFX}.hexa | sed -e 's/^\([a-f0-9][a-f0-9][a-f0-9][a-f0-9]\) \([a-f0-9][a-f0-9][a-f0-9][a-f0-9]\),/0x\1\2,/g' -e 's/^\([a-f0-9][a-f0-9]*\) ,/0x\10000,/' -e 's/^ ,/0x00000000,/' -e 's/\(0x[0-9a-fA-F]*\),/if (idx == 0):\n\treturn \1;/' > /tmp/${PFX}.txt_hexa + +#echo "rom = [" +#cat /tmp/${PFX}.txt_hexa +#echo "]" diff --git a/sbus-to-ztex-gateware-migen/prom_migen.bth b/sbus-to-ztex-gateware-migen/prom_migen.bth new file mode 100644 index 0000000..2285c83 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/prom_migen.bth 
@@ -0,0 +1,31 @@ +purpose: Load file for SBusFPGA + +command: &builder &this + +\ in: ${BP}/dev/usb2/device/hub/build/hub.fc +\ in: ${BP}/dev/usb2/device/generic/build/generic.fc +\ in: ${BP}/dev/usb2/device/net/build/usbnet.fc +\ in: ${BP}/dev/usb2/device/serial/build/usbserial.fc +\ in: ${BP}/dev/usb2/device/storage/build/usbstorage.fc +\ in: ${BP}/dev/usb2/device/keyboard/build/usbkbd.fc +\ in: ${BP}/dev/usb2/device/mouse/build/usbmouse.fc + +build-now + +\ silent on + +begin-tokenizing prom_migen.fc + +fload prom_migen.fth + +end-tokenizing + +\ h# 8000 to reserved-start +\ h# f000 to reserved-end +\ " ${BP}/dev/usb2/device/hub/build/hub.fc" " usb,class9" $add-dropin +\ " ${BP}/dev/usb2/device/generic/build/generic.fc" " usbdevice" $add-deflated-dropin +\ " ${BP}/dev/usb2/device/net/build/usbnet.fc" " usbnet" $add-deflated-dropin +\ " ${BP}/dev/usb2/device/keyboard/build/usbkbd.fc" " usb,class3,1,1" $add-deflated-dropin +\ " ${BP}/dev/usb2/device/mouse/build/usbmouse.fc" " usb,class3,1,2" $add-deflated-dropin +\ " ${BP}/dev/usb2/device/serial/build/usbserial.fc" " usbserial" $add-deflated-dropin +\ " ${BP}/dev/usb2/device/storage/build/usbstorage.fc" " usbstorage" $add-deflated-dropin diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth new file mode 100644 index 0000000..786d0ac --- /dev/null +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -0,0 +1,124 @@ +fcode-version2 + +\ loads constants +fload prom_csr.fth + +\ fload v2compat.fth + +\ Absolute minimal stuff; name & registers def. 
+" RDOL,led" device-name +my-address sbusfpga_csraddr_leds + my-space h# 4 reg +\ we don't support ET or HWORD +h# 7d xdrint " slave-burst-sizes" attribute +h# 7d xdrint " burst-sizes" attribute + +headers +-1 instance value led-virt +my-address constant my-sbus-address +my-space constant my-sbus-space + +: map-in ( adr space size -- virt ) " map-in" $call-parent ; +: map-out ( virt size -- ) " map-out" $call-parent ; + +: map-in-led ( -- ) my-sbus-address sbusfpga_csraddr_leds + my-sbus-space h# 4 map-in is led-virt ; +: map-out-led ( -- ) led-virt h# 4 map-out ; + +\ external + +: setled! ( pattern -- ) + map-in-led + led-virt l! ( pattern virt -- ) + map-out-led +; + +\ h# a5 setled! + +\ OpenBIOS tokenizer won't accept finish-device without new-device +\ Cheat by using the tokenizer so we can do OpenBoot 2.x siblings +\ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer +\ The OpenFirmware tokenizer does accept the 'clean' syntax +finish-device +new-device + +\ Absolute minimal stuff; name & registers def. +" generic-ohci" device-name + +my-address h# 80000 + my-space h# 1000 reg +\ we don't support ET or anything non-32bits +h# 7c xdrint " slave-burst-sizes" attribute +h# 7c xdrint " burst-sizes" attribute + +1 xdrint " interrupts" attribute + +headers +-1 instance value regs-virt +my-address constant my-sbus-address +my-space constant my-sbus-space + +: map-in ( adr space size -- virt ) " map-in" $call-parent ; +: map-out ( virt size -- ) " map-out" $call-parent ; + +: map-in-regs ( -- ) my-sbus-address h# 80000 + my-sbus-space h# 1000 map-in is regs-virt ; +: map-out-regs ( -- ) regs-virt h# 1000 map-out ; + +: my-reset! ( -- ) + map-in-regs + 00000001 regs-virt h# 4 + l! ( -- ) ( reset the HC ) + 00000000 regs-virt h# 18 + l! ( -- ) ( reset HCCA & friends ) + 00000000 regs-virt h# 1c + l! ( -- ) + 00000000 regs-virt h# 20 + l! ( -- ) + 00000000 regs-virt h# 24 + l! ( -- ) + 00000000 regs-virt h# 28 + l! 
( -- ) + 00000000 regs-virt h# 2c + l! ( -- ) + 00000000 regs-virt h# 30 + l! ( -- ) + map-out-regs +; + +my-reset! + +\ " ohci" encode-string " device_type" property +\ fload openfirmware/dev/usb2/hcd/ohci/loadpkg-sbus.fth +\ open + +\ OpenBIOS tokenizer won't accept finish-device without new-device +\ Cheat by using the tokenizer so we can do OpenBoot 2.x siblings +\ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer +\ The OpenFirmware tokenizer does accept the 'clean' syntax +finish-device +new-device + +\ Absolute minimal stuff; name & registers def. +" RDOL,sdram" device-name +\ two pages of registers: +my-address sbusfpga_csraddr_ddrphy + my-space xdrphys \ Offset#1 +h# 1000 xdrint xdr+ \ Merge size#1 +my-address sbusfpga_csraddr_sdram + my-space xdrphys xdr+ \ Merge offset#2 +h# 1000 xdrint xdr+ \ Merge size#2 +" reg" attribute + +\ we don't support ET or anything non-32bits +h# 7c xdrint " slave-burst-sizes" attribute +h# 7c xdrint " burst-sizes" attribute + +headers +-1 instance value mregs-ddrphy-virt +-1 instance value mregs-sdramdfii-virt +my-address constant my-sbus-address +my-space constant my-sbus-space +: map-in ( adr space size -- virt ) " map-in" $call-parent ; +: map-out ( virt size -- ) " map-out" $call-parent ; + +: map-in-mregs ( -- ) + my-sbus-address sbusfpga_csraddr_ddrphy + my-sbus-space h# 1000 map-in is mregs-ddrphy-virt + my-sbus-address sbusfpga_csraddr_sdram + my-sbus-space h# 1000 map-in is mregs-sdramdfii-virt +; +: map-out-mregs ( -- ) + mregs-ddrphy-virt h# 1000 map-out + mregs-sdramdfii-virt h# 1000 map-out +; + +\ fload sdram_init.fth + +\ init! 
+ +end0 diff --git a/sbus-to-ztex-gateware-migen/sdram_csr.fth b/sbus-to-ztex-gateware-migen/sdram_csr.fth new file mode 100644 index 0000000..84275c7 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sdram_csr.fth @@ -0,0 +1,228 @@ +: dphy_rst_rd ( -- csr_value ) + mregs-virt h# 1000 + l@ +; +: dphy_half_sys8x_taps_rd ( -- csr_value ) + mregs-virt h# 1004 + l@ +; +: dphy_wlevel_en_rd ( -- csr_value ) + mregs-virt h# 1008 + l@ +; +: dphy_wlevel_strobe_rd ( -- csr_value ) + mregs-virt h# 100c + l@ +; +: dphy_dly_sel_rd ( -- csr_value ) + mregs-virt h# 1010 + l@ +; +: dphy_rdly_dq_rst_rd ( -- csr_value ) + mregs-virt h# 1014 + l@ +; +: dphy_rdly_dq_inc_rd ( -- csr_value ) + mregs-virt h# 1018 + l@ +; +: dphy_rdly_dq_bitslip_rst_rd ( -- csr_value ) + mregs-virt h# 101c + l@ +; +: dphy_rdly_dq_bitslip_rd ( -- csr_value ) + mregs-virt h# 1020 + l@ +; +: dphy_wdly_dq_bitslip_rst_rd ( -- csr_value ) + mregs-virt h# 1024 + l@ +; +: dphy_wdly_dq_bitslip_rd ( -- csr_value ) + mregs-virt h# 1028 + l@ +; +: dphy_rdphase_rd ( -- csr_value ) + mregs-virt h# 102c + l@ +; +: dphy_wrphase_rd ( -- csr_value ) + mregs-virt h# 1030 + l@ +; +: sdr_dfii_control_rd ( -- csr_value ) + mregs-virt h# 2000 + l@ +; +: sdr_dfii_pi0_command_rd ( -- csr_value ) + mregs-virt h# 2004 + l@ +; +: sdr_dfii_pi0_command_issue_rd ( -- csr_value ) + mregs-virt h# 2008 + l@ +; +: sdr_dfii_pi0_address_rd ( -- csr_value ) + mregs-virt h# 200c + l@ +; +: sdr_dfii_pi0_baddress_rd ( -- csr_value ) + mregs-virt h# 2010 + l@ +; +: sdr_dfii_pi0_wrdata_rd ( -- csr_value ) + mregs-virt h# 2014 + l@ +; +: sdr_dfii_pi0_rddata_rd ( -- csr_value ) + mregs-virt h# 2018 + l@ +; +: sdr_dfii_pi1_command_rd ( -- csr_value ) + mregs-virt h# 201c + l@ +; +: sdr_dfii_pi1_command_issue_rd ( -- csr_value ) + mregs-virt h# 2020 + l@ +; +: sdr_dfii_pi1_address_rd ( -- csr_value ) + mregs-virt h# 2024 + l@ +; +: sdr_dfii_pi1_baddress_rd ( -- csr_value ) + mregs-virt h# 2028 + l@ +; +: sdr_dfii_pi1_wrdata_rd ( -- csr_value ) + 
mregs-virt h# 202c + l@ +; +: sdr_dfii_pi1_rddata_rd ( -- csr_value ) + mregs-virt h# 2030 + l@ +; +: sdr_dfii_pi2_command_rd ( -- csr_value ) + mregs-virt h# 2034 + l@ +; +: sdr_dfii_pi2_command_issue_rd ( -- csr_value ) + mregs-virt h# 2038 + l@ +; +: sdr_dfii_pi2_address_rd ( -- csr_value ) + mregs-virt h# 203c + l@ +; +: sdr_dfii_pi2_baddress_rd ( -- csr_value ) + mregs-virt h# 2040 + l@ +; +: sdr_dfii_pi2_wrdata_rd ( -- csr_value ) + mregs-virt h# 2044 + l@ +; +: sdr_dfii_pi2_rddata_rd ( -- csr_value ) + mregs-virt h# 2048 + l@ +; +: sdr_dfii_pi3_command_rd ( -- csr_value ) + mregs-virt h# 204c + l@ +; +: sdr_dfii_pi3_command_issue_rd ( -- csr_value ) + mregs-virt h# 2050 + l@ +; +: sdr_dfii_pi3_address_rd ( -- csr_value ) + mregs-virt h# 2054 + l@ +; +: sdr_dfii_pi3_baddress_rd ( -- csr_value ) + mregs-virt h# 2058 + l@ +; +: sdr_dfii_pi3_wrdata_rd ( -- csr_value ) + mregs-virt h# 205c + l@ +; +: sdr_dfii_pi3_rddata_rd ( -- csr_value ) + mregs-virt h# 2060 + l@ +; +: dphy_rst_wr ( value -- ) + mregs-virt h# 1000 + l! +; +: dphy_half_sys8x_taps_wr ( value -- ) + mregs-virt h# 1004 + l! +; +: dphy_wlevel_en_wr ( value -- ) + mregs-virt h# 1008 + l! +; +: dphy_wlevel_strobe_wr ( value -- ) + mregs-virt h# 100c + l! +; +: dphy_dly_sel_wr ( value -- ) + mregs-virt h# 1010 + l! +; +: dphy_rdly_dq_rst_wr ( value -- ) + mregs-virt h# 1014 + l! +; +: dphy_rdly_dq_inc_wr ( value -- ) + mregs-virt h# 1018 + l! +; +: dphy_rdly_dq_bitslip_rst_wr ( value -- ) + mregs-virt h# 101c + l! +; +: dphy_rdly_dq_bitslip_wr ( value -- ) + mregs-virt h# 1020 + l! +; +: dphy_wdly_dq_bitslip_rst_wr ( value -- ) + mregs-virt h# 1024 + l! +; +: dphy_wdly_dq_bitslip_wr ( value -- ) + mregs-virt h# 1028 + l! +; +: dphy_rdphase_wr ( value -- ) + mregs-virt h# 102c + l! +; +: dphy_wrphase_wr ( value -- ) + mregs-virt h# 1030 + l! +; +: sdr_dfii_control_wr ( value -- ) + mregs-virt h# 2000 + l! +; +: sdr_dfii_pi0_command_wr ( value -- ) + mregs-virt h# 2004 + l! 
+; +: sdr_dfii_pi0_command_issue_wr ( value -- ) + mregs-virt h# 2008 + l! +; +: sdr_dfii_pi0_address_wr ( value -- ) + mregs-virt h# 200c + l! +; +: sdr_dfii_pi0_baddress_wr ( value -- ) + mregs-virt h# 2010 + l! +; +: sdr_dfii_pi0_wrdata_wr ( value -- ) + mregs-virt h# 2014 + l! +; +: sdr_dfii_pi0_rddata_wr ( value -- ) + mregs-virt h# 2018 + l! +; +: sdr_dfii_pi1_command_wr ( value -- ) + mregs-virt h# 201c + l! +; +: sdr_dfii_pi1_command_issue_wr ( value -- ) + mregs-virt h# 2020 + l! +; +: sdr_dfii_pi1_address_wr ( value -- ) + mregs-virt h# 2024 + l! +; +: sdr_dfii_pi1_baddress_wr ( value -- ) + mregs-virt h# 2028 + l! +; +: sdr_dfii_pi1_wrdata_wr ( value -- ) + mregs-virt h# 202c + l! +; +: sdr_dfii_pi1_rddata_wr ( value -- ) + mregs-virt h# 2030 + l! +; +: sdr_dfii_pi2_command_wr ( value -- ) + mregs-virt h# 2034 + l! +; +: sdr_dfii_pi2_command_issue_wr ( value -- ) + mregs-virt h# 2038 + l! +; +: sdr_dfii_pi2_address_wr ( value -- ) + mregs-virt h# 203c + l! +; +: sdr_dfii_pi2_baddress_wr ( value -- ) + mregs-virt h# 2040 + l! +; +: sdr_dfii_pi2_wrdata_wr ( value -- ) + mregs-virt h# 2044 + l! +; +: sdr_dfii_pi2_rddata_wr ( value -- ) + mregs-virt h# 2048 + l! +; +: sdr_dfii_pi3_command_wr ( value -- ) + mregs-virt h# 204c + l! +; +: sdr_dfii_pi3_command_issue_wr ( value -- ) + mregs-virt h# 2050 + l! +; +: sdr_dfii_pi3_address_wr ( value -- ) + mregs-virt h# 2054 + l! +; +: sdr_dfii_pi3_baddress_wr ( value -- ) + mregs-virt h# 2058 + l! +; +: sdr_dfii_pi3_wrdata_wr ( value -- ) + mregs-virt h# 205c + l! +; +: sdr_dfii_pi3_rddata_wr ( value -- ) + mregs-virt h# 2060 + l! 
+; diff --git a/sbus-to-ztex-gateware-migen/sdram_init.fth b/sbus-to-ztex-gateware-migen/sdram_init.fth new file mode 100644 index 0000000..e86ed69 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sdram_init.fth @@ -0,0 +1,533 @@ +headers + +fload sdram_csr.fth + +external + +: popcnt ( n -- u) + 0 swap + BEGIN dup WHILE tuck 1 AND + swap 1 rshift REPEAT + DROP +; + +: cdelay ( count -- ) + \ Forth loop always have a least one iteration + dup 0<> if + 0 do noop loop + else drop then +; + +headers + +: sdram_software_control_on ( -- ) + sdr_dfii_control_rd + h# e <> if h# e sdr_dfii_control_wr then +; + +: sdram_software_control_off ( -- ) + sdr_dfii_control_rd + h# 1 <> if h# 1 sdr_dfii_control_wr then +; + +: command_p0 ( cmd -- ) + sdr_dfii_pi0_command_wr + 1 sdr_dfii_pi0_command_issue_wr +; +: command_p1 ( cmd -- ) + sdr_dfii_pi1_command_wr + 1 sdr_dfii_pi1_command_issue_wr +; +: command_p2 ( cmd -- ) + sdr_dfii_pi2_command_wr + 1 sdr_dfii_pi2_command_issue_wr +; +: command_p3 ( cmd -- ) + sdr_dfii_pi3_command_wr + 1 sdr_dfii_pi3_command_issue_wr +; + +: init_sequence ( -- ) + .( init_sequence ) cr + h# 0 sdr_dfii_pi0_address_wr + h# 0 sdr_dfii_pi0_baddress_wr + h# c sdr_dfii_control_wr + 50 ms + + h# 0 sdr_dfii_pi0_address_wr + h# 0 sdr_dfii_pi0_baddress_wr + h# e sdr_dfii_control_wr + 10 ms + + h# 200 sdr_dfii_pi0_address_wr + h# 2 sdr_dfii_pi0_baddress_wr + h# f command_p0 + + h# 0 sdr_dfii_pi0_address_wr + h# 3 sdr_dfii_pi0_baddress_wr + h# f command_p0 + + h# 6 sdr_dfii_pi0_address_wr + h# 1 sdr_dfii_pi0_baddress_wr + h# f command_p0 + + h# 920 sdr_dfii_pi0_address_wr + h# 0 sdr_dfii_pi0_baddress_wr + h# f command_p0 + 200 cdelay + + h# 400 sdr_dfii_pi0_address_wr + 0 sdr_dfii_pi0_baddress_wr + h# 3 command_p0 + 200 cdelay +; + +: sdram_read_leveling_rst_delay ( modulenum -- ) + h# 1 swap << dphy_dly_sel_wr + h# 1 dphy_rdly_dq_rst_wr + h# 0 dphy_dly_sel_wr +; + +: sdram_read_leveling_inc_delay ( modulenum -- ) + h# 1 swap << dphy_dly_sel_wr + h# 1 
dphy_rdly_dq_inc_wr + h# 0 dphy_dly_sel_wr +; + +: sdram_read_leveling_rst_bitslip ( modulenum -- ) + h# 1 swap << dphy_dly_sel_wr + h# 1 dphy_rdly_dq_bitslip_rst_wr + h# 0 dphy_dly_sel_wr +; + +: sdram_read_leveling_inc_bitslip ( modulenum -- ) + h# 1 swap << dphy_dly_sel_wr + h# 1 dphy_rdly_dq_bitslip_wr + h# 0 dphy_dly_sel_wr +; + +: lfsr ( bits prev -- res ) + dup 1 and not ( bits prev -- bits prev ~{prev&1} ) + swap 1 >> ( bits prev ~{prev&1} -- bits ~{prev&1} {prev>>1} ) + swap ( bits prev ~{prev&1} -- bits {prev>>1} ~{prev&1} ) + rot ( bits {prev>>1} ~{prev&1} -- {prev>>1} ~{prev&1} bits ) + \ assume bits is 32, 'cause it is + drop h# 80200003 ( {prev>>1} ~{prev&1} bits -- {prev>>1} ~{prev&1} lfsr_taps[bits] ) + and + xor +; + +: sdram_activate_test_row ( -- ) + h# 0 sdr_dfii_pi0_address_wr + h# 0 sdr_dfii_pi0_baddress_wr + h# 9 command_p0 + 15 cdelay +; + +: sdram_precharge_test_row ( -- ) + h# 0 sdr_dfii_pi0_address_wr + h# 0 sdr_dfii_pi0_baddress_wr + h# b command_p0 + 15 cdelay +; + +: command_px ( phase value -- ) + over 3 = if dup command_p3 then + over 2 = if dup command_p2 then + over 1 = if dup command_p1 then + over 0 = if dup command_p0 then + 2drop +; + +: command_prd ( value -- ) + dphy_rdphase_rd + swap command_px +; + +: command_pwr ( value -- ) + dphy_wrphase_rd + swap command_px +; + +: sdr_dfii_pix_address_wr ( phase value -- ) + over 3 = if dup sdr_dfii_pi3_address_wr then + over 2 = if dup sdr_dfii_pi2_address_wr then + over 1 = if dup sdr_dfii_pi1_address_wr then + over 0 = if dup sdr_dfii_pi0_address_wr then + 2drop +; + +: sdr_dfii_pird_address_wr ( value -- ) + dphy_rdphase_rd + swap sdr_dfii_pix_address_wr +; + +: sdr_dfii_piwr_address_wr ( value -- ) + dphy_wrphase_rd + swap sdr_dfii_pix_address_wr +; + +: sdr_dfii_pix_baddress_wr ( phase value -- ) + over 3 = if dup sdr_dfii_pi3_baddress_wr then + over 2 = if dup sdr_dfii_pi2_baddress_wr then + over 1 = if dup sdr_dfii_pi1_baddress_wr then + over 0 = if dup sdr_dfii_pi0_baddress_wr 
then + 2drop +; + +: sdr_dfii_pird_baddress_wr ( value -- ) + dphy_rdphase_rd + swap sdr_dfii_pix_baddress_wr +; + +: sdr_dfii_piwr_baddress_wr ( value -- ) + dphy_wrphase_rd + swap sdr_dfii_pix_baddress_wr +; + +: sdr_wr_rd_chk_tst_pat_get ( seed -- A B C D ) +\ .( sdr_wr_rd_chk_tst_pat_get ) cr + dup 42 = if h# 00000080 swap then + dup 42 = if h# 00000000 swap then + dup 42 = if h# 00000000 swap then + dup 42 = if h# 15090700 swap then + dup 84 = if h# 00000000 swap then + dup 84 = if h# 00000000 swap then + dup 84 = if h# 00000000 swap then + dup 84 = if h# 2a150907 swap then + drop +; + +: sdr_wr_rd_check_test_pattern ( modulenum seed -- errors ) +\ .( sdr_wr_rd_check_test_pattern ) cr + sdram_activate_test_row + dup sdr_wr_rd_chk_tst_pat_get + \ should have the 4 patterns on top of the stack: modulenum seed p0 p1 p2 p3 + sdr_dfii_pi0_wrdata_wr + sdr_dfii_pi1_wrdata_wr + sdr_dfii_pi2_wrdata_wr + sdr_dfii_pi3_wrdata_wr + \ should be back at modulenum seed + h# 0 sdr_dfii_piwr_address_wr + h# 0 sdr_dfii_piwr_baddress_wr + h# 17 command_pwr + 15 cdelay + + h# 0 sdr_dfii_pird_address_wr + h# 0 sdr_dfii_pird_baddress_wr + h# 25 command_prd + 15 cdelay + + sdram_precharge_test_row + + sdr_wr_rd_chk_tst_pat_get + \ should have the 4 patterns on top of the stack: modulenum p0 p1 p2 p3 + sdr_dfii_pi0_rddata_rd xor popcnt + \ should be at modulenum p0 p1 p2 errors + swap sdr_dfii_pi0_rddata_rd xor popcnt + + \ should be at modulenum p0 p1 errors + swap sdr_dfii_pi0_rddata_rd xor popcnt + + \ should be at modulenum p0 errors + swap sdr_dfii_pi0_rddata_rd xor popcnt + + \ should be at modulenum errors + \ drop modulenum + nip +; + +: sdram_read_leveling_scan_module ( modulenum bitslip -- score ) +\ .( sdram_read_leveling_scan_module ) cr + over sdram_read_leveling_rst_delay + \ push score + 0 + \ we should be at 'modulenum bitslip score' + 32 0 do +\ .( starting rd_lvl_scan loop with stack: ) .s cr + 2 pick 42 sdr_wr_rd_check_test_pattern + \ now we have an error count at 
the top + 3 pick 84 sdr_wr_rd_check_test_pattern + \ merge both error count + + + \ we should be at 'modulenum bitslip score errorcount' + dup 0= + \ we should be at 'modulenum bitslip score errorcount working?' + if 16384 else 0 then + \ we should be at 'modulenum bitslip score errorcount (0|16384)' + swap 512 swap - + \ we should be at 'modulenum bitslip score (0|16384) (512-errorcount)' + + + + + \ we should be at 'modulenum bitslip score' + 2 pick sdram_read_leveling_inc_delay + loop + nip + nip +; + +: sdr_wr_lat_cal_bitslip_loop ( modulenum bestbitslip bestscore bitslip -- modulenum bestbitslip bestscore ) +\ .( sdr_wr_lat_cal_bitslip_loop for module: ) 3 pick . .( bitslip: ) dup . cr +\ .( sdr_wr_lat_cal_bitslip_loop, stack: ) .s cr + 1 4 pick << dphy_dly_sel_wr ( '4 pick' will extract modulenum, needed as we're stacking the '1' ) + 1 dphy_wdly_dq_bitslip_rst_wr + \ Forth loop always have a least one iteration + dup 0<> if + dup 0 do + 1 dphy_wdly_dq_bitslip_wr + loop + then + 0 dphy_dly_sel_wr +\ .( sdr_wr_lat_cal_bitslip_loop after bitslip init loop, stack: ) .s cr + \ push current score + 0 ( we should be at 'modulenum bestbitslip bestscore bitslip score' ) + 4 pick sdram_read_leveling_rst_bitslip + 8 0 do + 4 pick over sdram_read_leveling_scan_module + \ we should be at 'modulenum bestbitslip bestscore bitslip score score', max will merge scores + max + \ we should be at 'modulenum bestbitslip bestscore bitslip score' again + 4 pick sdram_read_leveling_inc_bitslip + loop + .( sdr_wr_lat_cal_bitslip_loop after bitslip check loop, stack: ) .s cr + dup 3 pick > + if +\ .( lat_cal best bitslip was: ) 3 pick . .( with score: ) 2 pick . cr + 2swap + .( lat_cal best bitslip now: ) 3 pick . .( with score: ) 2 pick . cr + then + 2drop +\ .( sdr_wr_lat_cal_bitslip_loop end, stack: ) .s cr +; + +: sdr_wr_lat_cal_module_loop ( modulenum -- ) + .( sdr_wr_lat_cal_module_loop for module: ) dup . 
cr + \ push best_bitslip + -1 + \ push best_score + 0 + \ we should have 'modulenum 1 0' + 8 0 do + i sdr_wr_lat_cal_bitslip_loop + 2 +loop + \ we should be at 'modulenum bestbitslip bestscore' + \ we don't need score anymore + drop + \ we should be at 'modulenum bestbitslip' + 1 2 pick << dphy_dly_sel_wr + 1 dphy_wdly_dq_bitslip_rst_wr + .( sdr_wr_lat_cal_module_loop: best bitslip: ) dup . cr + \ loop that consumes bestbitslip as the upper bound + \ Forth loop always have a least one iteration + dup 0<> if + 0 do + 1 dphy_wdly_dq_bitslip_wr + loop + else drop then + 0 dphy_dly_sel_wr + \ drop the modulenum + drop +; + +: sdram_write_latency_calibration ( -- ) + .( sdram_write_latency_calibration ) cr + 2 0 do + i sdr_wr_lat_cal_module_loop + loop +; + +: sdram_leveling_center_module ( modulenum -- ) + .( sdram_leveling_center_module ) cr + dup sdram_read_leveling_rst_delay + \ push delay_min + -1 + \ push delay + 0 + \ we should be at 'modulenum delay_min delay' + begin +\ .( starting lvl_center loop with stack: ) .s cr + 2 pick 42 sdr_wr_rd_check_test_pattern + .( we should be at 'modulenum delay_min delay error' stack: ) .s cr + 3 pick 84 sdr_wr_rd_check_test_pattern + .( we should be at 'modulenum delay_min delay error error' stack: ) .s cr + + + \ we should be at 'modulenum delay_min delay error' +\ .( we should be at 'modulenum delay_min delay error' stack: ) .s cr + 0= + \ we should be at 'modulenum delay_min delay working' +\ .( we should be at 'modulenum delay_min delay working' stack: ) .s cr + 2 pick 0< and + \ we should be at 'modulenum delay_min delay {working&delay_min<0}' +\ .( we should be at 'modulenum delay_min delay {working&delay_min<0}' stack: ) .s cr + dup if rot drop 2dup rot drop then + not + \ we should be at 'modulenum new_delay_min delay !{working&delay_min<0}' +\ .( we should be at 'modulenum new_delay_min delay !{working&delay_min<0}' stack: ) .s cr + \ test delay before incrementing, if already 31 no point in continuing/incrementing + 
over 31 < +\ .( we should be at 'modulenum new_delay_min delay !{working&delay_min<0} <31' stack: ) .s cr + dup if rot 1+ -rot then + dup if 4 pick sdram_read_leveling_inc_delay then + \ and the conditions to signal end-of-loop + and +\ .( we should be at 'modulenum new_delay_min delay !{working&delay_min<0}&<31' stack: ) .s cr +\ .( finishing lvl_center loop with stack: ) .s cr + not until + \ we should be at 'modulenum new_delay_min delay', the while has consumed the condition + .( we should be at 'modulenum new_delay_min delay' stack: ) .s cr + 1+ + 2 pick sdram_read_leveling_inc_delay + \ build a clean stack, startin with a copy of modulenum + 2 pick + \ push delay_max + -1 + \ we're at 'modulenum new_delay_min delay modulenum delay_max' + \ push delay + 2 pick + \ we're at 'modulenum new_delay_min delay modulenum delay_max delay' + .( we should be at 'modulenum new_delay_min delay modulenum delay_max delay ' stack: ) .s cr + \ this is almost the same loop, except with !working instead of working and delay_max instead of delay_min + begin + 2 pick 42 sdr_wr_rd_check_test_pattern + 3 pick 84 sdr_wr_rd_check_test_pattern + + + \ we should be at 'modulenum delay_max delay error' + 0<> + \ we should be at 'modulenum delay_max delay !working' + 2 pick 0< and + \ we should be at 'modulenum delay_max delay {!working&delay_max<0}' + dup if rot drop 2dup rot drop then + not + \ we should be at 'modulenum new_delay_max delay !{!working&delay_max<0}' + \ test delay before incrementing, if already 31 no point in continuing/incrementing + over 31 < + dup not if rot 1+ -rot then + dup not if 4 pick sdram_read_leveling_inc_delay then + \ and the conditions to signal end-of-loop + and + not until + \ we should be at 'modulenum new_delay_min delay modulenum new_delay_max delay', the while has consumed the condition + .( we should be at 'modulenum new_delay_min delay modulenum new_delay_max delay ' stack: ) .s cr + \ keep delay if new_delay_max<0, new_delay_max otherwise + over 
0< if nip else drop then + \ we should be at 'modulenum new_delay_min delay modulenum new_delay_max' + nip + nip + \ we should be at 'modulenum new_delay_min new_delay_max' + .( we should be at 'modulenum new_delay_min new_delay_max' stack: ) .s cr + \ compute delay_mid + 2dup + 2/ 32 mod + \ we should be at 'modulenum new_delay_min new_delay_max {{new_delay_min+new_delay_max}/2%32}' + \ compute delay_range + 3dup drop swap - 2/ + \ we should be at 'modulenum new_delay_min new_delay_max {{new_delay_min+new_delay_max}/2%32} {{new_delay_max-new_delay_min}/2}' + .( we should be at 'modulenum new_delay_min new_delay_max delay_mid delay_range ' stack: ) .s cr + 4 pick sdram_read_leveling_rst_delay + 100 cdelay + \ Forth loop always have a least one iteration + over 0<> if + over 0 do + 4 pick sdram_read_leveling_inc_delay + 100 cdelay + loop + then + drop + drop + drop + drop + drop +; + +: sdr_rd_lvl_bitslip_loop ( modulenum bestbitslip bestscore bitslip -- modulenum bestbitslip bestscore ) +\ .( sdr_rd_lvl_bitslip_loop, stack: ) .s cr + 3 pick over sdram_read_leveling_scan_module + \ we should be at 'modulenum bestbitslip bestscore bitslip score' + 4 pick sdram_leveling_center_module + \ preserve a bitslip for the later test + over + \ (we should be at 'modulenum bestbitslip bestscore bitslip score bitslip') move it out of the way + .( we should be at 'modulenum bestbitslip bestscore bitslip score bitslip' stack: ) .s cr + 5 roll ( 'modulenum bestscore bitslip score bitslip bestbitslip' ) + 5 roll ( 'modulenum bitslip score bitslip bestbitslip bestscore' ) + 5 roll ( 'modulenum score bitslip bestbitslip bestscore bitslip' ) + 5 roll ( 'modulenum bitslip bestbitslip bestscore bitslip score' ) + .( we should be at 'modulenum bitslip bestbitslip bestscore bitslip score' stack: ) .s cr + \ compare the score and bestcore + dup 3 pick > + if + 2swap + .( rd_lvl best bitslip now: ) 3 pick . .( with score: ) 2 pick . 
cr + then + 2drop + \ we should be at 'modulenum bitslip bestbitslip bestscore' + rot + \ we should be at 'modulenum bestbitslip bestscore bitslip' + .( we should be at 'modulenum bestbitslip bestscore bitslip' stack: ) .s cr + 7 <> if 2 pick sdram_read_leveling_inc_bitslip then +; + +: sdr_rd_lvl_module_loop ( modulenum -- ) + .( sdr_rd_lvl_module_loop ) cr + 1 over << sdram_read_leveling_rst_bitslip + \ push best_bitslip + 0 + \ push best_score + 0 + \ we should have 'modulenum 0 0' + 8 0 do + i sdr_rd_lvl_bitslip_loop + loop + \ don't need the score anymore + drop + 2 pick sdram_read_leveling_rst_bitslip + .( sdr_rd_lvl_module_loop, best bitslip: ) dup . cr + \ Forth loops always have at least one iteration + dup 0<> if + \ consume best_bitslip as loop upper bound + 0 do + dup sdram_leveling_center_module + loop + else drop then + drop +; + +: sdram_read_leveling ( -- ) + .( sdram_read_leveling ) cr + 2 0 do + i sdr_rd_lvl_module_loop + loop +; + +: sdram_leveling ( -- ) + .( sdram_leveling ) cr + sdram_software_control_on + 2 0 do + i sdram_read_leveling_rst_delay + i sdram_read_leveling_rst_bitslip + loop + sdram_write_latency_calibration + sdram_read_leveling + sdram_software_control_off +; + +external + +: init_sdram ( -- ) + .( init_sdram ) cr + 2 dphy_rdphase_wr + 3 dphy_wrphase_wr + sdram_software_control_on + 1 dphy_rst_wr + 1 ms + 0 dphy_rst_wr + 1 ms + .( going to init_sequence ) cr + init_sequence + .( going to sdram_leveling ) cr + sdram_leveling + \ redundant + sdram_software_control_off +; + +: init!
( -- ) + .( init ) cr + map-in-mregs + init_sdram + map-out-mregs +; From b98fedf47a782f0329bd576c8d25ca662e23885b Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 05:15:42 -0400 Subject: [PATCH 28/78] cleanup, CSR for prom & netbsd (preliminary) --- .../sbus_to_fpga_export.py | 133 ++++++++++++++++++ .../sbus_to_fpga_soc.py | 105 ++++---------- 2 files changed, 160 insertions(+), 78 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py new file mode 100644 index 0000000..f772f5e --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py @@ -0,0 +1,133 @@ +import os +import json +import inspect +from shutil import which +from sysconfig import get_platform + +from migen import * + +from litex.soc.interconnect.csr import CSRStatus + +from litex.build.tools import generated_banner + +from litex.soc.doc.rst import reflow +from litex.soc.doc.module import gather_submodules, ModuleNotDocumented, DocumentedModule, DocumentedInterrupts +from litex.soc.doc.csr import DocumentedCSRRegion +from litex.soc.interconnect.csr import _CompoundCSR + +# for generating a timestamp in the description field, if none is otherwise given +import datetime +import time + +def _get_rw_functions_c(name, csr_name, reg_base, area_base, nwords, busword, alignment, read_only, with_access_functions): + reg_name = name + "_" + csr_name + r = "" + + addr_str = "CSR_{}_ADDR".format(reg_name.upper()) + size_str = "CSR_{}_SIZE".format(reg_name.upper()) + r += "#define {} (CSR_{}_BASE + {}L)\n".format(addr_str, name.upper(), hex(reg_base - area_base)) + r += "#define {} {}\n".format(size_str, nwords) + + size = nwords*busword//8 + if size > 8: + # downstream should select appropriate `csr_[rd|wr]_buf_uintX()` pair! 
+ return r + elif size > 4: + ctype = "uint64_t" + elif size > 2: + ctype = "uint32_t" + elif size > 1: + ctype = "uint16_t" + else: + ctype = "uint8_t" + + stride = alignment//8; + if with_access_functions: + r += "static inline {} {}_read(struct sbusfpga_sdram_softc *sc) {{\n".format(ctype, reg_name) + if nwords > 1: + r += "\t{} r = bus_space_read_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L);\n".format(ctype, name, hex(reg_base - area_base)) + for sub in range(1, nwords): + r += "\tr <<= {};\n".format(busword) + r += "\tr |= bus_space_read_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L);\n".format(name, hex(reg_base - area_base + sub*stride)) + r += "\treturn r;\n}\n" + else: + r += "\treturn bus_space_read_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L);\n}}\n".format(name, hex(reg_base - area_base)) + + if not read_only: + r += "static inline void {}_write(struct sbusfpga_sdram_softc *sc, {} v) {{\n".format(reg_name, ctype) + for sub in range(nwords): + shift = (nwords-sub-1)*busword + if shift: + v_shift = "v >> {}".format(shift) + else: + v_shift = "v" + r += "\tbus_space_write_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L, {});\n".format(name, hex(reg_base - area_base + sub*stride), v_shift) + r += "}\n" + return r + + +def get_csr_header(regions, constants, csr_base=None, with_access_functions=True): + alignment = constants.get("CONFIG_CSR_ALIGNMENT", 32) + r = generated_banner("//") + #if with_access_functions: # FIXME + # r += "#include \n" + r += "#ifndef __GENERATED_CSR_H\n#define __GENERATED_CSR_H\n" + #if with_access_functions: + # r += "#include \n" + # r += "#include \n" + # r += "#ifndef CSR_ACCESSORS_DEFINED\n" + # r += "#include \n" + # r += "#endif /* ! 
CSR_ACCESSORS_DEFINED */\n" + csr_base = csr_base if csr_base is not None else regions[next(iter(regions))].origin + r += "#ifndef CSR_BASE\n" + r += "#define CSR_BASE {}L\n".format(hex(csr_base)) + r += "#endif\n" + for name, region in regions.items(): + origin = region.origin - csr_base + r += "\n/* "+name+" */\n" + r += "#ifndef CSR_"+name.upper()+"_BASE\n" + r += "#define CSR_"+name.upper()+"_BASE (CSR_BASE + "+hex(origin)+"L)\n" + r += "#endif\n" + if not isinstance(region.obj, Memory): + for csr in region.obj: + nr = (csr.size + region.busword - 1)//region.busword + r += _get_rw_functions_c(name, csr.name, origin, region.origin - csr_base, nr, region.busword, alignment, + getattr(csr, "read_only", False), with_access_functions) + origin += alignment//8*nr + if hasattr(csr, "fields"): + for field in csr.fields.fields: + offset = str(field.offset) + size = str(field.size) + r += "#define CSR_"+name.upper()+"_"+csr.name.upper()+"_"+field.name.upper()+"_OFFSET "+offset+"\n" + r += "#define CSR_"+name.upper()+"_"+csr.name.upper()+"_"+field.name.upper()+"_SIZE "+size+"\n" + if with_access_functions and csr.size <= 32: # FIXME: Implement extract/read functions for csr.size > 32-bit. 
+ reg_name = name + "_" + csr.name.lower() + field_name = reg_name + "_" + field.name.lower() + r += "static inline uint32_t " + field_name + "_extract(struct sbusfpga_sdram_softc *sc, uint32_t oldword) {\n" + r += "\tuint32_t mask = ((1 << " + size + ")-1);\n" + r += "\treturn ( (oldword >> " + offset + ") & mask );\n}\n" + r += "static inline uint32_t " + field_name + "_read(struct sbusfpga_sdram_softc *sc) {\n" + r += "\tuint32_t word = " + reg_name + "_read(sc);\n" + r += "\treturn " + field_name + "_extract(word);\n" + r += "}\n" + if not getattr(csr, "read_only", False): + r += "static inline uint32_t " + field_name + "_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) {\n" + r += "\tuint32_t mask = ((1 << " + size + ")-1);\n" + r += "\treturn (oldword & (~(mask << " + offset + "))) | (mask & plain_value)<< " + offset + " ;\n}\n" + r += "static inline void " + field_name + "_write(struct sbusfpga_sdram_softc *sc, uint32_t plain_value) {\n" + r += "\tuint32_t oldword = " + reg_name + "_read(sc);\n" + r += "\tuint32_t newword = " + field_name + "_replace(sc, oldword, plain_value);\n" + r += "\t" + reg_name + "_write(sc, newword);\n" + r += "}\n" + + r += "\n#endif\n" + return r + + +def get_csr_forth_header(csr_regions, mem_regions, constants, csr_base=None): + r = "\\ auto-generated base regions for CSRs in the PROM\n" + for name, region in csr_regions.items(): + r += "h# " + hex(region.origin).replace("0x", "") + " constant " + "sbusfpga_csraddr_{}".format(name) + "\n" + for name, region in mem_regions.items(): + r += "h# " + hex(region.origin).replace("0x", "") + " constant " + "sbusfpga_regionaddr_{}".format(name) + "\n" + return r diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 698020a..1a3b8b6 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -18,6 +18,8 @@ from litedram.phy import s7ddrphy 
from sbus_to_fpga_fsm import *; +import sbus_to_fpga_export; + _sbus_sbus = [ ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), @@ -98,7 +100,7 @@ class _CRG(Module): self.submodules.pll_idelay = pll_idelay = S7PLL(speedgrade=-1) pll_idelay.register_clkin(clk48, 48e6) - pll_idelay.create_clkout(self.cd_idelay, 200e6) + pll_idelay.create_clkout(self.cd_idelay, 200e6, margin = 0) self.comb += pll_idelay.reset.eq(~rst_sbus) # | ~por_done self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay) @@ -122,6 +124,13 @@ class SBusFPGA(SoCCore): clk_freq=sys_clk_freq, csr_paging=0x1000, # default is 0x800 **kwargs) + + # This mem-map is also exposed in the FSM (matched prefixes) + # and in the PROM (to tell NetBSD where everything is) + # Currently it is a straight mapping between the two: + # the physical address here are used as offset in the SBus + # reserved area of 256 MiB + # Anything at 0x10000000 is therefore unreachable directly wb_mem_map = { "prom": 0x00000000, "csr" : 0x00040000, @@ -180,66 +189,8 @@ class SBusFPGA(SoCCore): hold_reset = Signal(reset=1) self.comb += hold_reset.eq(~(hold_reset_ctr == 0)) - - # FIFO to send data & address from SBus to the Wishbone - ##sbus_to_wishbone_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) - ##sbus_to_wishbone_wr_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_wr_fifo) - ##self.submodules += sbus_to_wishbone_wr_fifo - - # FIFOs to send address / receive data from SBus to the Wishbone - ##sbus_to_wishbone_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=16) - ##sbus_to_wishbone_rd_fifo_addr = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(sbus_to_wishbone_rd_fifo_addr) - ##self.submodules += sbus_to_wishbone_rd_fifo_addr - ##sbus_to_wishbone_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=16) - ##sbus_to_wishbone_rd_fifo_data = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(sbus_to_wishbone_rd_fifo_data) - 
##self.submodules += sbus_to_wishbone_rd_fifo_data - - # SBus to Wishbone, 'Slave' on the SBus side, 'Master' on the Wishbone side - ##self.submodules.sbus_to_wishbone = SBusToWishbone(platform=self.platform, - ## wr_fifo=sbus_to_wishbone_wr_fifo, - ## rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - ## rd_fifo_data=sbus_to_wishbone_rd_fifo_data, - ## wishbone=wishbone.Interface(data_width=self.bus.data_width)) - - - # FIFO to send data & address from Wishbone to the SBus - ##wishbone_to_sbus_wr_fifo = AsyncFIFOBuffered(width=32+30, depth=16) - ##wishbone_to_sbus_wr_fifo = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_wr_fifo) - ##self.submodules += wishbone_to_sbus_wr_fifo - - # FIFOs to send address / receive data from Wishbone to the SBus - ##wishbone_to_sbus_rd_fifo_addr = AsyncFIFOBuffered(width=30, depth=4) - ##wishbone_to_sbus_rd_fifo_addr = ClockDomainsRenamer({"write": "sys", "read": "sbus"})(wishbone_to_sbus_rd_fifo_addr) - ##self.submodules += wishbone_to_sbus_rd_fifo_addr - ##wishbone_to_sbus_rd_fifo_data = AsyncFIFOBuffered(width=32+1, depth=4) - ##wishbone_to_sbus_rd_fifo_data = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(wishbone_to_sbus_rd_fifo_data) - ##self.submodules += wishbone_to_sbus_rd_fifo_data - - # Wishbone to SBus, 'Master' on the SBus side, 'Slave' on the Wishbone side - ##self.submodules.wishbone_to_sbus = WishboneToSBus(platform=self.platform, - ## soc=self, - ## wr_fifo=wishbone_to_sbus_wr_fifo, - ## rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, - ## rd_fifo_data=wishbone_to_sbus_rd_fifo_data, - ## wishbone=wishbone.Interface(data_width=self.bus.data_width)) - - ##_sbus_bus = SBusFPGABus(platform=self.platform, - ## prom=prom, - ## hold_reset=hold_reset, - ## wr_fifo=sbus_to_wishbone_wr_fifo, - ## rd_fifo_addr=sbus_to_wishbone_rd_fifo_addr, - ## rd_fifo_data=sbus_to_wishbone_rd_fifo_data, - ## master_wr_fifo=wishbone_to_sbus_wr_fifo, - ## master_rd_fifo_addr=wishbone_to_sbus_rd_fifo_addr, - ## 
master_rd_fifo_data=wishbone_to_sbus_rd_fifo_data) - ##self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) - - #wishbone_slave = wishbone.Interface(data_width=self.bus.data_width) - #wishbone_master = wishbone.Interface(data_width=self.bus.data_width) - - #wishbone_slave = wishbone.Interface(data_width=self.bus.data_width) - #wishbone_master = wishbone.Interface(data_width=self.bus.data_width) - + # Interface SBus to wishbone + # we need to cross clock domains wishbone_slave_sbus = wishbone.Interface(data_width=self.bus.data_width) wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) self.submodules.wishbone_master_sbus = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="sbus", cd_slave="sys") @@ -253,11 +204,6 @@ class SBusFPGA(SoCCore): #self.submodules.sbus_bus = _sbus_bus self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) - ##self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_to_wishbone.wishbone) - ##self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_to_sbus.wishbone, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) - - #self.bus.add_master(name="SBusBridgeToWishbone", master=self.sbus_bus.wishbone_master) - #self.bus.add_slave(name="usb_fake_dma", slave=self.sbus_bus.wishbone_slave, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) @@ -273,18 +219,6 @@ class SBusFPGA(SoCCore): l2_cache_size = 0 ) -# self.soc = Module() - # self.soc.mem_regions = self.mem_regions = {} - # region = litex.soc.integration.soc.SoCRegion(origin=0x0, size=0x0) - # region.length = 0 - # self.mem_regions['csr'] = region - # 
self.soc.constants = self.constants = {} - # self.soc.csr_regions = self.csr_regions = {} - # self.soc.cpu_type = self.cpu_type = None - -# def do_finalize(self): -# self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) - def main(): parser = argparse.ArgumentParser(description="SbusFPGA") parser.add_argument("--build", action="store_true", help="Build bitstream") @@ -298,5 +232,20 @@ def main(): builder = Builder(soc, **builder_argdict(args)) builder.build(**vivado_build_argdict(args), run=args.build) + # Generate modified CSR registers definitions/access functions to netbsd_csr.h. + csr_contents = sbus_to_fpga_export.get_csr_header( + regions = soc.csr_regions, + constants = soc.constants, + csr_base = soc.mem_regions['csr'].origin) + write_to_file(os.path.join("netbsd_csr.h"), csr_contents) + + # tells the prom where to find what + csr_forth_contents = sbus_to_fpga_export.get_csr_forth_header( + csr_regions = soc.csr_regions, + mem_regions = soc.mem_regions, + constants = soc.constants, + csr_base = soc.mem_regions['csr'].origin) + write_to_file(os.path.join("prom_csr.fth"), csr_forth_contents) + if __name__ == "__main__": main() From bd011b9d9fa72b2f9d203ee2f9496f270f00b9dd Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 06:35:20 -0400 Subject: [PATCH 29/78] Forgotten-to-commit update --- .../9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c | 193 +++--------------- .../9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.h | 4 +- 2 files changed, 35 insertions(+), 162 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c b/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c index de00be6..8cee48e 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c @@ -112,7 +112,7 @@ extern struct cfdriver rdfpga_sdcard_cd; static int rdfpga_sdcard_wait_dma_ready(struct rdfpga_sdcard_softc *sc, const int count); static int 
rdfpga_sdcard_wait_device_ready(struct rdfpga_sdcard_softc *sc, const int count); -static int rdfpga_sdcard_read_block(struct rdfpga_sdcard_softc *sc, const u_int32_t block, void *data); +static int rdfpga_sdcard_read_block(struct rdfpga_sdcard_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data); static int rdfpga_sdcard_write_block(struct rdfpga_sdcard_softc *sc, const u_int32_t block, void *data); struct rdfpga_sdcard_rb_32to512 { @@ -179,7 +179,7 @@ rdfpga_sdcard_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) case RDFPGA_SDCARD_RB: { struct rdfpga_sdcard_rb_32to512* u = data; - err = rdfpga_sdcard_read_block(sc, u->block, u->data); + err = rdfpga_sdcard_read_block(sc, u->block, 1, u->data); break; } case RDFPGA_SDCARD_WB: @@ -188,49 +188,6 @@ rdfpga_sdcard_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) err = rdfpga_sdcard_write_block(sc, u->block, u->data); break; } - - - #if 0 - case DIOCGDINFO: - *(struct disklabel *)data = *(sc->dk.sc_dkdev.dk_label); - break; - - case DIOCGDEFLABEL: - { - struct disklabel *lp = sc->dk.sc_dkdev.dk_label; - struct cpu_disklabel *clp = sc->dk.sc_dkdev.dk_cpulabel; - memset(lp, 0, sizeof(struct disklabel)); - memset(clp, 0, sizeof(struct cpu_disklabel)); - if (readdisklabel(dev, rdfpga_sdcard_strategy, lp, clp) != NULL) { - int i; - aprint_normal_dev(sc->dk.sc_dev, "read disk label OK\n"); - strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); - /* - * Reset the partition info; it might have gotten - * trashed in readdisklabel(). - * - * XXX Why do we have to do this? readdisklabel() - * should be safe... 
- */ - for (i = 0; i < MAXPARTITIONS; ++i) { - lp->d_partitions[i].p_offset = 0; - if (i == RAW_PART) { - lp->d_partitions[i].p_size = - lp->d_secpercyl * lp->d_ncylinders; - lp->d_partitions[i].p_fstype = FS_BSDFFS; - } else { - lp->d_partitions[i].p_size = 0; - lp->d_partitions[i].p_fstype = FS_UNUSED; - } - } - lp->d_npartitions = RAW_PART + 1; - memcpy(data, lp, sizeof(struct disklabel)); - } else { - aprint_normal_dev(sc->dk.sc_dev, "read disk label FAILED\n"); - } - } - break; -#endif /* case VNDIOCCLR: */ /* case VNDIOCCLR50: */ @@ -505,9 +462,10 @@ static int rdfpga_sdcard_wait_device_ready(struct rdfpga_sdcard_softc *sc, const return rdfpga_sdcard_wait_dma_ready(sc, count); } -static int rdfpga_sdcard_read_block(struct rdfpga_sdcard_softc *sc, const u_int32_t block, void *data) { +static int rdfpga_sdcard_read_block(struct rdfpga_sdcard_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data) { int res = 0; - u_int32_t ctrl; + u_int32_t ctrl = 0; + u_int32_t idx = 0; /* aprint_normal_dev(sc->dk.sc_dev, "Reading block %u from sdcard\n", block); */ if ((res = rdfpga_sdcard_wait_device_ready(sc, 50000)) != 0) @@ -524,9 +482,6 @@ static int rdfpga_sdcard_read_block(struct rdfpga_sdcard_softc *sc, const u_int3 bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); return ENXIO; } - - /* for testing only, remove */ - //memcpy(kvap, data, 512); if (bus_dmamap_load(sc->sc_dmatag, sc->sc_dmamap, kvap, RDFPGA_SDCARD_VAL_DMA_MAX_SZ, /* kernel space */ NULL, BUS_DMA_NOWAIT | BUS_DMA_STREAMING | BUS_DMA_WRITE)) { @@ -536,24 +491,28 @@ static int rdfpga_sdcard_read_block(struct rdfpga_sdcard_softc *sc, const u_int3 return ENXIO; } - bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 512, BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, blkcnt * 512, BUS_DMASYNC_PREWRITE); - /* set DMA address */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs, RDFPGA_SDCARD_REG_DMAW_ADDR, (uint32_t)(sc->sc_dmamap->dm_segs[0].ds_addr)); - /* set block to 
read */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs, RDFPGA_SDCARD_REG_ADDR, block); - ctrl = RDFPGA_SDCARD_CTRL_START | RDFPGA_SDCARD_CTRL_READ; - /* initiate reading block from SDcard; once the read request is acknowledged, the HW will start the DMA engine */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs, RDFPGA_SDCARD_REG_CTRL, ctrl); + for (idx = 0 ; idx < blkcnt && !res; idx++) { + bus_addr_t addr = sc->sc_dmamap->dm_segs[0].ds_addr + 512 * idx; + + /* set DMA address */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs, RDFPGA_SDCARD_REG_DMAW_ADDR, (uint32_t)(addr)); + /* set block to read */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs, RDFPGA_SDCARD_REG_ADDR, (block + idx)); + ctrl = RDFPGA_SDCARD_CTRL_START | RDFPGA_SDCARD_CTRL_READ; + /* initiate reading block from SDcard; once the read request is acknowledged, the HW will start the DMA engine */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs, RDFPGA_SDCARD_REG_CTRL, ctrl); + + res = rdfpga_sdcard_wait_device_ready(sc, 100000); + } - res = rdfpga_sdcard_wait_device_ready(sc, 100000); - - bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 512, BUS_DMASYNC_POSTWRITE); + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, blkcnt * 512, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmatag, sc->sc_dmamap); /* aprint_normal_dev(sc->dk.sc_dev, "dma: unloaded\n"); */ - memcpy(data, kvap, 512); + memcpy(data, kvap, blkcnt * 512); bus_dmamem_unmap(sc->sc_dmatag, kvap, RDFPGA_SDCARD_VAL_DMA_MAX_SZ); /* aprint_normal_dev(sc->dk.sc_dev, "dma: unmapped\n"); */ @@ -625,100 +584,9 @@ static int rdfpga_sdcard_write_block(struct rdfpga_sdcard_softc *sc, const u_int void rdfpga_sdcard_strategy(struct buf *bp) { -#if 0 - struct rdfpga_sdcard_softc *sc = device_lookup_private(&rdfpga_sdcard_cd, DISKUNIT(bp->b_dev)); - int err = 0; - if (sc == NULL) { - aprint_error("%s:%d: sc == NULL! 
giving up\n", __PRETTY_FUNCTION__, __LINE__); - bp->b_resid = bp->b_bcount; - bp->b_error = EINVAL; - goto done; - } - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bflags = 0x%08x\n", __PRETTY_FUNCTION__, __LINE__, bp->b_flags); */ - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bufsize = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bufsize); */ - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno); */ - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); */ - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bcount = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bcount); */ - - bp->b_resid = bp->b_bcount; - - if (bp->b_bcount == 0) { - goto done; - } - - if (bp->b_flags & B_READ) { - unsigned char* data = bp->b_data; - daddr_t blk = bp->b_blkno; - struct partition *p = NULL; - - if (DISKPART(bp->b_dev) != RAW_PART) { - if ((err = bounds_check_with_label(&sc->dk.sc_dkdev, bp, 0)) <= 0) { - aprint_error("%s:%d: bounds_check_with_label -> %d\n", __PRETTY_FUNCTION__, __LINE__, err); - bp->b_resid = bp->b_bcount; - goto done; - } - p = &sc->dk.sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; - blk = bp->b_blkno + p->p_offset; - } - - while (bp->b_resid >= 512 && !bp->b_error) { - if (blk < 62521344) { - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld, computed %lld (part %d)\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno, blk, DISKPART(bp->b_dev)); -aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); - bp->b_error = rdfpga_sdcard_read_block(sc, blk, data); - } else { - aprint_error("%s:%d: blk = %lld read out of range! 
giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); - bp->b_error = EINVAL; - } - blk ++; - data += 512; - bp->b_resid -= 512; - } - } else { -#if 1 - bp->b_error = EINVAL; - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bflags = 0x%08x\n", __PRETTY_FUNCTION__, __LINE__, bp->b_flags); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bufsize = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bufsize); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bcount = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bcount); -#else - unsigned char* data = bp->b_data; - daddr_t blk = bp->b_blkno; - - if (DISKPART(bp->b_dev) != RAW_PART) { - if (bounds_check_with_label(&sc->dk.sc_dkdev, bp, 0) <= 0) { - bp->b_resid = bp->b_bcount; - goto done; - } - p = &sc->dk.sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; - blk = bp->b_blkno + p->p_offset; - } - - while (bp->b_resid >= 512 && !bp->b_error) { - if (blk < 62521344) { - bp->b_error = rdfpga_sdcard_write_block(sc, blk, data); - } else { - aprint_error("%s:%d: blk = %lld write out of range! 
giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); - bp->b_error = EINVAL; - } - blk ++; - data += 512; - bp->b_resid -= 512; - } -#endif - } - - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_resid = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_resid); */ - /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_error = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_error); */ - - done: - biodone(bp); -#else struct rdfpga_sdcard_softc *sc = device_lookup_private(&rdfpga_sdcard_cd, DISKUNIT(bp->b_dev)); dk_strategy(&sc->dk, bp); -#endif } static void rdfpga_sdcard_set_geometry(struct rdfpga_sdcard_softc *sc) { @@ -749,8 +617,8 @@ rdfpga_sdcard_size(dev_t dev) { static void rdfpga_sdcard_minphys(struct buf *bp) { - if (bp->b_bcount > 16) - bp->b_bcount = 16; + if (bp->b_bcount > (RDFPGA_SDCARD_VAL_DMA_MAX_SZ/512)) + bp->b_bcount = (RDFPGA_SDCARD_VAL_DMA_MAX_SZ/512); } static int @@ -792,15 +660,20 @@ rdfpga_sdcard_diskstart(device_t self, struct buf *bp) /* } */ while (bp->b_resid >= 512 && !err) { - if (blk < 62521344) { - err = rdfpga_sdcard_read_block(sc, blk, data); + u_int32_t blkcnt = bp->b_resid / 512; + + if (blkcnt > (RDFPGA_SDCARD_VAL_DMA_MAX_SZ/512)) + blkcnt = (RDFPGA_SDCARD_VAL_DMA_MAX_SZ/512); + + if (blk+blkcnt <= 62521344) { + err = rdfpga_sdcard_read_block(sc, blk, blkcnt, data); } else { aprint_error("%s:%d: blk = %lld read out of range! 
giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); err = EINVAL; } - blk ++; - data += 512; - bp->b_resid -= 512; + blk += blkcnt; + data += 512 * blkcnt; + bp->b_resid -= 512 * blkcnt; } } else { #if 1 diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.h b/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.h index 2756faf..f00f0b0 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.h @@ -66,7 +66,7 @@ struct rdfpga_sdcard_softc { #define RDFPGA_SDCARD_CTRL_START 0x80000000 #define RDFPGA_SDCARD_CTRL_READ 0x40000000 -/* one page, though we're likely to only use 512 bytes (one block) ATM */ -#define RDFPGA_SDCARD_VAL_DMA_MAX_SZ (4096) +/* 16 pages, though we're likely to only use 512 bytes (one block) ATM */ +#define RDFPGA_SDCARD_VAL_DMA_MAX_SZ (65536) #endif /* _RDFPGA_SDCARD_H_ */ From 1f733a1a4c68795d06822d111f7698d47f404e31 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 06:35:58 -0400 Subject: [PATCH 30/78] driver to initialize the SDRAM (should be done in the PROM, but easier in C...) --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 1080 +++++++++++++++++ .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h | 44 + 2 files changed, 1124 insertions(+) create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c new file mode 100644 index 0000000..a016829 --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -0,0 +1,1080 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2021 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +int sbusfpga_sdram_match(device_t, cfdata_t, void *); +void sbusfpga_sdram_attach(device_t, device_t, void *); + +CFATTACH_DECL_NEW(sbusfpga_sdram, sizeof(struct sbusfpga_sdram_softc), + sbusfpga_sdram_match, sbusfpga_sdram_attach, NULL, NULL); + +dev_type_open(sbusfpga_sdram_open); +dev_type_close(sbusfpga_sdram_close); +dev_type_ioctl(sbusfpga_sdram_ioctl); + +const struct cdevsw sbusfpga_sdram_cdevsw = { + .d_open = sbusfpga_sdram_open, + .d_close = sbusfpga_sdram_close, + .d_read = noread, + .d_write = nowrite, + .d_ioctl = sbusfpga_sdram_ioctl, + .d_stop = nostop, + .d_tty = notty, + .d_poll = nopoll, + .d_mmap = nommap, + .d_kqfilter = nokqfilter, + .d_discard = nodiscard, + .d_flag = 0 +}; + +extern struct cfdriver sbusfpga_sdram_cd; +int +sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) +{ + //struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, minor(dev)); + int err = 0; + + switch (cmd) { + default: + err = EINVAL; + break; + } + return(err); +} + +int +sbusfpga_sdram_open(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_sdram_close(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_sdram_match(device_t parent, cfdata_t cf, void *aux) +{ + struct sbus_attach_args *sa = (struct sbus_attach_args *)aux; + + return (strcmp("RDOL,sdram", sa->sa_name) == 0); +} + +int +sdram_init(struct sbusfpga_sdram_softc *sc); + +/* + * Attach all the sub-devices we can find + */ +void +sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) +{ + struct sbus_attach_args *sa = aux; + struct sbusfpga_sdram_softc *sc = device_private(self); + struct sbus_softc *sbsc = device_private(parent); + int node; + int sbusburst; + + sc->sc_bustag = sa->sa_bustag; + 
sc->sc_dev = self; + + if (sa->sa_nreg < 2) { + aprint_error(": Not enough registers spaces\n"); + return; + } + + /* map DDR PHY */ + if (sbus_bus_map(sc->sc_bustag, + sa->sa_reg[0].oa_space /* sa_slot */, + sa->sa_reg[0].oa_base /* sa_offset */, + sa->sa_reg[0].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_ddrphy) != 0) { + aprint_error(": cannot map DDR PHY registers\n"); + return; + } else { + aprint_error(": DDR PHY registers @ %p\n", (void*)sc->sc_bhregs_ddrphy); + } + /* map SDRAM DFII */ + if (sbus_bus_map(sc->sc_bustag, + sa->sa_reg[1].oa_space /* sa_slot */, + sa->sa_reg[1].oa_base /* sa_offset */, + sa->sa_reg[1].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_sdram) != 0) { + aprint_error(": cannot map SDRAM DFII registers\n"); + return; + } else { + aprint_error(": SDRAM DFII registers @ %p\n", (void*)sc->sc_bhregs_sdram); + } + + sc->sc_bufsiz_ddrphy = sa->sa_reg[0].oa_size; + sc->sc_bufsiz_sdram = sa->sa_reg[1].oa_size; + + node = sc->sc_node = sa->sa_node; + + /* + * Get transfer burst size from PROM + */ + sbusburst = sbsc->sc_burst; + if (sbusburst == 0) + sbusburst = SBUS_BURST_32 - 1; /* 1->16 */ + + sc->sc_burst = prom_getpropint(node, "burst-sizes", -1); + if (sc->sc_burst == -1) + /* take SBus burst sizes */ + sc->sc_burst = sbusburst; + + /* Clamp at parent's burst sizes */ + sc->sc_burst &= sbusburst; + + aprint_normal("\n"); + aprint_normal_dev(self, "nid 0x%x, bustag %p, burst 0x%x (parent 0x%0x)\n", + sc->sc_node, + sc->sc_bustag, + sc->sc_burst, + sbsc->sc_burst); + + sdram_init(sc); +} + +#define CONFIG_CSR_DATA_WIDTH 32 +// define CSR_LEDS_BASE to avoid defining the CSRs +#define CSR_LEDS_BASE +#include "dev/sbus/litex_csr.h" +#undef CSR_LEDS_BASE + +/* auto-generated sdram_phy.h + sc */ +#define DFII_CONTROL_SEL 0x01 +#define DFII_CONTROL_CKE 0x02 +#define DFII_CONTROL_ODT 0x04 +#define DFII_CONTROL_RESET_N 0x08 + +#define DFII_COMMAND_CS 0x01 +#define DFII_COMMAND_WE 0x02 +#define DFII_COMMAND_CAS 
0x04 +#define DFII_COMMAND_RAS 0x08 +#define DFII_COMMAND_WRDATA 0x10 +#define DFII_COMMAND_RDDATA 0x20 + +#define SDRAM_PHY_A7DDRPHY +#define SDRAM_PHY_XDR 2 +#define SDRAM_PHY_DATABITS 16 +#define SDRAM_PHY_PHASES 4 +#define SDRAM_PHY_CL 6 +#define SDRAM_PHY_CWL 5 +#define SDRAM_PHY_CMD_LATENCY 0 +#define SDRAM_PHY_RDPHASE 2 +#define SDRAM_PHY_WRPHASE 3 +#define SDRAM_PHY_WRITE_LATENCY_CALIBRATION_CAPABLE +#define SDRAM_PHY_READ_LEVELING_CAPABLE +#define SDRAM_PHY_MODULES SDRAM_PHY_DATABITS/8 +#define SDRAM_PHY_DELAYS 32 +#define SDRAM_PHY_BITSLIPS 8 + +void cdelay(int i); + +__attribute__((unused)) static inline void command_p0(struct sbusfpga_sdram_softc *sc, int cmd) +{ + sdram_dfii_pi0_command_write(sc, cmd); + sdram_dfii_pi0_command_issue_write(sc, 1); +} +__attribute__((unused)) static inline void command_p1(struct sbusfpga_sdram_softc *sc, int cmd) +{ + sdram_dfii_pi1_command_write(sc, cmd); + sdram_dfii_pi1_command_issue_write(sc, 1); +} +__attribute__((unused)) static inline void command_p2(struct sbusfpga_sdram_softc *sc, int cmd) +{ + sdram_dfii_pi2_command_write(sc, cmd); + sdram_dfii_pi2_command_issue_write(sc, 1); +} +__attribute__((unused)) static inline void command_p3(struct sbusfpga_sdram_softc *sc, int cmd) +{ + sdram_dfii_pi3_command_write(sc, cmd); + sdram_dfii_pi3_command_issue_write(sc, 1); +} + +#define DFII_PIX_DATA_SIZE CSR_SDRAM_DFII_PI0_WRDATA_SIZE + +static inline unsigned long sdram_dfii_pix_wrdata_addr(int phase){ + switch (phase) { + case 0: return CSR_SDRAM_DFII_PI0_WRDATA_ADDR; + case 1: return CSR_SDRAM_DFII_PI1_WRDATA_ADDR; + case 2: return CSR_SDRAM_DFII_PI2_WRDATA_ADDR; + case 3: return CSR_SDRAM_DFII_PI3_WRDATA_ADDR; + default: return 0; + } +} + +static inline unsigned long sdram_dfii_pix_rddata_addr(int phase){ + switch (phase) { + case 0: return CSR_SDRAM_DFII_PI0_RDDATA_ADDR; + case 1: return CSR_SDRAM_DFII_PI1_RDDATA_ADDR; + case 2: return CSR_SDRAM_DFII_PI2_RDDATA_ADDR; + case 3: return CSR_SDRAM_DFII_PI3_RDDATA_ADDR; 
+ default: return 0; + } +} + +#define DDRX_MR_WRLVL_ADDRESS 1 +#define DDRX_MR_WRLVL_RESET 6 +#define DDRX_MR_WRLVL_BIT 7 + +static inline void init_sequence(struct sbusfpga_sdram_softc *sc) +{ + /* Release reset */ + sdram_dfii_pi0_address_write(sc, 0x0); + sdram_dfii_pi0_baddress_write(sc, 0); + sdram_dfii_control_write(sc, DFII_CONTROL_ODT|DFII_CONTROL_RESET_N); + cdelay(50000); + + /* Bring CKE high */ + sdram_dfii_pi0_address_write(sc, 0x0); + sdram_dfii_pi0_baddress_write(sc, 0); + sdram_dfii_control_write(sc, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET_N); + cdelay(10000); + + /* Load Mode Register 2, CWL=5 */ + sdram_dfii_pi0_address_write(sc, 0x200); + sdram_dfii_pi0_baddress_write(sc, 2); + command_p0(sc, DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS); + + /* Load Mode Register 3 */ + sdram_dfii_pi0_address_write(sc, 0x0); + sdram_dfii_pi0_baddress_write(sc, 3); + command_p0(sc, DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS); + + /* Load Mode Register 1 */ + sdram_dfii_pi0_address_write(sc, 0x6); + sdram_dfii_pi0_baddress_write(sc, 1); + command_p0(sc, DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS); + + /* Load Mode Register 0, CL=6, BL=8 */ + sdram_dfii_pi0_address_write(sc, 0x920); + sdram_dfii_pi0_baddress_write(sc, 0); + command_p0(sc, DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS); + cdelay(200); + + /* ZQ Calibration */ + sdram_dfii_pi0_address_write(sc, 0x400); + sdram_dfii_pi0_baddress_write(sc, 0); + command_p0(sc, DFII_COMMAND_WE|DFII_COMMAND_CS); + cdelay(200); +} + +/* from hw/common.h, +sc */ + +/* CSR data width (subreg. 
width) in bytes, for direct comparson to sizeof() */ +#define CSR_DW_BYTES (CONFIG_CSR_DATA_WIDTH/8) +#define CSR_OFFSET_BYTES 4 + +/* Number of subregs required for various total byte sizes, by subreg width: + * NOTE: 1, 2, 4, and 8 bytes represent uint[8|16|32|64]_t C types; However, + * CSRs of intermediate byte sizes (24, 40, 48, and 56) are NOT padded + * (with extra unallocated subregisters) to the next valid C type! + * +-----+-----------------+ + * | csr | bytes | + * | _dw | 1 2 3 4 5 6 7 8 | + * | |-----=---=-=-=---| + * | 1 | 1 2 3 4 5 6 7 8 | + * | 2 | 1 1 2 2 3 3 4 4 | + * | 4 | 1 1 1 1 2 2 2 2 | + * | 8 | 1 1 1 1 1 1 1 1 | + * +-----+-----------------+ */ +static inline int num_subregs(int csr_bytes) +{ + return (csr_bytes - 1) / CSR_DW_BYTES + 1; +} + +/* Read a CSR of size 'csr_bytes' located at address 'a'. */ +static inline uint64_t _csr_rd(struct sbusfpga_sdram_softc *sc, unsigned long a, int csr_bytes) +{ + uint64_t r = bus_space_read_4(sc->sc_bustag, 0, a); + for (int i = 1; i < num_subregs(csr_bytes); i++) { + r <<= CONFIG_CSR_DATA_WIDTH; + a += CSR_OFFSET_BYTES; + r |= bus_space_read_4(sc->sc_bustag, 0, a); + } + return r; +} + +/* Write value 'v' to a CSR of size 'csr_bytes' located at address 'a'. */ +static inline void _csr_wr(struct sbusfpga_sdram_softc *sc, unsigned long a, uint64_t v, int csr_bytes) +{ + int ns = num_subregs(csr_bytes); + for (int i = 0; i < ns; i++) { + bus_space_write_4(sc->sc_bustag, 0, a , v >> (CONFIG_CSR_DATA_WIDTH * (ns - 1 - i))); + a += CSR_OFFSET_BYTES; + } +} + +// FIXME: - should we provide 24, 40, 48, and 56 bit csr_[rd|wr] methods? 
+ +static inline uint8_t csr_rd_uint8(struct sbusfpga_sdram_softc *sc, unsigned long a) +{ + return _csr_rd(sc, a, sizeof(uint8_t)); +} + +static inline void csr_wr_uint8(struct sbusfpga_sdram_softc *sc, uint8_t v, unsigned long a) +{ + _csr_wr(sc, a, v, sizeof(uint8_t)); +} + +static inline uint16_t csr_rd_uint16(struct sbusfpga_sdram_softc *sc, unsigned long a) +{ + return _csr_rd(sc, a, sizeof(uint16_t)); +} + +static inline void csr_wr_uint16(struct sbusfpga_sdram_softc *sc, uint16_t v, unsigned long a) +{ + _csr_wr(sc, a, v, sizeof(uint16_t)); +} + +static inline uint32_t csr_rd_uint32(struct sbusfpga_sdram_softc *sc, unsigned long a) +{ + return _csr_rd(sc, a, sizeof(uint32_t)); +} + +static inline void csr_wr_uint32(struct sbusfpga_sdram_softc *sc, uint32_t v, unsigned long a) +{ + _csr_wr(sc, a, v, sizeof(uint32_t)); +} + +static inline uint64_t csr_rd_uint64(struct sbusfpga_sdram_softc *sc, unsigned long a) +{ + return _csr_rd(sc, a, sizeof(uint64_t)); +} + +static inline void csr_wr_uint64(struct sbusfpga_sdram_softc *sc, uint64_t v, unsigned long a) +{ + _csr_wr(sc, a, v, sizeof(uint64_t)); +} + +/* Read a CSR located at address 'a' into an array 'buf' of 'cnt' elements. + * + * NOTE: Since CSR_DW_BYTES is a constant here, we might be tempted to further + * optimize things by leaving out one or the other of the if() branches below, + * depending on each unsigned type width; + * However, this code is also meant to serve as a reference for how CSRs are + * to be manipulated by other programs (e.g., an OS kernel), which may benefit + * from dynamically handling multiple possible CSR subregister data widths + * (e.g., by passing a value in through the Device Tree). + * Ultimately, if CSR_DW_BYTES is indeed a constant, the compiler should be + * able to determine on its own whether it can automatically optimize away one + * of the if() branches! 
*/ +#define _csr_rd_buf(sc, a, buf, cnt) \ +{ \ + int i, j, nsubs, n_sub_elem; \ + uint64_t r; \ + if (sizeof(buf[0]) >= CSR_DW_BYTES) { \ + /* one or more subregisters per element */ \ + for (i = 0; i < cnt; i++) { \ + buf[i] = _csr_rd(sc, a, sizeof(buf[0])); \ + a += CSR_OFFSET_BYTES * num_subregs(sizeof(buf[0])); \ + } \ + } else { \ + /* multiple elements per subregister (2, 4, or 8) */ \ + nsubs = num_subregs(sizeof(buf[0]) * cnt); \ + n_sub_elem = CSR_DW_BYTES / sizeof(buf[0]); \ + for (i = 0; i < nsubs; i++) { \ + r = bus_space_read_4(sc->sc_bustag, 0, a); \ + for (j = n_sub_elem - 1; j >= 0; j--) { \ + if (i * n_sub_elem + j < cnt) \ + buf[i * n_sub_elem + j] = r; \ + r >>= sizeof(buf[0]) * 8; \ + } \ + a += CSR_OFFSET_BYTES; \ + } \ + } \ +} + +/* Write an array 'buf' of 'cnt' elements to a CSR located at address 'a'. + * + * NOTE: The same optimization considerations apply here as with _csr_rd_buf() + * above. + */ +#define _csr_wr_buf(sc, a, buf, cnt) \ +{ \ + int i, j, nsubs, n_sub_elem; \ + uint64_t v; \ + if (sizeof(buf[0]) >= CSR_DW_BYTES) { \ + /* one or more subregisters per element */ \ + for (i = 0; i < cnt; i++) { \ + _csr_wr(sc, a, buf[i], sizeof(buf[0])); \ + a += CSR_OFFSET_BYTES * num_subregs(sizeof(buf[0])); \ + } \ + } else { \ + /* multiple elements per subregister (2, 4, or 8) */ \ + nsubs = num_subregs(sizeof(buf[0]) * cnt); \ + n_sub_elem = CSR_DW_BYTES / sizeof(buf[0]); \ + for (i = 0; i < nsubs; i++) { \ + v = buf[i * n_sub_elem + 0]; \ + for (j = 1; j < n_sub_elem; j++) { \ + if (i * n_sub_elem + j == cnt) \ + break; \ + v <<= sizeof(buf[0]) * 8; \ + v |= buf[i * n_sub_elem + j]; \ + } \ + bus_space_write_4(sc->sc_bustag, 0, a, v); \ + a += CSR_OFFSET_BYTES; \ + } \ + } \ +} + +static inline void csr_rd_buf_uint8(struct sbusfpga_sdram_softc *sc, unsigned long a, uint8_t *buf, int cnt) +{ + _csr_rd_buf(sc, a, buf, cnt); +} + +static inline void csr_wr_buf_uint8(struct sbusfpga_sdram_softc *sc, unsigned long a, + const uint8_t *buf, 
int cnt) +{ + _csr_wr_buf(sc, a, buf, cnt); +} + +static inline void csr_rd_buf_uint16(struct sbusfpga_sdram_softc *sc, unsigned long a, uint16_t *buf, int cnt) +{ + _csr_rd_buf(sc, a, buf, cnt); +} + +static inline void csr_wr_buf_uint16(struct sbusfpga_sdram_softc *sc, unsigned long a, + const uint16_t *buf, int cnt) +{ + _csr_wr_buf(sc, a, buf, cnt); +} + +static inline void csr_rd_buf_uint32(struct sbusfpga_sdram_softc *sc, unsigned long a, uint32_t *buf, int cnt) +{ + _csr_rd_buf(sc, a, buf, cnt); +} + +static inline void csr_wr_buf_uint32(struct sbusfpga_sdram_softc *sc, unsigned long a, + const uint32_t *buf, int cnt) +{ + _csr_wr_buf(sc, a, buf, cnt); +} + +/* NOTE: the macros' "else" branch is unreachable, no need to be warned + * about a >= 64bit left shift! */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshift-count-overflow" +static inline void csr_rd_buf_uint64(struct sbusfpga_sdram_softc *sc, unsigned long a, uint64_t *buf, int cnt) +{ + _csr_rd_buf(sc, a, buf, cnt); +} + +static inline void csr_wr_buf_uint64(struct sbusfpga_sdram_softc *sc, unsigned long a, + const uint64_t *buf, int cnt) +{ + _csr_wr_buf(sc, a, buf, cnt); +} +#pragma GCC diagnostic pop + +/* sdram.c from liblitedram, preprocessed for our case, + sc */ + +static inline unsigned long +lfsr (unsigned long bits, unsigned long prev) +{ + static const unsigned long long lfsr_taps[] = { + 0x0L, + 0x0L, + 0x3L, + 0x6L, + 0xcL, + 0x14L, + 0x30L, + 0x60L, + 0xb8L, + 0x110L, + 0x240L, + 0x500L, + 0x829L, + 0x100dL, + 0x2015L, + 0x6000L, + 0xd008L, + 0x12000L, + 0x20400L, + 0x40023L, + 0x90000L, + 0x140000L, + 0x300000L, + 0x420000L, + 0xe10000L, + 0x1200000L, + 0x2000023L, + 0x4000013L, + 0x9000000L, + 0x14000000L, + 0x20000029L, + 0x48000000L, + 0x80200003L, + 0x100080000L, + 0x204000003L, + 0x500000000L, + 0x801000000L, + 0x100000001fL, + 0x2000000031L, + 0x4400000000L, + 0xa000140000L, + 0x12000000000L, + 0x300000c0000L, + 0x63000000000L, + 0xc0000030000L, + 
0x1b0000000000L, + 0x300003000000L, + 0x420000000000L, + 0xc00000180000L, + 0x1008000000000L, + 0x3000000c00000L, + 0x6000c00000000L, + 0x9000000000000L, + 0x18003000000000L, + 0x30000000030000L, + 0x40000040000000L, + 0xc0000600000000L, + 0x102000000000000L, + 0x200004000000000L, + 0x600003000000000L, + 0xc00000000000000L, + 0x1800300000000000L, + 0x3000000000000030L, + 0x6000000000000000L, + 0x800000000000000dL + }; + unsigned long lsb = prev & 1; + prev >>= 1; + prev ^= (-lsb) & lfsr_taps[bits]; + return prev; +} + +__attribute__((unused)) + void + cdelay (int i) +{ + while (i > 0) { + __asm__ volatile (""); + i--; + } +} +#if 0 +int +sdram_get_databits (void) +{ + return 16; +} +int +sdram_get_freq (void) +{ + return 2 * 4 * 100000000; +} +int +sdram_get_cl (void) +{ + return 6; +} +int +sdram_get_cwl (void) +{ + return 5; +} +#endif +static unsigned char +sdram_dfii_get_rdphase(struct sbusfpga_sdram_softc *sc) +{ + return ddrphy_rdphase_read(sc); +} +static unsigned char +sdram_dfii_get_wrphase(struct sbusfpga_sdram_softc *sc) +{ + return ddrphy_wrphase_read(sc); +} +static void +sdram_dfii_pix_address_write(struct sbusfpga_sdram_softc *sc, unsigned char phase, unsigned int value) +{ + switch (phase) { + case 3: + sdram_dfii_pi3_address_write(sc, value); + break; + case 2: + sdram_dfii_pi2_address_write(sc, value); + break; + case 1: + sdram_dfii_pi1_address_write(sc, value); + break; + default: + sdram_dfii_pi0_address_write(sc, value); + } +} +static void +sdram_dfii_pird_address_write(struct sbusfpga_sdram_softc *sc, unsigned int value) +{ + unsigned char rdphase = sdram_dfii_get_rdphase(sc); + sdram_dfii_pix_address_write(sc, rdphase, value); +} +static void +sdram_dfii_piwr_address_write(struct sbusfpga_sdram_softc *sc, unsigned int value) +{ + unsigned char wrphase = sdram_dfii_get_wrphase(sc); + sdram_dfii_pix_address_write(sc, wrphase, value); +} +static void +sdram_dfii_pix_baddress_write(struct sbusfpga_sdram_softc *sc, unsigned char phase, unsigned 
int value) +{ + switch (phase) { + case 3: + sdram_dfii_pi3_baddress_write(sc, value); + break; + case 2: + sdram_dfii_pi2_baddress_write(sc, value); + break; + case 1: + sdram_dfii_pi1_baddress_write(sc, value); + break; + default: + sdram_dfii_pi0_baddress_write(sc, value); + } +} +static void +sdram_dfii_pird_baddress_write(struct sbusfpga_sdram_softc *sc, unsigned int value) +{ + unsigned char rdphase = sdram_dfii_get_rdphase(sc); + sdram_dfii_pix_baddress_write(sc, rdphase, value); +} +static void +sdram_dfii_piwr_baddress_write(struct sbusfpga_sdram_softc *sc, unsigned int value) +{ + unsigned char wrphase = sdram_dfii_get_wrphase(sc); + sdram_dfii_pix_baddress_write(sc, wrphase, value); +} +static void +command_px(struct sbusfpga_sdram_softc *sc, unsigned char phase, unsigned int value) +{ + switch (phase) { + case 3: + command_p3(sc, value); + break; + case 2: + command_p2(sc, value); + break; + case 1: + command_p1(sc, value); + break; + default: + command_p0(sc, value); + } +} +static void +command_prd(struct sbusfpga_sdram_softc *sc, unsigned int value) +{ + unsigned char rdphase = sdram_dfii_get_rdphase(sc); + command_px(sc, rdphase, value); +} +static void +command_pwr (struct sbusfpga_sdram_softc *sc, unsigned int value) +{ + unsigned char wrphase = sdram_dfii_get_wrphase(sc); + command_px(sc, wrphase, value); +} +static void +sdram_software_control_on(struct sbusfpga_sdram_softc *sc) +{ + unsigned int previous; + previous = sdram_dfii_control_read(sc); + if (previous != (0x02 | 0x04 | 0x08)) { + sdram_dfii_control_write(sc, (0x02 | 0x04 | 0x08)); + aprint_normal ("Switching SDRAM to software control.\n"); + } +} +static void +sdram_software_control_off(struct sbusfpga_sdram_softc *sc) +{ + unsigned int previous; + previous = sdram_dfii_control_read(sc); + if (previous != (0x01)) { + sdram_dfii_control_write(sc, (0x01)); + aprint_normal ("Switching SDRAM to hardware control.\n"); + } +} +__attribute__((unused)) static void 
+sdram_mode_register_write(struct sbusfpga_sdram_softc *sc, char reg, int value) +{ + sdram_dfii_pi0_address_write(sc, value); + sdram_dfii_pi0_baddress_write(sc, reg); + command_p0(sc, 0x08 | 0x04 | 0x02 | 0x01); +} +typedef void (*delay_callback) (struct sbusfpga_sdram_softc *sc, int module); +static void +sdram_activate_test_row(struct sbusfpga_sdram_softc *sc) +{ + sdram_dfii_pi0_address_write(sc, 0); + sdram_dfii_pi0_baddress_write(sc, 0); + command_p0(sc, 0x08 | 0x01); + cdelay (15); +} +static void +sdram_precharge_test_row(struct sbusfpga_sdram_softc *sc) +{ + sdram_dfii_pi0_address_write(sc, 0); + sdram_dfii_pi0_baddress_write(sc, 0); + command_p0(sc, 0x08 | 0x02 | 0x01); + cdelay (15); +} +#if 0 +// available from kern.h +static unsigned int +popcount (unsigned int x) +{ + x -= ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + x += (x >> 8); + x += (x >> 16); + return x & 0x0000003F; +} +#endif +static void +print_scan_errors (unsigned int errors) +{ + aprint_normal ("%d", errors == 0); +} +static unsigned int +sdram_write_read_check_test_pattern (struct sbusfpga_sdram_softc *sc, int module, unsigned int seed) +{ + int p, i; + unsigned int errors; + unsigned int prv; + unsigned char tst[1 * 32 / 8]; + unsigned char prs[4][1 * 32 / 8]; + prv = seed; + for (p = 0; p < 4; p++) { + for (i = 0; i < 1 * 32 / 8; i++) { + prv = lfsr (32, prv); + prs[p][i] = prv; + } + } + sdram_activate_test_row(sc); + for (p = 0; p < 4; p++) + csr_wr_buf_uint8(sc, sc->sc_bhregs_sdram + (sdram_dfii_pix_wrdata_addr (p) - CSR_SDRAM_BASE), prs[p], 1 * 32 / 8); /* cleanme */ + sdram_dfii_piwr_address_write(sc, 0); + sdram_dfii_piwr_baddress_write(sc, 0); + command_pwr(sc, 0x04 | 0x02 | 0x01 | 0x10); + cdelay (15); + sdram_dfii_pird_address_write(sc, 0); + sdram_dfii_pird_baddress_write(sc, 0); + command_prd(sc, 0x04 | 0x01 | 0x20); + cdelay (15); + sdram_precharge_test_row(sc); + errors = 0; + for (p = 0; p < 4; p++) { + 
csr_rd_buf_uint8(sc, sc->sc_bhregs_sdram + (sdram_dfii_pix_rddata_addr (p) - CSR_SDRAM_BASE), tst, 1 * 32 / 8); /* cleanme */ + errors += + popcount (prs[p][16 / 8 - 1 - module] ^ tst[16 / 8 - 1 - module]); + errors += + popcount (prs[p][2 * 16 / 8 - 1 - module] ^ + tst[2 * 16 / 8 - 1 - module]); + } + return errors; +} +static void +sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int show_short, int show_long, + delay_callback rst_delay, + delay_callback inc_delay) +{ + int i; + int show; + int working; + unsigned int errors; + int delay, delay_mid, delay_range; + int delay_min = -1, delay_max = -1; + if (show_long) + aprint_normal ("m%d: |", module); + delay = 0; + rst_delay(sc, module); + while (1) { + errors = sdram_write_read_check_test_pattern(sc, module, 42); + errors += sdram_write_read_check_test_pattern(sc, module, 84); +aprint_normal("[min] delay %d -> errors %d\n", delay, errors); + working = errors == 0; + show = show_long; + if (show) + print_scan_errors (errors); + if (working && delay_min < 0) { + delay_min = delay; + break; + } + delay++; + if (delay >= 32) + break; + inc_delay(sc, module); + } + delay++; + inc_delay(sc, module); + while (1) { + errors = sdram_write_read_check_test_pattern(sc, module, 42); + errors += sdram_write_read_check_test_pattern(sc, module, 84); +aprint_normal("[max] delay %d -> errors %d\n", delay, errors); + working = errors == 0; + show = show_long; + if (show) + print_scan_errors (errors); + if (!working && delay_max < 0) { + delay_max = delay; + } + delay++; + if (delay >= 32) + break; + inc_delay(sc, module); + } + if (delay_max < 0) { + delay_max = delay; + } + if (show_long) + aprint_normal ("| "); + delay_mid = (delay_min + delay_max) / 2 % 32; + delay_range = (delay_max - delay_min) / 2; + if (show_short) { + if (delay_min < 0) + aprint_normal ("delays: -"); + else + aprint_normal ("delays: %02d+-%02d", delay_mid, delay_range); + } + if (show_long) + aprint_normal ("\n"); + rst_delay(sc, 
module); + cdelay (100); + for (i = 0; i < delay_mid; i++) { + inc_delay(sc, module); + cdelay (100); + } +} +int _sdram_tck_taps; +int _sdram_write_leveling_bitslips[16]; +static void +sdram_read_leveling_rst_delay (struct sbusfpga_sdram_softc *sc, int module) +{ + ddrphy_dly_sel_write(sc, 1 << module); + ddrphy_rdly_dq_rst_write(sc, 1); + ddrphy_dly_sel_write(sc, 0); +} +static void +sdram_read_leveling_inc_delay (struct sbusfpga_sdram_softc *sc, int module) +{ + ddrphy_dly_sel_write(sc, 1 << module); + ddrphy_rdly_dq_inc_write(sc, 1); + ddrphy_dly_sel_write(sc, 0); +} +static void +sdram_read_leveling_rst_bitslip (struct sbusfpga_sdram_softc *sc, char m) +{ + ddrphy_dly_sel_write(sc, 1 << m); + ddrphy_rdly_dq_bitslip_rst_write(sc, 1); + ddrphy_dly_sel_write(sc, 0); +} +static void +sdram_read_leveling_inc_bitslip (struct sbusfpga_sdram_softc *sc, char m) +{ + ddrphy_dly_sel_write(sc, 1 << m); + ddrphy_rdly_dq_bitslip_write(sc, 1); + ddrphy_dly_sel_write(sc, 0); +} +static unsigned int +sdram_read_leveling_scan_module (struct sbusfpga_sdram_softc *sc, int module, int bitslip, int show) +{ + const unsigned int max_errors = 2 * (4 * 2 * 32); + int i; + unsigned int score; + unsigned int errors; + score = 0; + if (show) + aprint_normal (" m%d, b%02d: |", module, bitslip); + sdram_read_leveling_rst_delay(sc, module); + for (i = 0; i < 32; i++) { + int working; + int _show = show; + errors = sdram_write_read_check_test_pattern(sc, module, 42); + errors += sdram_write_read_check_test_pattern(sc, module, 84); +aprint_normal("[scan] iter %d -> errors %d\n", i, errors); + working = errors == 0; + score += (working * max_errors * 32) + (max_errors - errors); + if (_show) { + print_scan_errors (errors); + } + sdram_read_leveling_inc_delay(sc, module); + } + if (show) + aprint_normal ("| "); + return score; +} +static void +sdram_read_leveling(struct sbusfpga_sdram_softc *sc) +{ + int module; + int bitslip; + unsigned int score; + unsigned int best_score; + int best_bitslip; 
+ for (module = 0; module < 16 / 8; module++) { + best_score = 0; + best_bitslip = 0; + sdram_read_leveling_rst_bitslip(sc, module); + for (bitslip = 0; bitslip < 8; bitslip++) { + score = sdram_read_leveling_scan_module(sc, module, bitslip, 1); + sdram_leveling_center_module(sc, module, 1, 0, + sdram_read_leveling_rst_delay, + sdram_read_leveling_inc_delay); + aprint_normal ("\n"); + if (score > best_score) { + best_bitslip = bitslip; + best_score = score; + } + if (bitslip == 8 - 1) + break; + sdram_read_leveling_inc_bitslip(sc, module); + } + aprint_normal (" best: m%d, b%02d ", module, best_bitslip); + sdram_read_leveling_rst_bitslip(sc, module); + for (bitslip = 0; bitslip < best_bitslip; bitslip++) + sdram_read_leveling_inc_bitslip(sc, module); + sdram_leveling_center_module(sc, module, 1, 0, + sdram_read_leveling_rst_delay, + sdram_read_leveling_inc_delay); + aprint_normal ("\n"); + } +} +static void +sdram_write_latency_calibration(struct sbusfpga_sdram_softc *sc) +{ + int i; + int module; + int bitslip; + unsigned int score; + unsigned int subscore; + unsigned int best_score; + int best_bitslip; + for (module = 0; module < 16 / 8; module++) { + best_score = 0; + best_bitslip = -1; + for (bitslip = 0; bitslip < 8; bitslip += 2) { + score = 0; + ddrphy_dly_sel_write(sc, 1 << module); + ddrphy_wdly_dq_bitslip_rst_write(sc, 1); + for (i = 0; i < bitslip; i++) { + ddrphy_wdly_dq_bitslip_write(sc, 1); + } + ddrphy_dly_sel_write(sc, 0); + score = 0; + sdram_read_leveling_rst_bitslip(sc, module); + for (i = 0; i < 8; i++) { + subscore = sdram_read_leveling_scan_module(sc, module, i, 0); + score = subscore > score ? 
subscore : score; + sdram_read_leveling_inc_bitslip(sc, module); + } + if (score > best_score) { + best_bitslip = bitslip; + best_score = score; + } + } + if (_sdram_write_leveling_bitslips[module] < 0) + bitslip = best_bitslip; + else + bitslip = _sdram_write_leveling_bitslips[module]; + if (bitslip == -1) + aprint_normal ("m%d:- ", module); + else + aprint_normal ("m%d:%d ", module, bitslip); + ddrphy_dly_sel_write(sc, 1 << module); + ddrphy_wdly_dq_bitslip_rst_write(sc, 1); + for (i = 0; i < bitslip; i++) { + ddrphy_wdly_dq_bitslip_write(sc, 1); + } + ddrphy_dly_sel_write(sc, 0); + } + aprint_normal ("\n"); +} +static int +sdram_leveling(struct sbusfpga_sdram_softc *sc) +{ + int module; + sdram_software_control_on(sc); + for (module = 0; module < 16 / 8; module++) { + sdram_read_leveling_rst_delay(sc, module); + sdram_read_leveling_rst_bitslip(sc, module); + } + aprint_normal ("Write latency calibration:\n"); + sdram_write_latency_calibration(sc); + aprint_normal ("Read leveling:\n"); + sdram_read_leveling(sc); + sdram_software_control_off(sc); + return 1; +} +int +sdram_init(struct sbusfpga_sdram_softc *sc) +{ + ddrphy_rdphase_write(sc, 2); + ddrphy_wrphase_write(sc, 3); + aprint_normal ("Initializing SDRAM @0x%08lx...\n", 0x80000000L); + sdram_software_control_on(sc); + ddrphy_rst_write(sc, 1); + cdelay (1000); + ddrphy_rst_write(sc, 0); + cdelay (1000); + init_sequence(sc); + sdram_leveling(sc); + sdram_software_control_off(sc); +#if 0 + if (!memtest ((unsigned int *) 0x80000000L, (2 * 1024 * 1024))) { + return 0; + } + memspeed ((unsigned int *) 0x80000000L, (2 * 1024 * 1024), 0); +#endif + return 1; +} diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h new file mode 100644 index 0000000..2fa554a --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h @@ -0,0 +1,44 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SBUSFPGA_SDRAM_H_ +#define _SBUSFPGA_SDRAM_H_ + +struct sbusfpga_sdram_softc { + device_t sc_dev; /* us as a device */ + u_int sc_rev; /* revision */ + int sc_node; /* PROM node ID */ + int sc_burst; /* DVMA burst size in effect */ + bus_space_tag_t sc_bustag; /* bus tag */ + bus_space_handle_t sc_bhregs_ddrphy; /* bus handle */ + bus_space_handle_t sc_bhregs_sdram; /* bus handle */ + int sc_bufsiz_ddrphy; /* Size of buffer */ + int sc_bufsiz_sdram; /* Size of buffer */ +}; + +#endif /* _SBUSFPGA_SDRAM_H_ */ From d42a4672f9dc73031ed14d6a58101bddcab77323 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 06:36:35 -0400 Subject: [PATCH 31/78] fix export for driver --- sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py index f772f5e..3859bb6 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py @@ -45,13 +45,13 @@ def _get_rw_functions_c(name, csr_name, reg_base, area_base, nwords, busword, al if with_access_functions: r += "static inline {} {}_read(struct sbusfpga_sdram_softc *sc) {{\n".format(ctype, reg_name) if nwords > 1: - r += "\t{} r = bus_space_read_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L);\n".format(ctype, name, hex(reg_base - area_base)) + r += "\t{} r = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_{}, {}L);\n".format(ctype, name, hex(reg_base - area_base)) for sub in range(1, nwords): r += "\tr <<= {};\n".format(busword) - r += "\tr |= bus_space_read_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L);\n".format(name, hex(reg_base - area_base + sub*stride)) + r += "\tr |= bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_{}, {}L);\n".format(name, hex(reg_base - area_base + sub*stride)) r += "\treturn r;\n}\n" else: - r += "\treturn bus_space_read_4(sc->sc_bustag, sc>sc_bhregs_{}, 
{}L);\n}}\n".format(name, hex(reg_base - area_base)) + r += "\treturn bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_{}, {}L);\n}}\n".format(name, hex(reg_base - area_base)) if not read_only: r += "static inline void {}_write(struct sbusfpga_sdram_softc *sc, {} v) {{\n".format(reg_name, ctype) @@ -61,7 +61,7 @@ def _get_rw_functions_c(name, csr_name, reg_base, area_base, nwords, busword, al v_shift = "v >> {}".format(shift) else: v_shift = "v" - r += "\tbus_space_write_4(sc->sc_bustag, sc>sc_bhregs_{}, {}L, {});\n".format(name, hex(reg_base - area_base + sub*stride), v_shift) + r += "\tbus_space_write_4(sc->sc_bustag, sc->sc_bhregs_{}, {}L, {});\n".format(name, hex(reg_base - area_base + sub*stride), v_shift) r += "}\n" return r @@ -87,7 +87,6 @@ def get_csr_header(regions, constants, csr_base=None, with_access_functions=True r += "\n/* "+name+" */\n" r += "#ifndef CSR_"+name.upper()+"_BASE\n" r += "#define CSR_"+name.upper()+"_BASE (CSR_BASE + "+hex(origin)+"L)\n" - r += "#endif\n" if not isinstance(region.obj, Memory): for csr in region.obj: nr = (csr.size + region.busword - 1)//region.busword @@ -108,7 +107,7 @@ def get_csr_header(regions, constants, csr_base=None, with_access_functions=True r += "\treturn ( (oldword >> " + offset + ") & mask );\n}\n" r += "static inline uint32_t " + field_name + "_read(struct sbusfpga_sdram_softc *sc) {\n" r += "\tuint32_t word = " + reg_name + "_read(sc);\n" - r += "\treturn " + field_name + "_extract(word);\n" + r += "\treturn " + field_name + "_extract(sc, word);\n" r += "}\n" if not getattr(csr, "read_only", False): r += "static inline uint32_t " + field_name + "_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) {\n" @@ -119,6 +118,7 @@ def get_csr_header(regions, constants, csr_base=None, with_access_functions=True r += "\tuint32_t newword = " + field_name + "_replace(sc, oldword, plain_value);\n" r += "\t" + reg_name + "_write(sc, newword);\n" r += "}\n" + r += "#endif // 
CSR_"+name.upper()+"_BASE\n" r += "\n#endif\n" return r From b46bdf382a7fc4c51217bf98a5fb26242933a14e Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 06:36:56 -0400 Subject: [PATCH 32/78] update prom --- sbus-to-ztex-gateware-migen/prom_csr.fth | 10 ++++++++++ sbus-to-ztex-gateware-migen/prom_migen.fth | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/prom_csr.fth diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth new file mode 100644 index 0000000..31b500c --- /dev/null +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -0,0 +1,10 @@ +\ auto-generated base regions for CSRs in the PROM +h# 40000 constant sbusfpga_csraddr_leds +h# 41000 constant sbusfpga_csraddr_ddrphy +h# 42000 constant sbusfpga_csraddr_sdram +h# 80000 constant sbusfpga_regionaddr_usb_host_ctrl +h# 90000 constant sbusfpga_regionaddr_usb_shared_mem +h# 0 constant sbusfpga_regionaddr_prom +h# fc000000 constant sbusfpga_regionaddr_usb_fake_dma +h# 80000000 constant sbusfpga_regionaddr_main_ram +h# 40000 constant sbusfpga_regionaddr_csr diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index 786d0ac..6b9388a 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -43,7 +43,7 @@ new-device \ Absolute minimal stuff; name & registers def. 
" generic-ohci" device-name -my-address h# 80000 + my-space h# 1000 reg +my-address sbusfpga_regionaddr_usb_host_ctrl + my-space h# 1000 reg \ we don't support ET or anything non-32bits h# 7c xdrint " slave-burst-sizes" attribute h# 7c xdrint " burst-sizes" attribute @@ -58,7 +58,7 @@ my-space constant my-sbus-space : map-in ( adr space size -- virt ) " map-in" $call-parent ; : map-out ( virt size -- ) " map-out" $call-parent ; -: map-in-regs ( -- ) my-sbus-address h# 80000 + my-sbus-space h# 1000 map-in is regs-virt ; +: map-in-regs ( -- ) my-sbus-address sbusfpga_regionaddr_usb_host_ctrl + my-sbus-space h# 1000 map-in is regs-virt ; : map-out-regs ( -- ) regs-virt h# 1000 map-out ; : my-reset! ( -- ) From 9437d287db7f9143f44503132d06befbfcdb821a Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 08:30:27 -0400 Subject: [PATCH 33/78] read PROM from Wishbone (simplify things, save resources as it was mapped there already anyway), add sd_card (unused, un-promed yet) --- sbus-to-ztex-gateware-migen/prom_csr.fth | 8 +- .../sbus_to_fpga_fsm.py | 71 ++------ .../sbus_to_fpga_soc.py | 45 ++--- sbus-to-ztex-gateware-migen/ztex213_sbus.py | 161 ++++++++++++++++++ 4 files changed, 193 insertions(+), 92 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/ztex213_sbus.py diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth index 31b500c..5c43479 100644 --- a/sbus-to-ztex-gateware-migen/prom_csr.fth +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -1,9 +1,13 @@ \ auto-generated base regions for CSRs in the PROM h# 40000 constant sbusfpga_csraddr_leds h# 41000 constant sbusfpga_csraddr_ddrphy -h# 42000 constant sbusfpga_csraddr_sdram +h# 42000 constant sbusfpga_csraddr_sdblock2mem +h# 43000 constant sbusfpga_csraddr_sdcore +h# 44000 constant sbusfpga_csraddr_sdirq +h# 45000 constant sbusfpga_csraddr_sdmem2block +h# 46000 constant sbusfpga_csraddr_sdphy +h# 47000 constant sbusfpga_csraddr_sdram h# 80000 
constant sbusfpga_regionaddr_usb_host_ctrl -h# 90000 constant sbusfpga_regionaddr_usb_shared_mem h# 0 constant sbusfpga_regionaddr_prom h# fc000000 constant sbusfpga_regionaddr_usb_fake_dma h# 80000000 constant sbusfpga_regionaddr_main_ram diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 7e9d6e5..9fa1900 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -171,7 +171,7 @@ LED_M_READ = 0x20 LED_M_CACHE = 0x40 class SBusFPGABus(Module): - def __init__(self, platform, prom, hold_reset, wishbone_slave, wishbone_master): + def __init__(self, platform, hold_reset, wishbone_slave, wishbone_master): self.platform = platform self.hold_reset = hold_reset @@ -379,14 +379,8 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") - ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), - NextValue(sbus_wishbone_le, 0), - #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 40))), - NextState("Slave_Ack_Read_Prom_Burst") - ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | + ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet @@ -417,16 +411,10 @@ class SBusFPGABus(Module): (SBUS_3V3_ASs_i == 0) & (SIZ_BYTE == SBUS_3V3_SIZ_i) & (SBUS_3V3_PPRD_i == 1)), - 
NextValue(sbus_oe_master_in, 1), - NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX), - NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), - NextValue(SBUS_3V3_ERRs_o, 1), - NextValue(sbus_wishbone_le, 0), - NextValue(p_data, prom[SBUS_3V3_PA_i[ADDR_PHYS_LOW+2:ADDR_PFX_LOW]]), - #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 80))), - NextState("Slave_Ack_Read_Prom_Byte") - ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + NextValue(sbus_oe_master_in, 1), + NextValue(sbus_last_pa, SBUS_3V3_PA_i), + If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -444,13 +432,13 @@ class SBusFPGABus(Module): NextValue(sbus_slave_timeout, sbus_default_timeout), NextState("Slave_Ack_Read_Reg_Byte_Wait_For_Wishbone") ) - ).Else( - #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), - NextValue(SBUS_3V3_ACKs_o, ACK_ERR), - NextValue(SBUS_3V3_ERRs_o, 1), - #NextValue(led0123, led0123 | LED_ADDRESS), - NextState("Slave_Error") - ) + ).Else( + #NextValue(self.led_display.value, 0x0000000040 | 0x0000000001), + NextValue(SBUS_3V3_ACKs_o, ACK_ERR), + NextValue(SBUS_3V3_ERRs_o, 1), + #NextValue(led0123, led0123 | LED_ADDRESS), + NextState("Slave_Error") + ) ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0) & (SIZ_HWORD == SBUS_3V3_SIZ_i) & @@ -462,7 +450,8 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") - ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + 
).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -712,34 +701,6 @@ class SBusFPGABus(Module): ) # ##### SLAVE READ ##### # ## BURST (1->16 words) ## - slave_fsm.act("Slave_Ack_Read_Prom_Burst", - #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x02), self.led_display.value[8:40])), - NextValue(sbus_oe_data, 1), - NextValue(SBUS_3V3_D_o, p_data), - NextValue(p_data, prom[Cat(index_with_wrap((burst_counter+1), burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW])]), - If((burst_counter == burst_limit_m1), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextState("Slave_Do_Read") - ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(burst_counter, burst_counter + 1) - ) - ) - slave_fsm.act("Slave_Ack_Read_Prom_Byte", - #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x03), self.led_display.value[8:40])), - NextValue(sbus_oe_data, 1), - If((sbus_last_pa[0:2] == 0x0), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[24:32])) - ).Elif((sbus_last_pa[0:2] == 0x1), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[16:24])) - ).Elif((sbus_last_pa[0:2] == 0x2), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 8:16])) - ).Elif((sbus_last_pa[0:2] == 0x3), - NextValue(SBUS_3V3_D_o, Cat(Signal(24), p_data[ 0: 8])) - ), - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), - NextState("Slave_Do_Read") - ) slave_fsm.act("Slave_Do_Read", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x04), self.led_display.value[8:40])), NextValue(sbus_oe_data, 0), diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 1a3b8b6..2a259f7 100644 
--- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -10,7 +10,7 @@ from litex.soc.integration.builder import * from litex.soc.interconnect import wishbone from litex.soc.cores.clock import * from litex.soc.cores.led import LedChaser -from litex_boards.platforms import ztex213 +import ztex213_sbus from migen.genlib.fifo import * from litedram.modules import MT41J128M16 @@ -20,32 +20,6 @@ from sbus_to_fpga_fsm import *; import sbus_to_fpga_export; -_sbus_sbus = [ - ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), - ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), - ("SBUS_3V3_BGs", 0, Pins("T6"), IOStandard("lvttl")), - ("SBUS_3V3_BRs", 0, Pins("R6"), IOStandard("lvttl")), - ("SBUS_3V3_ERRs", 0, Pins("V2"), IOStandard("lvttl")), - ("SBUS_DATA_OE_LED", 0, Pins("U1"), IOStandard("lvttl")), - ("SBUS_DATA_OE_LED_2", 0, Pins("T3"), IOStandard("lvttl")), - ("SBUS_3V3_RSTs", 0, Pins("U2"), IOStandard("lvttl")), - ("SBUS_3V3_SELs", 0, Pins("K6"), IOStandard("lvttl")), - ("SBUS_3V3_INT1s", 0, Pins("R3"), IOStandard("lvttl")), - ("SBUS_3V3_INT7s", 0, Pins("N5"), IOStandard("lvttl")), - ("SBUS_3V3_PPRD", 0, Pins("N6"), IOStandard("lvttl")), - ("SBUS_OE", 0, Pins("P5"), IOStandard("lvttl")), - ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), - ("SBUS_3V3_SIZ", 0, Pins("R7 U3 V1"), IOStandard("lvttl")), - ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), - ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), -] - -_usb_io = [ - ("usb", 0, - Subsignal("dp", Pins("V9")), # Serial TX - Subsignal("dm", Pins("U9")), # Serial RX - IOStandard("LVCMOS33")) -] # CRG ---------------------------------------------------------------------------------------------- class _CRG(Module): @@ -115,9 
+89,9 @@ class SBusFPGA(SoCCore): self.sys_clk_freq = sys_clk_freq = 100e6 ## 25e6 - self.platform = platform = ztex213.Platform(variant="ztex2.13a", expansion="sbus") - self.platform.add_extension(_sbus_sbus) - self.platform.add_extension(_usb_io) + self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a") + + self.platform.add_extension(ztex213_sbus._usb_io) SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, @@ -131,6 +105,8 @@ class SBusFPGA(SoCCore): # the physical address here are used as offset in the SBus # reserved area of 256 MiB # Anything at 0x10000000 is therefore unreachable directly + # The position of the 'usb_fake_dma' is so it overlaps + # the virtual address space used by NetBSD DMA allocators wb_mem_map = { "prom": 0x00000000, "csr" : 0x00040000, @@ -151,7 +127,7 @@ class SBusFPGA(SoCCore): self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) #self.comb += self.cpu.interrupt[16].eq(self.usb_host.interrupt) #fixme: need to deal with interrupts - self.add_ram(name="usb_shared_mem", origin=self.mem_map["usb_shared_mem"], size=2**16) + # self.add_ram(name="usb_shared_mem", origin=self.mem_map["usb_shared_mem"], size=2**16) pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") SBUS_3V3_INT1s_o = Signal(reset=1) @@ -170,7 +146,7 @@ class SBusFPGA(SoCCore): prom_file = "prom_migen.fc" prom_data = soc_core.get_mem_data(prom_file, "big") - prom = Array(prom_data) + # prom = Array(prom_data) #print("\n****************************************\n") #for i in range(len(prom)): # print(hex(prom[i])) @@ -197,7 +173,6 @@ class SBusFPGA(SoCCore): self.submodules.wishbone_slave_sys = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_sbus, cd_master="sys", cd_slave="sbus") _sbus_bus = SBusFPGABus(platform=self.platform, - prom=prom, hold_reset=hold_reset, wishbone_slave=wishbone_slave_sbus, wishbone_master=self.wishbone_master_sbus) @@ -206,8 +181,6 @@ class SBusFPGA(SoCCore): 
self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) - - #self.add_sdcard() self.submodules.ddrphy = s7ddrphy.A7DDRPHY(platform.request("ddram"), memtype = "DDR3", @@ -218,6 +191,8 @@ class SBusFPGA(SoCCore): module = MT41J128M16(sys_clk_freq, "1:4"), l2_cache_size = 0 ) + + self.add_sdcard() def main(): parser = argparse.ArgumentParser(description="SbusFPGA") diff --git a/sbus-to-ztex-gateware-migen/ztex213_sbus.py b/sbus-to-ztex-gateware-migen/ztex213_sbus.py new file mode 100644 index 0000000..77dc0fb --- /dev/null +++ b/sbus-to-ztex-gateware-migen/ztex213_sbus.py @@ -0,0 +1,161 @@ +# +# This file is part of LiteX-Boards. +# +# Support for the ZTEX USB-FGPA Module 2.13: +# +# With (no-so-optional) expansion, either the ZTEX Debug board: +# +# Or the SBusFPGA adapter board: +# +# +# Copyright (c) 2015 Yann Sionneau +# Copyright (c) 2015-2019 Florent Kermarrec +# Copyright (c) 2020-2021 Romain Dolbeau +# SPDX-License-Identifier: BSD-2-Clause + +from litex.build.generic_platform import * +from litex.build.xilinx import XilinxPlatform +from litex.build.openocd import OpenOCD + +# IOs ---------------------------------------------------------------------------------------------- + +_io = [ + ## 48 MHz clock reference + ("clk48", 0, Pins("P15"), IOStandard("LVCMOS33")), + ## embedded 256 MiB DDR3 DRAM + ("ddram", 0, + Subsignal("a", Pins("C5 B6 C7 D5 A3 E7 A4 C6", "A6 D8 B2 A5 B3 B7"), + IOStandard("SSTL135")), + Subsignal("ba", Pins("E5 A1 E6"), IOStandard("SSTL135")), + Subsignal("ras_n", Pins("E3"), IOStandard("SSTL135")), + Subsignal("cas_n", Pins("D3"), IOStandard("SSTL135")), + Subsignal("we_n", Pins("D4"), IOStandard("SSTL135")), +# Subsignal("cs_n", Pins(""), IOStandard("SSTL135")), + Subsignal("dm", Pins("G1 G6"), IOStandard("SSTL135")), + Subsignal("dq", Pins( + "H1 F1 E2 
E1 F4 C1 F3 D2", + "G4 H5 G3 H6 J2 J3 K1 K2"), + IOStandard("SSTL135"), + Misc("IN_TERM=UNTUNED_SPLIT_40")), + Subsignal("dqs_p", Pins("H2 J4"), + IOStandard("DIFF_SSTL135"), + Misc("IN_TERM=UNTUNED_SPLIT_40")), + Subsignal("dqs_n", Pins("G2 H4"), + IOStandard("DIFF_SSTL135"), + Misc("IN_TERM=UNTUNED_SPLIT_40")), + Subsignal("clk_p", Pins("C4"), IOStandard("DIFF_SSTL135")), + Subsignal("clk_n", Pins("B4"), IOStandard("DIFF_SSTL135")), + Subsignal("cke", Pins("B1"), IOStandard("SSTL135")), + Subsignal("odt", Pins("F5"), IOStandard("SSTL135")), + Subsignal("reset_n", Pins("J5"), IOStandard("SSTL135")), + Misc("SLEW=FAST"), + ), +] + +_sbus_io = [ + ## leds on the SBus board + ("user_led", 0, Pins("U8"), IOStandard("lvcmos33")), #LED0 + ("user_led", 1, Pins("U7"), IOStandard("lvcmos33")), #LED1 + ("user_led", 2, Pins("U6"), IOStandard("lvcmos33")), #LED2 + ("user_led", 3, Pins("T8"), IOStandard("lvcmos33")), #LED3 + ("user_led", 4, Pins("P4"), IOStandard("lvcmos33")), #LED4 + ("user_led", 5, Pins("P3"), IOStandard("lvcmos33")), #LED5 + ("user_led", 6, Pins("T1"), IOStandard("lvcmos33")), #LED6 + ("user_led", 7, Pins("R1"), IOStandard("lvcmos33")), #LED7 + #("user_led", 8, Pins("U1"), IOStandard("lvcmos33")), #SBUS_DATA_OE_LED + #("user_led", 9, Pins("T3"), IOStandard("lvcmos33")), #SBUS_DATA_OE_LED_2 + ## serial header for console + ("serial", 0, + Subsignal("tx", Pins("V9")), # FIXME: might be the other way round + Subsignal("rx", Pins("U9")), + IOStandard("LVCMOS33") + ), + ## sdcard connector + ("spisdcard", 0, + Subsignal("clk", Pins("R8")), + Subsignal("mosi", Pins("T5"), Misc("PULLUP")), + Subsignal("cs_n", Pins("V6"), Misc("PULLUP")), + Subsignal("miso", Pins("V5"), Misc("PULLUP")), + Misc("SLEW=FAST"), + IOStandard("LVCMOS33"), + ), + ("sdcard", 0, + Subsignal("data", Pins("V5 V4 V7 V6"), Misc("PULLUP")), + Subsignal("cmd", Pins("T5"), Misc("PULLUP")), + Subsignal("clk", Pins("R8")), + #Subsignal("cd", Pins("V6")), + Misc("SLEW=FAST"), + 
IOStandard("LVCMOS33"), + ), +] + +_sbus_sbus = [ + ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), + ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), + ("SBUS_3V3_BGs", 0, Pins("T6"), IOStandard("lvttl")), + ("SBUS_3V3_BRs", 0, Pins("R6"), IOStandard("lvttl")), + ("SBUS_3V3_ERRs", 0, Pins("V2"), IOStandard("lvttl")), + ("SBUS_DATA_OE_LED", 0, Pins("U1"), IOStandard("lvttl")), + ("SBUS_DATA_OE_LED_2", 0, Pins("T3"), IOStandard("lvttl")), + ("SBUS_3V3_RSTs", 0, Pins("U2"), IOStandard("lvttl")), + ("SBUS_3V3_SELs", 0, Pins("K6"), IOStandard("lvttl")), + ("SBUS_3V3_INT1s", 0, Pins("R3"), IOStandard("lvttl")), + ("SBUS_3V3_INT7s", 0, Pins("N5"), IOStandard("lvttl")), + ("SBUS_3V3_PPRD", 0, Pins("N6"), IOStandard("lvttl")), + ("SBUS_OE", 0, Pins("P5"), IOStandard("lvttl")), + ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), + ("SBUS_3V3_SIZ", 0, Pins("R7 U3 V1"), IOStandard("lvttl")), + ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), + ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), +] + +# reusing the UART pins !!! 
+_usb_io = [ + ("usb", 0, + Subsignal("dp", Pins("V9")), # Serial TX + Subsignal("dm", Pins("U9")), # Serial RX + IOStandard("LVCMOS33")) +] + +# Connectors --------------------------------------------------------------------------------------- + +_connectors = [ +] + +# Platform ----------------------------------------------------------------------------------------- + +class Platform(XilinxPlatform): + default_clk_name = "clk48" + default_clk_period = 1e9/48e6 + + def __init__(self, variant="ztex2.13a"): + device = { + "ztex2.13a": "xc7a35tcsg324-1", + "ztex2.13b": "xc7a50tcsg324-1", #untested + "ztex2.13b2": "xc7a50tcsg324-1", #untested + "ztex2.13c": "xc7a75tcsg324-2", #untested + "ztex2.13d": "xc7a100tcsg324-2" #untested + }[variant] + XilinxPlatform.__init__(self, device, _io, _connectors, toolchain="vivado") + self.add_extension(_sbus_io) + self.add_extension(_sbus_sbus) + + self.toolchain.bitstream_commands = \ + ["set_property BITSTREAM.CONFIG.SPI_32BIT_ADDR No [current_design]", + "set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 2 [current_design]", + "set_property BITSTREAM.CONFIG.CONFIGRATE 66 [current_design]", + "set_property BITSTREAM.GENERAL.COMPRESS true [current_design]", + "set_property BITSTREAM.GENERAL.CRC DISABLE [current_design]", + "set_property STEPS.SYNTH_DESIGN.ARGS.RETIMING true [get_runs synth_1]", + "set_property CONFIG_VOLTAGE 3.3 [current_design]", + "set_property CFGBVS VCCO [current_design]" +# , "set_property STEPS.SYNTH_DESIGN.ARGS.DIRECTIVE AreaOptimized_high [get_runs synth_1]" + ] + + def create_programmer(self): + bscan_spi = "bscan_spi_xc7a35t.bit" + return OpenOCD("openocd_xc7_ft2232.cfg", bscan_spi) #FIXME + + def do_finalize(self, fragment): + XilinxPlatform.do_finalize(self, fragment) + self.add_period_constraint(self.lookup_request("clk48", loose=True), 1e9/48e6) From 6d4ba3aaa106ad31d00d8c641e43a9d71b086164 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 14 Jul 2021 11:17:57 -0400 Subject: [PATCH 34/78] clean dmesg 
--- NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index a016829..f273c8b 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -125,6 +125,8 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) sc->sc_bustag = sa->sa_bustag; sc->sc_dev = self; + aprint_normal("\n"); + if (sa->sa_nreg < 2) { aprint_error(": Not enough registers spaces\n"); return; @@ -140,7 +142,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) aprint_error(": cannot map DDR PHY registers\n"); return; } else { - aprint_error(": DDR PHY registers @ %p\n", (void*)sc->sc_bhregs_ddrphy); + aprint_normal_dev(self, ": DDR PHY registers @ %p\n", (void*)sc->sc_bhregs_ddrphy); } /* map SDRAM DFII */ if (sbus_bus_map(sc->sc_bustag, @@ -152,7 +154,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) aprint_error(": cannot map SDRAM DFII registers\n"); return; } else { - aprint_error(": SDRAM DFII registers @ %p\n", (void*)sc->sc_bhregs_sdram); + aprint_normal_dev(self, ": SDRAM DFII registers @ %p\n", (void*)sc->sc_bhregs_sdram); } sc->sc_bufsiz_ddrphy = sa->sa_reg[0].oa_size; @@ -175,7 +177,6 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) /* Clamp at parent's burst sizes */ sc->sc_burst &= sbusburst; - aprint_normal("\n"); aprint_normal_dev(self, "nid 0x%x, bustag %p, burst 0x%x (parent 0x%0x)\n", sc->sc_node, sc->sc_bustag, @@ -842,7 +843,6 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s while (1) { errors = sdram_write_read_check_test_pattern(sc, module, 42); errors += sdram_write_read_check_test_pattern(sc, module, 84); -aprint_normal("[min] delay %d -> errors %d\n", delay, errors); working = errors == 0; show = show_long; if (show) @@ -861,7 +861,6 @@ 
aprint_normal("[min] delay %d -> errors %d\n", delay, errors); while (1) { errors = sdram_write_read_check_test_pattern(sc, module, 42); errors += sdram_write_read_check_test_pattern(sc, module, 84); -aprint_normal("[max] delay %d -> errors %d\n", delay, errors); working = errors == 0; show = show_long; if (show) @@ -942,7 +941,6 @@ sdram_read_leveling_scan_module (struct sbusfpga_sdram_softc *sc, int module, in int _show = show; errors = sdram_write_read_check_test_pattern(sc, module, 42); errors += sdram_write_read_check_test_pattern(sc, module, 84); -aprint_normal("[scan] iter %d -> errors %d\n", i, errors); working = errors == 0; score += (working * max_errors * 32) + (max_errors - errors); if (_show) { From 4303270b534e5968b6e1aede1dc279ef994b102d Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 15 Jul 2021 04:07:10 -0400 Subject: [PATCH 35/78] minor renaming, think about buffering writes --- .../sbus_to_fpga_fsm.py | 107 +++++++++++++++--- 1 file changed, 93 insertions(+), 14 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 9fa1900..8155104 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -332,6 +332,11 @@ class SBusFPGABus(Module): self.master_read_buffer_read = Array(Signal() for a in range(4)) self.master_read_buffer_start = Signal() + self.master_write_buffer_data = Array(Signal(32) for a in range(4)) + self.master_write_buffer_addr = Signal(28) + self.master_write_buffer_todo = Array(Signal() for a in range(4)) + self.master_write_buffer_start = Signal() + self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) @@ -1285,13 +1290,13 @@ class SBusFPGABus(Module): ) # ##### Slave read buffering FSM #### - last_word_idx = Signal(2) - self.submodules.wishbone_slave_buffering_fsm = wishbone_slave_buffering_fsm = 
FSM(reset_state="Reset") + last_read_word_idx = Signal(2) + self.submodules.wishbone_slave_read_buffering_fsm = wishbone_slave_read_buffering_fsm = FSM(reset_state="Reset") #self.sync += led4.eq(self.master_read_buffer_start) - wishbone_slave_buffering_fsm.act("Reset", - NextState("Idle") + wishbone_slave_read_buffering_fsm.act("Reset", + NextState("Idle") ) - wishbone_slave_buffering_fsm.act("Idle", + wishbone_slave_read_buffering_fsm.act("Idle", If(self.wishbone_slave.cyc & self.wishbone_slave.stb & ~self.wishbone_slave.ack & @@ -1323,7 +1328,7 @@ class SBusFPGABus(Module): NextValue(self.master_read_buffer_read[1], 0), NextValue(self.master_read_buffer_read[2], 0), NextValue(self.master_read_buffer_read[3], 0), - NextValue(last_word_idx, self.wishbone_slave.adr[0:2]), + NextValue(last_read_word_idx, self.wishbone_slave.adr[0:2]), NextValue(self.master_read_buffer_start, 1), NextState("WaitForData") ).Else( @@ -1331,16 +1336,16 @@ class SBusFPGABus(Module): ) ) ) - wishbone_slave_buffering_fsm.act("WaitForData", + wishbone_slave_read_buffering_fsm.act("WaitForData", #led2.eq(1), - If(self.master_read_buffer_done[last_word_idx], + If(self.master_read_buffer_done[last_read_word_idx], NextValue(self.wishbone_slave.ack, 1), - NextValue(self.wishbone_slave.dat_r, Cat(self.master_read_buffer_data[last_word_idx][24:32], # LE - self.master_read_buffer_data[last_word_idx][16:24], - self.master_read_buffer_data[last_word_idx][ 8:16], - self.master_read_buffer_data[last_word_idx][ 0: 8])), -# NextValue(self.wishbone_slave.dat_r, self.master_read_buffer_data[last_word_idx]), - NextValue(self.master_read_buffer_read[last_word_idx], 1), + NextValue(self.wishbone_slave.dat_r, Cat(self.master_read_buffer_data[last_read_word_idx][24:32], # LE + self.master_read_buffer_data[last_read_word_idx][16:24], + self.master_read_buffer_data[last_read_word_idx][ 8:16], + self.master_read_buffer_data[last_read_word_idx][ 0: 8])), +# NextValue(self.wishbone_slave.dat_r, 
self.master_read_buffer_data[last_read_word_idx]), + NextValue(self.master_read_buffer_read[last_read_word_idx], 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout), NextState("Idle") ), @@ -1349,3 +1354,77 @@ class SBusFPGABus(Module): ) ) + + #last_write_word_idx = Signal(2) + #last_write_timeout = Signal(3) + #self.submodules.wishbone_slave_write_buffering_fsm = wishbone_slave_write_buffering_fsm = FSM(reset_state="Reset") + #wishbone_slave_write_buffering_fsm.act("Reset", + # NextState("Idle") + #) + #wishbone_slave_write_buffering_fsm.act("Idle", + # If(self.wishbone_slave.cyc & + # self.wishbone_slave.stb & + # ~self.wishbone_slave.ack & + # ~self.wishbone_slave.err & + # (self.wishbone_slave.sel == 0xf) & # Full Words Only + # self.wishbone_slave.we, + # NextValue(self.master_write_buffer_addr, self.wishbone_slave.adr[2:30]), + # NextValue(self.master_write_buffer_data[self.wishbone_slave.adr[0:2]], + # Cat(self.wishbone_slave.dat_w[24:32], # LE + # self.wishbone_slave.dat_w[16:24], + # self.wishbone_slave.dat_w[ 8:16], + # self.wishbone_slave.dat_w[ 0: 8])), + # NextValue(self.master_write_buffer_todo[self.wishbone_slave.adr[0:2]], 1), + # NextValue(self.wishbone_slave.ack, 1), + # NextValue(last_write_word_idx, self.wishbone_slave.adr[0:2]), + # NextValue(wishbone_slave_timeout, wishbone_default_timeout), + # If(self.wishbone_slave.adr[0:2] == 0, + # NextValue(last_write_timeout, 5), # CHECKME: 5 is arbitrary + # NextState("WaitForMoreData"), + # ).Else( + # NextValue(self.master_write_buffer_start, 1), + # NextState("WaitForWrite"), + # ) + # ) + #) + #wishbone_slave_write_buffering_fsm.act("WaitForMoreData", + # If(last_write_timeout > 0, + # NextValue(last_write_timeout, last_write_timeout - 1), + # ), + # If(self.wishbone_slave.cyc & + # self.wishbone_slave.stb & + # ~self.wishbone_slave.ack & + # ~self.wishbone_slave.err & + # self.wishbone_slave.we, + # If(((self.wishbone_slave.adr[2:30] != self.master_write_buffer_addr) | + # 
(self.wishbone_slave.sel != 0xf)), + # NextValue(self.master_write_buffer_start, 1), + # NextState("WaitForWrite"), + # ).Else( + # NextValue(self.master_write_buffer_data[self.wishbone_slave.adr[0:2]], + # Cat(self.wishbone_slave.dat_w[24:32], # LE + # self.wishbone_slave.dat_w[16:24], + # self.wishbone_slave.dat_w[ 8:16], + # self.wishbone_slave.dat_w[ 0: 8])), + # NextValue(self.master_write_buffer_todo[self.wishbone_slave.adr[0:2]], 1), + # NextValue(self.wishbone_slave.ack, 1), + # NextValue(last_write_word_idx, self.wishbone_slave.adr[0:2]), + # NextValue(wishbone_slave_timeout, wishbone_default_timeout), + # NextValue(last_write_timeout, 5), # CHECKME: 5 is arbitrary + # ) + # ).Elif(self.master_write_buffer_todo[0] & + # self.master_write_buffer_todo[1] & + # self.master_write_buffer_todo[2] & + # self.master_write_buffer_todo[3], + # NextValue(self.master_write_buffer_start, 1), + # NextState("WaitForWrite"), + # ).Elif(last_write_timeout == 0, + # NextValue(self.master_write_buffer_start, 1), + # NextState("WaitForWrite"), + # ) + #) + #wishbone_slave_write_buffering_fsm.act("WaitForWrite", + # If(self.master_write_buffer_start == 0, + # NextState("Idle"), + # ) + #) From cd9fa81a828569b51ab3855a959aa3e3d38c0c05 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 17 Jul 2021 11:03:44 -0400 Subject: [PATCH 36/78] access the SDRAM using a custom DMA; unreliable yet --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 359 ++++++++++++-- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h | 15 + sbus-to-ztex-gateware-migen/netbsd_csr.h | 465 ++++++++++++++++++ sbus-to-ztex-gateware-migen/prom_csr.fth | 8 +- sbus-to-ztex-gateware-migen/prom_migen.fth | 16 +- .../sbus_to_fpga_fsm.py | 173 ++++++- .../sbus_to_fpga_soc.py | 26 +- 7 files changed, 1006 insertions(+), 56 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/netbsd_csr.h diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 
f273c8b..035e98b 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -110,6 +110,12 @@ sbusfpga_sdram_match(device_t parent, cfdata_t cf, void *aux) int sdram_init(struct sbusfpga_sdram_softc *sc); +int +dma_init(struct sbusfpga_sdram_softc *sc); + +int +dma_memtest(struct sbusfpga_sdram_softc *sc); + /* * Attach all the sub-devices we can find */ @@ -123,11 +129,12 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) int sbusburst; sc->sc_bustag = sa->sa_bustag; + sc->sc_dmatag = sa->sa_dmatag; sc->sc_dev = self; aprint_normal("\n"); - if (sa->sa_nreg < 2) { + if (sa->sa_nreg < 3) { aprint_error(": Not enough registers spaces\n"); return; } @@ -142,7 +149,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) aprint_error(": cannot map DDR PHY registers\n"); return; } else { - aprint_normal_dev(self, ": DDR PHY registers @ %p\n", (void*)sc->sc_bhregs_ddrphy); + aprint_normal_dev(self, "DDR PHY registers @ %p\n", (void*)sc->sc_bhregs_ddrphy); } /* map SDRAM DFII */ if (sbus_bus_map(sc->sc_bustag, @@ -154,11 +161,42 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) aprint_error(": cannot map SDRAM DFII registers\n"); return; } else { - aprint_normal_dev(self, ": SDRAM DFII registers @ %p\n", (void*)sc->sc_bhregs_sdram); + aprint_normal_dev(self, "SDRAM DFII registers @ %p\n", (void*)sc->sc_bhregs_sdram); } - + /* custom DMA */ + if (sbus_bus_map(sc->sc_bustag, + sa->sa_reg[2].oa_space /* sa_slot */, + sa->sa_reg[2].oa_base /* sa_offset */, + sa->sa_reg[2].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_exchange_with_mem) != 0) { + aprint_error(": cannot map DMA registers\n"); + return; + } else { + aprint_normal_dev(self, "DMA registers @ %p\n", (void*)sc->sc_bhregs_exchange_with_mem); + } + if (sa->sa_nreg >= 4) { + /* if we map some of the memory itself */ + /* normally disabled, it's a debug feature */ + if 
(sbus_bus_map(sc->sc_bustag, + sa->sa_reg[3].oa_space /* sa_slot */, + sa->sa_reg[3].oa_base /* sa_offset */, + sa->sa_reg[3].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_mmap) != 0) { + aprint_error(": cannot map MMAP\n"); + return; + } else { + aprint_normal_dev(self, "MMAP @ %p\n", (void*)sc->sc_bhregs_mmap); + } + sc->sc_bufsiz_mmap = sa->sa_reg[3].oa_size; + } else { + sc->sc_bufsiz_mmap = 0; + } + sc->sc_bufsiz_ddrphy = sa->sa_reg[0].oa_size; sc->sc_bufsiz_sdram = sa->sa_reg[1].oa_size; + sc->sc_bufsiz_exchange_with_mem = sa->sa_reg[2].oa_size; node = sc->sc_node = sa->sa_node; @@ -183,14 +221,279 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) sc->sc_burst, sbsc->sc_burst); - sdram_init(sc); + if (!sdram_init(sc)) { + aprint_error_dev(self, "couldn't initialize SDRAM\n"); + return; + } + + if (!dma_init(sc)) { + aprint_error_dev(self, "couldn't initialize DMA for SDRAM\n"); + return; + } + + if (!dma_memtest(sc)) { + aprint_error_dev(self, "DMA-MEMTEST failed for SDRAM\n"); + return; + } } #define CONFIG_CSR_DATA_WIDTH 32 -// define CSR_LEDS_BASE to avoid defining the CSRs +// define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle #define CSR_LEDS_BASE +#define CSR_SDBLOCK2MEM_BASE +#define CSR_SDCORE_BASE +#define CSR_SDIRQ_BASE +#define CSR_SDMEM2BLOCK_BASE +#define CSR_SDPHY_BASE #include "dev/sbus/litex_csr.h" #undef CSR_LEDS_BASE +#undef CSR_SDBLOCK2MEM_BASE +#undef CSR_SDCORE_BASE +#undef CSR_SDIRQ_BASE +#undef CSR_SDMEM2BLOCK_BASE +#undef CSR_SDPHY_BASE + +int +dma_init(struct sbusfpga_sdram_softc *sc) { + sc->dma_blk_size = exchange_with_mem_blk_size_read(sc); + sc->dma_blk_base = exchange_with_mem_blk_base_read(sc); + aprint_normal_dev(sc->sc_dev, "DMA: HW -> block size is %d, base address is 0x%08x\n", sc->dma_blk_size, sc->dma_blk_base * sc->dma_blk_size); + + /* Allocate a dmamap */ + if (bus_dmamap_create(sc->sc_dmatag, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 1, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 
0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &sc->sc_dmamap) != 0) { + aprint_error_dev(sc->sc_dev, "DMA map create failed\n"); + return 0; + } else { + aprint_normal_dev(sc->sc_dev, "dmamap: %lu %lu %d (%p)\n", sc->sc_dmamap->dm_maxsegsz, sc->sc_dmamap->dm_mapsize, sc->sc_dmamap->dm_nsegs, sc->sc_dmatag->_dmamap_load); + } + + if (bus_dmamem_alloc(sc->sc_dmatag, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 64, 64, &sc->sc_segs, 1, &sc->sc_rsegs, BUS_DMA_NOWAIT | BUS_DMA_STREAMING)) { + aprint_error_dev(sc->sc_dev, "cannot allocate DVMA memory"); + bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); + return 0; + } + + if (bus_dmamem_map(sc->sc_dmatag, &sc->sc_segs, 1, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, &sc->sc_dma_kva, BUS_DMA_NOWAIT)) { + aprint_error_dev(sc->sc_dev, "cannot allocate DVMA address"); + bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); + bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); + return 0; + } + + if (bus_dmamap_load(sc->sc_dmatag, sc->sc_dmamap, sc->sc_dma_kva, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, /* kernel space */ NULL, + BUS_DMA_NOWAIT | BUS_DMA_STREAMING | BUS_DMA_WRITE)) { + aprint_error_dev(sc->sc_dev, "cannot load dma map"); + bus_dmamem_unmap(sc->sc_dmatag, &sc->sc_dma_kva, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ); + bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); + bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); + return 0; + } + + aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernal address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); + + return 1; +} + +static inline unsigned long +lfsr (unsigned long bits, unsigned long prev); +int +dma_memtest(struct sbusfpga_sdram_softc *sc) { + unsigned long *kva_ulong = (unsigned long*)sc->sc_dma_kva; + unsigned long val; + unsigned int blkn = 0; // 113; + unsigned int testdatasize = 4096; + unsigned int blkcnt ; + int count; + + aprint_normal_dev(sc->sc_dev, "Initializing DMA buffer.\n"); + + val = 0xDEADBEEF; + for (int i = 0 ; i < testdatasize/sizeof(unsigned long) ; i++) { + val = 
lfsr(32, val); + kva_ulong[i] = val; + } + aprint_normal_dev(sc->sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + + if (sc->sc_bufsiz_mmap > 0) { + int idx = blkn * sc->dma_blk_size / sizeof(unsigned long), x; + int bound = sc->sc_bufsiz_mmap / sizeof(unsigned long); + if (bound > idx) { + if ((bound - idx) > 10) + bound = idx + 10; + count = 0; + for (x = idx ; x < bound; x++) { + unsigned long data = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_mmap, x*sizeof(unsigned long)); + aprint_normal_dev(sc->sc_dev, "Prior to write [mmap] at %d: 0x%08lx\n", x, data); + } + } + } + + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_PREREAD); + + aprint_normal_dev(sc->sc_dev, "Starting DMA Write-to-Sdram.\n"); + + exchange_with_mem_blk_addr_write(sc, blkn + sc->dma_blk_base); + exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); + exchange_with_mem_blk_cnt_write(sc, 0x80000000 | (testdatasize / sc->dma_blk_size)); + + aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram started, polling\n"); + + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_POSTREAD); + + delay(500); + + count = 0; + while (((blkcnt = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { + aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram ongoing (%u, status 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", + blkcnt & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_wr_tosdram_read(sc)); + count ++; + delay(500); + } + + if (blkcnt) { + aprint_error_dev(sc->sc_dev, "DMA Write-to-Sdram didn't finish ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", + blkcnt & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_wr_tosdram_read(sc)); + return 0; + } else { + aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram done (status 0x%08x, 0x%08x, 0x%08x, 0x%08x, last phys addr written 0x%08x)\n", + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc), + exchange_with_mem_wr_tosdram_read(sc)); + } + + count = 0; + while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", blkcnt); + count ++; + delay(500); + } + + if (blkcnt & 0x3) { + aprint_error_dev(sc->sc_dev, "DMA Write-to-Sdram can't reach SDRAM ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + return 0; + } else { + aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram has reached SDRAM (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + } + + if (sc->sc_bufsiz_mmap > 0) { + int idx = blkn * sc->dma_blk_size / sizeof(unsigned long), x; + int bound = sc->sc_bufsiz_mmap / sizeof(unsigned long); + if (bound > idx) { + count = 0; + val = 0xDEADBEEF; + if ((bound - idx) > (testdatasize / sizeof(unsigned long))) + bound = idx + (testdatasize / sizeof(unsigned long)); + for (x = idx ; x < bound && count < 10; x++) { + unsigned long data = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_mmap, x*sizeof(unsigned long)); + val = lfsr(32, val); + if (val != data) { + aprint_error_dev(sc->sc_dev, "Read-after-write [mmap] error at %d: 0x%08lx vs. 
0x%08lx (0x%08lx)\n", x, data, val, val ^ data); + count ++; + } + } + } + } + + for (int i = 0 ; i < testdatasize/sizeof(unsigned long) ; i++) { + kva_ulong[i] = 0x0c0ffee0; + } + aprint_normal_dev(sc->sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_PREWRITE); + + aprint_normal_dev(sc->sc_dev, "Starting DMA Read-from-Sdram.\n"); + + exchange_with_mem_blk_addr_write(sc, blkn + sc->dma_blk_base); + exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); + exchange_with_mem_blk_cnt_write(sc, 0x00000000 | (testdatasize / sc->dma_blk_size)); + + aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram started, polling\n"); + + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_POSTWRITE); + + delay(500); + + count = 0; + while (((blkcnt = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { + aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram ongoing (%u, status 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc)); + count ++; + delay(500); + } + + if (blkcnt) { + aprint_error_dev(sc->sc_dev, "DMA Read-from-Sdram didn't finish ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + blkcnt & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + return 0; + } else { + aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram done (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + } + + count = 0; + while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram hasn't reached memory yet (status 0x%08x)\n", blkcnt); + count ++; + delay(500); + } + + aprint_normal_dev(sc->sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + + if (blkcnt & 0x3) { + aprint_error_dev(sc->sc_dev, "DMA Read-from-Sdram can't reach memory ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + return 0; + } else { + aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram has reached memory (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + } + + count = 0; + val = 0xDEADBEEF; + for (int i = 0 ; i < testdatasize/sizeof(unsigned long) && count < 10; i++) { + val = lfsr(32, val); + if (kva_ulong[i] != val) { + aprint_error_dev(sc->sc_dev, "Read-after-write error at %d: 0x%08lx vs. 
0x%08lx (0x%08lx)\n", i, kva_ulong[i], val, val ^ kva_ulong[i]); + count ++; + } + } + + if (count) + return 0; + + return 1; +} + /* auto-generated sdram_phy.h + sc */ #define DFII_CONTROL_SEL 0x01 @@ -732,7 +1035,7 @@ sdram_software_control_on(struct sbusfpga_sdram_softc *sc) previous = sdram_dfii_control_read(sc); if (previous != (0x02 | 0x04 | 0x08)) { sdram_dfii_control_write(sc, (0x02 | 0x04 | 0x08)); - aprint_normal ("Switching SDRAM to software control.\n"); + aprint_normal_dev(sc->sc_dev, "Switching SDRAM to software control.\n"); } } static void @@ -742,7 +1045,7 @@ sdram_software_control_off(struct sbusfpga_sdram_softc *sc) previous = sdram_dfii_control_read(sc); if (previous != (0x01)) { sdram_dfii_control_write(sc, (0x01)); - aprint_normal ("Switching SDRAM to hardware control.\n"); + aprint_normal_dev(sc->sc_dev, "Switching SDRAM to hardware control.\n"); } } __attribute__((unused)) static void @@ -785,7 +1088,7 @@ popcount (unsigned int x) static void print_scan_errors (unsigned int errors) { - aprint_normal ("%d", errors == 0); + aprint_normal("%d", errors == 0); } static unsigned int sdram_write_read_check_test_pattern (struct sbusfpga_sdram_softc *sc, int module, unsigned int seed) @@ -837,7 +1140,7 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s int delay, delay_mid, delay_range; int delay_min = -1, delay_max = -1; if (show_long) - aprint_normal ("m%d: |", module); + aprint_normal_dev(sc->sc_dev, "m%d: |", module); delay = 0; rst_delay(sc, module); while (1) { @@ -846,7 +1149,7 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s working = errors == 0; show = show_long; if (show) - print_scan_errors (errors); + print_scan_errors(errors); if (working && delay_min < 0) { delay_min = delay; break; @@ -864,7 +1167,7 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s working = errors == 0; show = show_long; if (show) - print_scan_errors (errors); + 
print_scan_errors(errors); if (!working && delay_max < 0) { delay_max = delay; } @@ -877,17 +1180,17 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s delay_max = delay; } if (show_long) - aprint_normal ("| "); + aprint_normal_dev(sc->sc_dev, "| "); delay_mid = (delay_min + delay_max) / 2 % 32; delay_range = (delay_max - delay_min) / 2; if (show_short) { if (delay_min < 0) - aprint_normal ("delays: -"); + aprint_normal("delays: -"); else - aprint_normal ("delays: %02d+-%02d", delay_mid, delay_range); + aprint_normal("delays: %02d+-%02d", delay_mid, delay_range); } if (show_long) - aprint_normal ("\n"); + aprint_normal("\n"); rst_delay(sc, module); cdelay (100); for (i = 0; i < delay_mid; i++) { @@ -934,7 +1237,7 @@ sdram_read_leveling_scan_module (struct sbusfpga_sdram_softc *sc, int module, in unsigned int errors; score = 0; if (show) - aprint_normal (" m%d, b%02d: |", module, bitslip); + aprint_normal_dev(sc->sc_dev, " m%d, b%02d: |", module, bitslip); sdram_read_leveling_rst_delay(sc, module); for (i = 0; i < 32; i++) { int working; @@ -944,12 +1247,12 @@ sdram_read_leveling_scan_module (struct sbusfpga_sdram_softc *sc, int module, in working = errors == 0; score += (working * max_errors * 32) + (max_errors - errors); if (_show) { - print_scan_errors (errors); + print_scan_errors(errors); } sdram_read_leveling_inc_delay(sc, module); } if (show) - aprint_normal ("| "); + aprint_normal("| "); return score; } static void @@ -969,7 +1272,7 @@ sdram_read_leveling(struct sbusfpga_sdram_softc *sc) sdram_leveling_center_module(sc, module, 1, 0, sdram_read_leveling_rst_delay, sdram_read_leveling_inc_delay); - aprint_normal ("\n"); + aprint_normal("\n"); if (score > best_score) { best_bitslip = bitslip; best_score = score; @@ -978,14 +1281,14 @@ sdram_read_leveling(struct sbusfpga_sdram_softc *sc) break; sdram_read_leveling_inc_bitslip(sc, module); } - aprint_normal (" best: m%d, b%02d ", module, best_bitslip); + 
aprint_normal_dev(sc->sc_dev, " best: m%d, b%02d ", module, best_bitslip); sdram_read_leveling_rst_bitslip(sc, module); for (bitslip = 0; bitslip < best_bitslip; bitslip++) sdram_read_leveling_inc_bitslip(sc, module); sdram_leveling_center_module(sc, module, 1, 0, sdram_read_leveling_rst_delay, sdram_read_leveling_inc_delay); - aprint_normal ("\n"); + aprint_normal("\n"); } } static void @@ -1026,9 +1329,9 @@ sdram_write_latency_calibration(struct sbusfpga_sdram_softc *sc) else bitslip = _sdram_write_leveling_bitslips[module]; if (bitslip == -1) - aprint_normal ("m%d:- ", module); + aprint_normal_dev(sc->sc_dev, "m%d:- ", module); else - aprint_normal ("m%d:%d ", module, bitslip); + aprint_normal_dev(sc->sc_dev, "m%d:%d ", module, bitslip); ddrphy_dly_sel_write(sc, 1 << module); ddrphy_wdly_dq_bitslip_rst_write(sc, 1); for (i = 0; i < bitslip; i++) { @@ -1036,7 +1339,7 @@ sdram_write_latency_calibration(struct sbusfpga_sdram_softc *sc) } ddrphy_dly_sel_write(sc, 0); } - aprint_normal ("\n"); + aprint_normal("\n"); } static int sdram_leveling(struct sbusfpga_sdram_softc *sc) @@ -1047,9 +1350,9 @@ sdram_leveling(struct sbusfpga_sdram_softc *sc) sdram_read_leveling_rst_delay(sc, module); sdram_read_leveling_rst_bitslip(sc, module); } - aprint_normal ("Write latency calibration:\n"); + aprint_normal_dev(sc->sc_dev, "Write latency calibration:\n"); sdram_write_latency_calibration(sc); - aprint_normal ("Read leveling:\n"); + aprint_normal_dev(sc->sc_dev, "Read leveling:\n"); sdram_read_leveling(sc); sdram_software_control_off(sc); return 1; @@ -1059,7 +1362,7 @@ sdram_init(struct sbusfpga_sdram_softc *sc) { ddrphy_rdphase_write(sc, 2); ddrphy_wrphase_write(sc, 3); - aprint_normal ("Initializing SDRAM @0x%08lx...\n", 0x80000000L); + aprint_normal_dev(sc->sc_dev, "Initializing SDRAM @0x%08lx...\n", 0x80000000L); sdram_software_control_on(sc); ddrphy_rst_write(sc, 1); cdelay (1000); diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h 
b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h index 2fa554a..3c24afe 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h @@ -37,8 +37,23 @@ struct sbusfpga_sdram_softc { bus_space_tag_t sc_bustag; /* bus tag */ bus_space_handle_t sc_bhregs_ddrphy; /* bus handle */ bus_space_handle_t sc_bhregs_sdram; /* bus handle */ + bus_space_handle_t sc_bhregs_exchange_with_mem; /* bus handle */ + bus_space_handle_t sc_bhregs_mmap; /* bus handle */ int sc_bufsiz_ddrphy; /* Size of buffer */ int sc_bufsiz_sdram; /* Size of buffer */ + int sc_bufsiz_exchange_with_mem; /* bus handle */ + int sc_bufsiz_mmap; /* bus handle */ + /* specific of the DMA engine */ + u_int dma_blk_size; + u_int dma_blk_base; + /* DMA kernel structures */ + bus_dma_tag_t sc_dmatag; + bus_dmamap_t sc_dmamap; + bus_dma_segment_t sc_segs; + int sc_rsegs; + void * sc_dma_kva; }; +#define SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ (4*1024) + #endif /* _SBUSFPGA_SDRAM_H_ */ diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h new file mode 100644 index 0000000..f7a1606 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -0,0 +1,465 @@ +//-------------------------------------------------------------------------------- +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-17 11:01:08 +//-------------------------------------------------------------------------------- +#ifndef __GENERATED_CSR_H +#define __GENERATED_CSR_H +#ifndef CSR_BASE +#define CSR_BASE 0x40000L +#endif + +/* leds */ +#ifndef CSR_LEDS_BASE +#define CSR_LEDS_BASE (CSR_BASE + 0x0L) +#define CSR_LEDS_OUT_ADDR (CSR_LEDS_BASE + 0x0L) +#define CSR_LEDS_OUT_SIZE 1 +static inline uint32_t leds_out_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_leds, 0x0L); +} +static inline void leds_out_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, 
sc->sc_bhregs_leds, 0x0L, v); +} +#endif // CSR_LEDS_BASE + +/* ddrphy */ +#ifndef CSR_DDRPHY_BASE +#define CSR_DDRPHY_BASE (CSR_BASE + 0x1000L) +#define CSR_DDRPHY_RST_ADDR (CSR_DDRPHY_BASE + 0x0L) +#define CSR_DDRPHY_RST_SIZE 1 +static inline uint32_t ddrphy_rst_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x0L); +} +static inline void ddrphy_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x0L, v); +} +#define CSR_DDRPHY_HALF_SYS8X_TAPS_ADDR (CSR_DDRPHY_BASE + 0x4L) +#define CSR_DDRPHY_HALF_SYS8X_TAPS_SIZE 1 +static inline uint32_t ddrphy_half_sys8x_taps_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x4L); +} +static inline void ddrphy_half_sys8x_taps_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x4L, v); +} +#define CSR_DDRPHY_WLEVEL_EN_ADDR (CSR_DDRPHY_BASE + 0x8L) +#define CSR_DDRPHY_WLEVEL_EN_SIZE 1 +static inline uint32_t ddrphy_wlevel_en_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x8L); +} +static inline void ddrphy_wlevel_en_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x8L, v); +} +#define CSR_DDRPHY_WLEVEL_STROBE_ADDR (CSR_DDRPHY_BASE + 0xcL) +#define CSR_DDRPHY_WLEVEL_STROBE_SIZE 1 +static inline uint32_t ddrphy_wlevel_strobe_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0xcL); +} +static inline void ddrphy_wlevel_strobe_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0xcL, v); +} +#define CSR_DDRPHY_DLY_SEL_ADDR (CSR_DDRPHY_BASE + 0x10L) +#define CSR_DDRPHY_DLY_SEL_SIZE 1 +static inline uint32_t ddrphy_dly_sel_read(struct sbusfpga_sdram_softc *sc) { + return 
bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x10L); +} +static inline void ddrphy_dly_sel_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x10L, v); +} +#define CSR_DDRPHY_RDLY_DQ_RST_ADDR (CSR_DDRPHY_BASE + 0x14L) +#define CSR_DDRPHY_RDLY_DQ_RST_SIZE 1 +static inline uint32_t ddrphy_rdly_dq_rst_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x14L); +} +static inline void ddrphy_rdly_dq_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x14L, v); +} +#define CSR_DDRPHY_RDLY_DQ_INC_ADDR (CSR_DDRPHY_BASE + 0x18L) +#define CSR_DDRPHY_RDLY_DQ_INC_SIZE 1 +static inline uint32_t ddrphy_rdly_dq_inc_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x18L); +} +static inline void ddrphy_rdly_dq_inc_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x18L, v); +} +#define CSR_DDRPHY_RDLY_DQ_BITSLIP_RST_ADDR (CSR_DDRPHY_BASE + 0x1cL) +#define CSR_DDRPHY_RDLY_DQ_BITSLIP_RST_SIZE 1 +static inline uint32_t ddrphy_rdly_dq_bitslip_rst_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x1cL); +} +static inline void ddrphy_rdly_dq_bitslip_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x1cL, v); +} +#define CSR_DDRPHY_RDLY_DQ_BITSLIP_ADDR (CSR_DDRPHY_BASE + 0x20L) +#define CSR_DDRPHY_RDLY_DQ_BITSLIP_SIZE 1 +static inline uint32_t ddrphy_rdly_dq_bitslip_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x20L); +} +static inline void ddrphy_rdly_dq_bitslip_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x20L, v); +} +#define 
CSR_DDRPHY_WDLY_DQ_BITSLIP_RST_ADDR (CSR_DDRPHY_BASE + 0x24L) +#define CSR_DDRPHY_WDLY_DQ_BITSLIP_RST_SIZE 1 +static inline uint32_t ddrphy_wdly_dq_bitslip_rst_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x24L); +} +static inline void ddrphy_wdly_dq_bitslip_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x24L, v); +} +#define CSR_DDRPHY_WDLY_DQ_BITSLIP_ADDR (CSR_DDRPHY_BASE + 0x28L) +#define CSR_DDRPHY_WDLY_DQ_BITSLIP_SIZE 1 +static inline uint32_t ddrphy_wdly_dq_bitslip_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x28L); +} +static inline void ddrphy_wdly_dq_bitslip_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x28L, v); +} +#define CSR_DDRPHY_RDPHASE_ADDR (CSR_DDRPHY_BASE + 0x2cL) +#define CSR_DDRPHY_RDPHASE_SIZE 1 +static inline uint32_t ddrphy_rdphase_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x2cL); +} +static inline void ddrphy_rdphase_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x2cL, v); +} +#define CSR_DDRPHY_WRPHASE_ADDR (CSR_DDRPHY_BASE + 0x30L) +#define CSR_DDRPHY_WRPHASE_SIZE 1 +static inline uint32_t ddrphy_wrphase_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x30L); +} +static inline void ddrphy_wrphase_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x30L, v); +} +#endif // CSR_DDRPHY_BASE + +/* exchange_with_mem */ +#ifndef CSR_EXCHANGE_WITH_MEM_BASE +#define CSR_EXCHANGE_WITH_MEM_BASE (CSR_BASE + 0x2000L) +#define CSR_EXCHANGE_WITH_MEM_BLK_SIZE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x0L) +#define CSR_EXCHANGE_WITH_MEM_BLK_SIZE_SIZE 1 +static inline uint32_t 
exchange_with_mem_blk_size_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x0L); +} +#define CSR_EXCHANGE_WITH_MEM_BLK_BASE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x4L) +#define CSR_EXCHANGE_WITH_MEM_BLK_BASE_SIZE 1 +static inline uint32_t exchange_with_mem_blk_base_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x4L); +} +#define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x8L) +#define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_SIZE 1 +static inline uint32_t exchange_with_mem_blk_addr_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x8L); +} +static inline void exchange_with_mem_blk_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x8L, v); +} +#define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0xcL) +#define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_SIZE 1 +static inline uint32_t exchange_with_mem_dma_addr_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0xcL); +} +static inline void exchange_with_mem_dma_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0xcL, v); +} +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x10L) +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_SIZE 1 +static inline uint32_t exchange_with_mem_blk_cnt_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x10L); +} +static inline void exchange_with_mem_blk_cnt_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x10L, v); +} +#define CSR_EXCHANGE_WITH_MEM_LAST_BLK_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x14L) +#define 
CSR_EXCHANGE_WITH_MEM_LAST_BLK_SIZE 1 +static inline uint32_t exchange_with_mem_last_blk_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x14L); +} +#define CSR_EXCHANGE_WITH_MEM_LAST_DMA_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x18L) +#define CSR_EXCHANGE_WITH_MEM_LAST_DMA_SIZE 1 +static inline uint32_t exchange_with_mem_last_dma_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x18L); +} +#define CSR_EXCHANGE_WITH_MEM_BLK_REM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x1cL) +#define CSR_EXCHANGE_WITH_MEM_BLK_REM_SIZE 1 +static inline uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x1cL); +} +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x20L) +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x20L); +} +#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x24L) +#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_SIZE 1 +static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x24L); +} +#endif // CSR_EXCHANGE_WITH_MEM_BASE + +/* sdram */ +#ifndef CSR_SDRAM_BASE +#define CSR_SDRAM_BASE (CSR_BASE + 0x3000L) +#define CSR_SDRAM_DFII_CONTROL_ADDR (CSR_SDRAM_BASE + 0x0L) +#define CSR_SDRAM_DFII_CONTROL_SIZE 1 +static inline uint32_t sdram_dfii_control_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x0L); +} +static inline void sdram_dfii_control_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x0L, v); +} +#define 
CSR_SDRAM_DFII_CONTROL_SEL_OFFSET 0 +#define CSR_SDRAM_DFII_CONTROL_SEL_SIZE 1 +static inline uint32_t sdram_dfii_control_sel_extract(struct sbusfpga_sdram_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t sdram_dfii_control_sel_read(struct sbusfpga_sdram_softc *sc) { + uint32_t word = sdram_dfii_control_read(sc); + return sdram_dfii_control_sel_extract(sc, word); +} +static inline uint32_t sdram_dfii_control_sel_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void sdram_dfii_control_sel_write(struct sbusfpga_sdram_softc *sc, uint32_t plain_value) { + uint32_t oldword = sdram_dfii_control_read(sc); + uint32_t newword = sdram_dfii_control_sel_replace(sc, oldword, plain_value); + sdram_dfii_control_write(sc, newword); +} +#define CSR_SDRAM_DFII_CONTROL_CKE_OFFSET 1 +#define CSR_SDRAM_DFII_CONTROL_CKE_SIZE 1 +static inline uint32_t sdram_dfii_control_cke_extract(struct sbusfpga_sdram_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t sdram_dfii_control_cke_read(struct sbusfpga_sdram_softc *sc) { + uint32_t word = sdram_dfii_control_read(sc); + return sdram_dfii_control_cke_extract(sc, word); +} +static inline uint32_t sdram_dfii_control_cke_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 1))) | (mask & plain_value)<< 1 ; +} +static inline void sdram_dfii_control_cke_write(struct sbusfpga_sdram_softc *sc, uint32_t plain_value) { + uint32_t oldword = sdram_dfii_control_read(sc); + uint32_t newword = sdram_dfii_control_cke_replace(sc, oldword, plain_value); + sdram_dfii_control_write(sc, newword); +} +#define CSR_SDRAM_DFII_CONTROL_ODT_OFFSET 2 +#define 
CSR_SDRAM_DFII_CONTROL_ODT_SIZE 1 +static inline uint32_t sdram_dfii_control_odt_extract(struct sbusfpga_sdram_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 2) & mask ); +} +static inline uint32_t sdram_dfii_control_odt_read(struct sbusfpga_sdram_softc *sc) { + uint32_t word = sdram_dfii_control_read(sc); + return sdram_dfii_control_odt_extract(sc, word); +} +static inline uint32_t sdram_dfii_control_odt_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 2))) | (mask & plain_value)<< 2 ; +} +static inline void sdram_dfii_control_odt_write(struct sbusfpga_sdram_softc *sc, uint32_t plain_value) { + uint32_t oldword = sdram_dfii_control_read(sc); + uint32_t newword = sdram_dfii_control_odt_replace(sc, oldword, plain_value); + sdram_dfii_control_write(sc, newword); +} +#define CSR_SDRAM_DFII_CONTROL_RESET_N_OFFSET 3 +#define CSR_SDRAM_DFII_CONTROL_RESET_N_SIZE 1 +static inline uint32_t sdram_dfii_control_reset_n_extract(struct sbusfpga_sdram_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 3) & mask ); +} +static inline uint32_t sdram_dfii_control_reset_n_read(struct sbusfpga_sdram_softc *sc) { + uint32_t word = sdram_dfii_control_read(sc); + return sdram_dfii_control_reset_n_extract(sc, word); +} +static inline uint32_t sdram_dfii_control_reset_n_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 3))) | (mask & plain_value)<< 3 ; +} +static inline void sdram_dfii_control_reset_n_write(struct sbusfpga_sdram_softc *sc, uint32_t plain_value) { + uint32_t oldword = sdram_dfii_control_read(sc); + uint32_t newword = sdram_dfii_control_reset_n_replace(sc, oldword, plain_value); + sdram_dfii_control_write(sc, newword); +} +#define CSR_SDRAM_DFII_PI0_COMMAND_ADDR (CSR_SDRAM_BASE + 0x4L) +#define 
CSR_SDRAM_DFII_PI0_COMMAND_SIZE 1 +static inline uint32_t sdram_dfii_pi0_command_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x4L); +} +static inline void sdram_dfii_pi0_command_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x4L, v); +} +#define CSR_SDRAM_DFII_PI0_COMMAND_ISSUE_ADDR (CSR_SDRAM_BASE + 0x8L) +#define CSR_SDRAM_DFII_PI0_COMMAND_ISSUE_SIZE 1 +static inline uint32_t sdram_dfii_pi0_command_issue_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x8L); +} +static inline void sdram_dfii_pi0_command_issue_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x8L, v); +} +#define CSR_SDRAM_DFII_PI0_ADDRESS_ADDR (CSR_SDRAM_BASE + 0xcL) +#define CSR_SDRAM_DFII_PI0_ADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi0_address_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0xcL); +} +static inline void sdram_dfii_pi0_address_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0xcL, v); +} +#define CSR_SDRAM_DFII_PI0_BADDRESS_ADDR (CSR_SDRAM_BASE + 0x10L) +#define CSR_SDRAM_DFII_PI0_BADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi0_baddress_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x10L); +} +static inline void sdram_dfii_pi0_baddress_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x10L, v); +} +#define CSR_SDRAM_DFII_PI0_WRDATA_ADDR (CSR_SDRAM_BASE + 0x14L) +#define CSR_SDRAM_DFII_PI0_WRDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi0_wrdata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x14L); +} +static inline void 
sdram_dfii_pi0_wrdata_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x14L, v); +} +#define CSR_SDRAM_DFII_PI0_RDDATA_ADDR (CSR_SDRAM_BASE + 0x18L) +#define CSR_SDRAM_DFII_PI0_RDDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi0_rddata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x18L); +} +#define CSR_SDRAM_DFII_PI1_COMMAND_ADDR (CSR_SDRAM_BASE + 0x1cL) +#define CSR_SDRAM_DFII_PI1_COMMAND_SIZE 1 +static inline uint32_t sdram_dfii_pi1_command_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x1cL); +} +static inline void sdram_dfii_pi1_command_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x1cL, v); +} +#define CSR_SDRAM_DFII_PI1_COMMAND_ISSUE_ADDR (CSR_SDRAM_BASE + 0x20L) +#define CSR_SDRAM_DFII_PI1_COMMAND_ISSUE_SIZE 1 +static inline uint32_t sdram_dfii_pi1_command_issue_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x20L); +} +static inline void sdram_dfii_pi1_command_issue_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x20L, v); +} +#define CSR_SDRAM_DFII_PI1_ADDRESS_ADDR (CSR_SDRAM_BASE + 0x24L) +#define CSR_SDRAM_DFII_PI1_ADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi1_address_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x24L); +} +static inline void sdram_dfii_pi1_address_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x24L, v); +} +#define CSR_SDRAM_DFII_PI1_BADDRESS_ADDR (CSR_SDRAM_BASE + 0x28L) +#define CSR_SDRAM_DFII_PI1_BADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi1_baddress_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, 
sc->sc_bhregs_sdram, 0x28L); +} +static inline void sdram_dfii_pi1_baddress_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x28L, v); +} +#define CSR_SDRAM_DFII_PI1_WRDATA_ADDR (CSR_SDRAM_BASE + 0x2cL) +#define CSR_SDRAM_DFII_PI1_WRDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi1_wrdata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x2cL); +} +static inline void sdram_dfii_pi1_wrdata_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x2cL, v); +} +#define CSR_SDRAM_DFII_PI1_RDDATA_ADDR (CSR_SDRAM_BASE + 0x30L) +#define CSR_SDRAM_DFII_PI1_RDDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi1_rddata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x30L); +} +#define CSR_SDRAM_DFII_PI2_COMMAND_ADDR (CSR_SDRAM_BASE + 0x34L) +#define CSR_SDRAM_DFII_PI2_COMMAND_SIZE 1 +static inline uint32_t sdram_dfii_pi2_command_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x34L); +} +static inline void sdram_dfii_pi2_command_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x34L, v); +} +#define CSR_SDRAM_DFII_PI2_COMMAND_ISSUE_ADDR (CSR_SDRAM_BASE + 0x38L) +#define CSR_SDRAM_DFII_PI2_COMMAND_ISSUE_SIZE 1 +static inline uint32_t sdram_dfii_pi2_command_issue_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x38L); +} +static inline void sdram_dfii_pi2_command_issue_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x38L, v); +} +#define CSR_SDRAM_DFII_PI2_ADDRESS_ADDR (CSR_SDRAM_BASE + 0x3cL) +#define CSR_SDRAM_DFII_PI2_ADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi2_address_read(struct sbusfpga_sdram_softc *sc) { + return 
bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x3cL); +} +static inline void sdram_dfii_pi2_address_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x3cL, v); +} +#define CSR_SDRAM_DFII_PI2_BADDRESS_ADDR (CSR_SDRAM_BASE + 0x40L) +#define CSR_SDRAM_DFII_PI2_BADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi2_baddress_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x40L); +} +static inline void sdram_dfii_pi2_baddress_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x40L, v); +} +#define CSR_SDRAM_DFII_PI2_WRDATA_ADDR (CSR_SDRAM_BASE + 0x44L) +#define CSR_SDRAM_DFII_PI2_WRDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi2_wrdata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x44L); +} +static inline void sdram_dfii_pi2_wrdata_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x44L, v); +} +#define CSR_SDRAM_DFII_PI2_RDDATA_ADDR (CSR_SDRAM_BASE + 0x48L) +#define CSR_SDRAM_DFII_PI2_RDDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi2_rddata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x48L); +} +#define CSR_SDRAM_DFII_PI3_COMMAND_ADDR (CSR_SDRAM_BASE + 0x4cL) +#define CSR_SDRAM_DFII_PI3_COMMAND_SIZE 1 +static inline uint32_t sdram_dfii_pi3_command_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x4cL); +} +static inline void sdram_dfii_pi3_command_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x4cL, v); +} +#define CSR_SDRAM_DFII_PI3_COMMAND_ISSUE_ADDR (CSR_SDRAM_BASE + 0x50L) +#define CSR_SDRAM_DFII_PI3_COMMAND_ISSUE_SIZE 1 +static inline uint32_t sdram_dfii_pi3_command_issue_read(struct 
sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x50L); +} +static inline void sdram_dfii_pi3_command_issue_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x50L, v); +} +#define CSR_SDRAM_DFII_PI3_ADDRESS_ADDR (CSR_SDRAM_BASE + 0x54L) +#define CSR_SDRAM_DFII_PI3_ADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi3_address_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x54L); +} +static inline void sdram_dfii_pi3_address_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x54L, v); +} +#define CSR_SDRAM_DFII_PI3_BADDRESS_ADDR (CSR_SDRAM_BASE + 0x58L) +#define CSR_SDRAM_DFII_PI3_BADDRESS_SIZE 1 +static inline uint32_t sdram_dfii_pi3_baddress_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x58L); +} +static inline void sdram_dfii_pi3_baddress_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x58L, v); +} +#define CSR_SDRAM_DFII_PI3_WRDATA_ADDR (CSR_SDRAM_BASE + 0x5cL) +#define CSR_SDRAM_DFII_PI3_WRDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi3_wrdata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x5cL); +} +static inline void sdram_dfii_pi3_wrdata_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x5cL, v); +} +#define CSR_SDRAM_DFII_PI3_RDDATA_ADDR (CSR_SDRAM_BASE + 0x60L) +#define CSR_SDRAM_DFII_PI3_RDDATA_SIZE 1 +static inline uint32_t sdram_dfii_pi3_rddata_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sdram, 0x60L); +} +#endif // CSR_SDRAM_BASE + +#endif diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth index 5c43479..d9d0973 
100644 --- a/sbus-to-ztex-gateware-migen/prom_csr.fth +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -1,12 +1,8 @@ \ auto-generated base regions for CSRs in the PROM h# 40000 constant sbusfpga_csraddr_leds h# 41000 constant sbusfpga_csraddr_ddrphy -h# 42000 constant sbusfpga_csraddr_sdblock2mem -h# 43000 constant sbusfpga_csraddr_sdcore -h# 44000 constant sbusfpga_csraddr_sdirq -h# 45000 constant sbusfpga_csraddr_sdmem2block -h# 46000 constant sbusfpga_csraddr_sdphy -h# 47000 constant sbusfpga_csraddr_sdram +h# 42000 constant sbusfpga_csraddr_exchange_with_mem +h# 43000 constant sbusfpga_csraddr_sdram h# 80000 constant sbusfpga_regionaddr_usb_host_ctrl h# 0 constant sbusfpga_regionaddr_prom h# fc000000 constant sbusfpga_regionaddr_usb_fake_dma diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index 6b9388a..6b0821d 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -43,6 +43,7 @@ new-device \ Absolute minimal stuff; name & registers def. " generic-ohci" device-name +\ USB registers are in the device space, not the CSR space my-address sbusfpga_regionaddr_usb_host_ctrl + my-space h# 1000 reg \ we don't support ET or anything non-32bits h# 7c xdrint " slave-burst-sizes" attribute @@ -89,11 +90,15 @@ new-device \ Absolute minimal stuff; name & registers def. 
" RDOL,sdram" device-name -\ two pages of registers: +\ three pages of registers: my-address sbusfpga_csraddr_ddrphy + my-space xdrphys \ Offset#1 h# 1000 xdrint xdr+ \ Merge size#1 my-address sbusfpga_csraddr_sdram + my-space xdrphys xdr+ \ Merge offset#2 h# 1000 xdrint xdr+ \ Merge size#2 +my-address sbusfpga_csraddr_exchange_with_mem + my-space xdrphys xdr+ \ Merge offset#3 +h# 1000 xdrint xdr+ \ Merge size#3 +\ my-address sbusfpga_regionaddr_main_ram + my-space xdrphys xdr+ \ Merge offset#4 +\ h# 10000 xdrint xdr+ \ Merge size#4 " reg" attribute \ we don't support ET or anything non-32bits @@ -102,7 +107,8 @@ h# 7c xdrint " burst-sizes" attribute headers -1 instance value mregs-ddrphy-virt --1 instance value mregs-sdramdfii-virt +-1 instance value mregs-sdram-virt +-1 instance value mregs-exchange_with_mem-virt my-address constant my-sbus-address my-space constant my-sbus-space : map-in ( adr space size -- virt ) " map-in" $call-parent ; @@ -110,11 +116,13 @@ my-space constant my-sbus-space : map-in-mregs ( -- ) my-sbus-address sbusfpga_csraddr_ddrphy + my-sbus-space h# 1000 map-in is mregs-ddrphy-virt - my-sbus-address sbusfpga_csraddr_sdram + my-sbus-space h# 1000 map-in is mregs-sdramdfii-virt + my-sbus-address sbusfpga_csraddr_sdram + my-sbus-space h# 1000 map-in is mregs-sdram-virt + my-sbus-address sbusfpga_csraddr_exchange_with_mem + my-sbus-space h# 1000 map-in is mregs-exchange_with_mem-virt ; : map-out-mregs ( -- ) mregs-ddrphy-virt h# 1000 map-out - mregs-sdramdfii-virt h# 1000 map-out + mregs-sdram-virt h# 1000 map-out + mregs-exchange_with_mem-virt h# 1000 map-out ; \ fload sdram_init.fth diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 8155104..280685c 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -29,6 +29,7 @@ ROM_ADDR_PFX = Signal(12, reset = 0) WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) USBOHCI_ADDR_PFX 
= Signal(12, reset = 8) SRAM_ADDR_PFX = Signal(12, reset = 9) +SDRAM_ADDR_PFX = Signal(12, reset = 2048) wishbone_default_timeout = 120 ## must be > sbus_default_timeout sbus_default_timeout = 100 ## must be below 127 as we can wait twice on it inside the 255 cycles @@ -171,12 +172,23 @@ LED_M_READ = 0x20 LED_M_CACHE = 0x40 class SBusFPGABus(Module): - def __init__(self, platform, hold_reset, wishbone_slave, wishbone_master): + def __init__(self, platform, hold_reset, wishbone_slave, wishbone_master, tosbus_fifo, fromsbus_fifo, fromsbus_req_fifo, burst_size = 8): self.platform = platform self.hold_reset = hold_reset self.wishbone_slave = wishbone_slave self.wishbone_master = wishbone_master + + self.tosbus_fifo = tosbus_fifo + self.fromsbus_fifo = fromsbus_fifo + self.fromsbus_req_fifo = fromsbus_req_fifo + + data_width = burst_size * 4 + data_width_bits = burst_size * 32 + blk_addr_width = 32 - log2_int(data_width) # 27 for burst_size == 8 + + fifo_blk_addr = Signal(blk_addr_width) + fifo_buffer = Signal(data_width_bits) pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") SBUS_DATA_OE_LED_o = Signal() @@ -266,6 +278,8 @@ class SBusFPGABus(Module): data_read_enable = Signal() # start enqueuing req. 
to read from WB master_data = Signal(32) # could be merged with p_data + master_data_src_tosbus_fifo = Signal() + master_data_src_fromsbus_fifo = Signal() master_addr = Signal(30) # could be meged with data_read_addr master_size = Signal(4) master_idx = Signal(2) @@ -278,7 +292,7 @@ class SBusFPGABus(Module): wishbone_slave_timeout = Signal(6) sbus_slave_timeout = Signal(6) - sbus_master_throttle = Signal(4) + sbus_master_throttle = Signal(2) #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) @@ -340,6 +354,9 @@ class SBusFPGABus(Module): self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) + + self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) + self.sync += platform.request("user_led", 7).eq(master_data_src_fromsbus_fifo) slave_fsm.act("Reset", #NextValue(self.led_display.value, 0x0000000000), @@ -387,7 +404,8 @@ class SBusFPGABus(Module): ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -419,7 +437,8 @@ class SBusFPGABus(Module): NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + 
(SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)| + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -456,7 +475,8 @@ class SBusFPGABus(Module): #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)| + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -502,7 +522,8 @@ class SBusFPGABus(Module): NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_WORD), @@ -528,7 +549,8 @@ class SBusFPGABus(Module): (SBUS_3V3_PPRD_i == 0)), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | 
+ (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), @@ -559,7 +581,8 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") - ).Elif((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX), + ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), @@ -689,6 +712,70 @@ class SBusFPGABus(Module): #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_READ), Signal(2, reset = 0), self.master_read_buffer_addr)), NextState("Master_Translation") + ).Elif(SBUS_3V3_BGs_i & + self.tosbus_fifo.readable & + (sbus_master_throttle == 0), + NextValue(SBUS_3V3_BRs_o, 0) + ).Elif(~SBUS_3V3_BGs_i & + self.tosbus_fifo.readable, + NextValue(sbus_wishbone_le, 0), # checkme + NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request + NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) + NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output + NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input + NextValue(burst_counter, 0), + NextValue(burst_limit_m1, burst_size - 1), + NextValue(SBUS_3V3_D_o, self.tosbus_fifo.dout[0:32]), + NextValue(master_addr, self.tosbus_fifo.dout[2:32]), + NextValue(master_data, self.tosbus_fifo.dout[32:64]), + NextValue(fifo_buffer, self.tosbus_fifo.dout[32:]), + NextValue(master_data_src_tosbus_fifo, 1), + self.tosbus_fifo.re.eq(1), + 
Case(burst_size, { + 2 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST2), + NextValue(master_size, SIZ_BURST2)], + 4 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), + NextValue(master_size, SIZ_BURST4)], + 8 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST8), + NextValue(master_size, SIZ_BURST8)], + 16 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST16), + NextValue(master_size, SIZ_BURST16)], + }), + NextValue(SBUS_3V3_PPRD_o, 0), + NextValue(master_we, 1), + NextState("Master_Translation") + ).Elif(SBUS_3V3_BGs_i & + self.fromsbus_req_fifo.readable & + self.fromsbus_fifo.writable & + (sbus_master_throttle == 0), + NextValue(SBUS_3V3_BRs_o, 0) + ).Elif(~SBUS_3V3_BGs_i & + self.fromsbus_req_fifo.readable & + self.fromsbus_fifo.writable, + NextValue(sbus_wishbone_le, 0), # checkme + NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request + NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) + NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output + NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input + NextValue(burst_counter, 0), + NextValue(burst_limit_m1, burst_size - 1), + NextValue(SBUS_3V3_D_o, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), + NextValue(fifo_blk_addr, self.fromsbus_req_fifo.dout[0:blk_addr_width]), + NextValue(master_data_src_fromsbus_fifo, 1), + self.fromsbus_req_fifo.re.eq(1), + Case(burst_size, { + 2 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST2), + NextValue(master_size, SIZ_BURST2)], + 4 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), + NextValue(master_size, SIZ_BURST4)], + 8 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST8), + NextValue(master_size, SIZ_BURST8)], + 16 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST16), + NextValue(master_size, SIZ_BURST16)], + }), + NextValue(SBUS_3V3_PPRD_o, 1), + NextValue(master_we, 0), + NextState("Master_Translation") ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0)), NextValue(sbus_oe_master_in, 1), @@ -1050,6 +1137,10 @@ class SBusFPGABus(Module): If(master_we, NextValue(sbus_oe_data, 1), 
Case(master_size, { + SIZ_BURST2: NextValue(SBUS_3V3_D_o, master_data), + SIZ_BURST4: NextValue(SBUS_3V3_D_o, master_data), + SIZ_BURST8: NextValue(SBUS_3V3_D_o, master_data), + SIZ_BURST16: NextValue(SBUS_3V3_D_o, master_data), SIZ_WORD: NextValue(SBUS_3V3_D_o, master_data), SIZ_BYTE: Case(master_idx, { 0: NextValue(SBUS_3V3_D_o, Cat(master_data[ 0: 8], @@ -1075,7 +1166,10 @@ class SBusFPGABus(Module): 2: NextValue(SBUS_3V3_D_o, Cat(master_data[16:32], master_data[16:32],)), }) - }) + }), + If(master_data_src_tosbus_fifo, + NextValue(master_data, fifo_buffer[32:64]), # 0:32 is on the bus already + ), ).Else( NextValue(sbus_oe_data, 0) ), @@ -1096,9 +1190,10 @@ class SBusFPGABus(Module): NextState("Idle")], ACK_IDLE: [If(master_we, - NextState("Master_Write") + NextState("Master_Write"), ## FIXME: in burst mode, should update master_data with the next value ## FIXME: we don't do burst mode yet + ## FIXME: actually now from FIFO is handled above ).Else( NextState("Master_Read") )], @@ -1140,8 +1235,29 @@ class SBusFPGABus(Module): ) slave_fsm.act("Master_Read_Ack", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0b), self.led_display.value[8:40])), - NextValue(self.master_read_buffer_data[burst_counter[0:2]], SBUS_3V3_D_i), - NextValue(self.master_read_buffer_done[burst_counter[0:2]], 1), + If(master_data_src_fromsbus_fifo, + Case(burst_counter, { + 0: NextValue(fifo_buffer[0:32], SBUS_3V3_D_i), + 1: NextValue(fifo_buffer[32:64], SBUS_3V3_D_i), + 2: NextValue(fifo_buffer[64:96], SBUS_3V3_D_i), + 3: NextValue(fifo_buffer[96:128], SBUS_3V3_D_i), + 4: NextValue(fifo_buffer[128:160], SBUS_3V3_D_i), + 5: NextValue(fifo_buffer[160:192], SBUS_3V3_D_i), + 6: NextValue(fifo_buffer[192:224], SBUS_3V3_D_i), + 7: NextValue(fifo_buffer[224:256], SBUS_3V3_D_i), +# 8: NextValue(fifo_buffer[256:288], SBUS_3V3_D_i), +# 9: NextValue(fifo_buffer[288:320], SBUS_3V3_D_i), +# 10: NextValue(fifo_buffer[320:352], SBUS_3V3_D_i), +# 11: NextValue(fifo_buffer[352:384], 
SBUS_3V3_D_i), +# 12: NextValue(fifo_buffer[384:416], SBUS_3V3_D_i), +# 13: NextValue(fifo_buffer[416:448], SBUS_3V3_D_i), +# 14: NextValue(fifo_buffer[448:480], SBUS_3V3_D_i), +# 15: NextValue(fifo_buffer[480:512], SBUS_3V3_D_i), + }), + ).Else( + NextValue(self.master_read_buffer_data[burst_counter[0:2]], SBUS_3V3_D_i), + NextValue(self.master_read_buffer_done[burst_counter[0:2]], 1), + ), NextValue(burst_counter, burst_counter + 1), If(burst_counter == burst_limit_m1, NextValue(self.master_read_buffer_start, 0), @@ -1167,6 +1283,11 @@ class SBusFPGABus(Module): ) slave_fsm.act("Master_Read_Finish", ## missing the handling of late error #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0c), self.led_display.value[8:40])), + If(master_data_src_fromsbus_fifo, + fromsbus_fifo.we.eq(1), + fromsbus_fifo.din.eq(Cat(fifo_blk_addr, fifo_buffer)), + NextValue(master_data_src_fromsbus_fifo, 0), + ), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1178,9 +1299,33 @@ class SBusFPGABus(Module): ACK_WORD: # FIXME: check againt master_size ? 
[If(burst_counter == burst_limit_m1, NextState("Master_Write_Final"), + If(master_data_src_tosbus_fifo, + NextValue(master_data_src_tosbus_fifo, 0), + ) ).Else( - NextValue(SBUS_3V3_D_o, master_data), ## FIXME: we're not updating master_data for burst mode yet + NextValue(SBUS_3V3_D_o, master_data), NextValue(burst_counter, burst_counter + 1), + If(master_data_src_tosbus_fifo, + Case(burst_counter, { #0:32 just ack'd, 32:64 is on the bus now, burst_counter will only increment for the next cycle, so we're two steps ahead + 0: NextValue(master_data, fifo_buffer[64:96]), + 1: NextValue(master_data, fifo_buffer[96:128]), + 2: NextValue(master_data, fifo_buffer[128:160]), + 3: NextValue(master_data, fifo_buffer[160:192]), + 4: NextValue(master_data, fifo_buffer[192:224]), + 5: NextValue(master_data, fifo_buffer[224:256]), +# 6: NextValue(master_data, fifo_buffer[256:288]), +# 7: NextValue(master_data, fifo_buffer[288:320]), +# 8: NextValue(master_data, fifo_buffer[320:352]), +# 9: NextValue(master_data, fifo_buffer[352:384]), +# 10: NextValue(master_data, fifo_buffer[384:416]), +# 11: NextValue(master_data, fifo_buffer[416:448]), +# 12: NextValue(master_data, fifo_buffer[448:480]), +# 13: NextValue(master_data, fifo_buffer[480:512]), + #14: NextValue(master_data, fifo_buffer[512:544]), + #15: NextValue(master_data, fifo_buffer[544:576]), + "default": NextValue(master_data, 0), + }) + ), )], ACK_BYTE: # FIXME: check againt master_size ? 
[NextState("Master_Write_Final"), @@ -1210,7 +1355,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_master_throttle, 7), + NextValue(sbus_master_throttle, 3), NextState("Idle") ) # ##### FINISHED ##### diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 2a259f7..f795598 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -17,6 +17,7 @@ from litedram.modules import MT41J128M16 from litedram.phy import s7ddrphy from sbus_to_fpga_fsm import *; +from sbus_to_fpga_blk_dma import *; import sbus_to_fpga_export; @@ -107,7 +108,7 @@ class SBusFPGA(SoCCore): # Anything at 0x10000000 is therefore unreachable directly # The position of the 'usb_fake_dma' is so it overlaps # the virtual address space used by NetBSD DMA allocators - wb_mem_map = { + self.wb_mem_map = wb_mem_map = { "prom": 0x00000000, "csr" : 0x00040000, "usb_host": 0x00080000, @@ -171,16 +172,33 @@ class SBusFPGA(SoCCore): wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) self.submodules.wishbone_master_sbus = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="sbus", cd_slave="sys") self.submodules.wishbone_slave_sys = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_sbus, cd_master="sys", cd_slave="sbus") + + burst_size=8 + self.submodules.tosbus_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=(32+burst_size*32), depth=4)) + self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+burst_size*32), depth=4)) + self.submodules.fromsbus_req_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+32), depth=16)) + + 
self.submodules.exchange_with_mem = ExchangeWithMem(soc=self, + tosbus_fifo=self.tosbus_fifo, + fromsbus_fifo=self.fromsbus_fifo, + fromsbus_req_fifo=self.fromsbus_req_fifo, + burst_size=burst_size) _sbus_bus = SBusFPGABus(platform=self.platform, hold_reset=hold_reset, wishbone_slave=wishbone_slave_sbus, - wishbone_master=self.wishbone_master_sbus) + wishbone_master=self.wishbone_master_sbus, + tosbus_fifo=self.tosbus_fifo, + fromsbus_fifo=self.fromsbus_fifo, + fromsbus_req_fifo=self.fromsbus_req_fifo, + burst_size=burst_size) #self.submodules.sbus_bus = _sbus_bus self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_master) + self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_master) self.submodules.ddrphy = s7ddrphy.A7DDRPHY(platform.request("ddram"), memtype = "DDR3", @@ -189,10 +207,10 @@ class SBusFPGA(SoCCore): self.add_sdram("sdram", phy = self.ddrphy, module = MT41J128M16(sys_clk_freq, "1:4"), - l2_cache_size = 0 + l2_cache_size = 0, ) - self.add_sdcard() + #self.add_sdcard() def main(): parser = argparse.ArgumentParser(description="SbusFPGA") From 0bd6b69dd9ac62041417b4af788fed32ea437df2 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 02:47:06 -0400 Subject: [PATCH 37/78] Oups forgot the DMA engine (this version with wishbone Converter) --- .../sbus_to_fpga_blk_dma.py | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py new file mode 100644 index 0000000..5695cf6 --- 
from migen import *
from migen.genlib.fifo import *
from litex.soc.interconnect.csr import *
from litex.soc.interconnect import wishbone

# FIFO payload layouts, least-significant field first (as packed with Cat() below):
#   tosbus_fifo       : 32-bit host DMA address, then burst_size * 32 bits of data
#   fromsbus_req_fifo : blk_addr_width-bit block address, then 32-bit host DMA address
#   fromsbus_fifo     : blk_addr_width-bit block address, then burst_size * 32 bits of data
# The block address travels through fromsbus_req_fifo and comes back in
# fromsbus_fifo so that the returned data can be written to the matching block.
class ExchangeWithMem(Module, AutoCSR):
    """Moves fixed-size blocks between a Wishbone bus and three external FIFOs.

    Two independent FSMs are instantiated:

    * ``req_r_fsm`` starts when the low ``max_block_bits`` bits of the
      ``blk_cnt`` CSR are non-zero. It latches ``blk_addr``/``dma_addr`` and then,
      depending on bit 31 of ``blk_cnt``:

      - bit 31 clear: reads each block through ``wishbone_r_master`` and pushes
        ``{dma address, block data}`` into ``tosbus_fifo``;
      - bit 31 set: pushes one ``{block address, dma address}`` request per
        block into ``fromsbus_req_fifo``.

      Both addresses advance by one block per iteration and ``blk_cnt`` is
      cleared by hardware when the last block has been handled.

    * ``req_w_fsm`` pops ``{block address, block data}`` entries from
      ``fromsbus_fifo`` and writes them through ``wishbone_w_master``.

    Args:
        soc: object providing ``bus.data_width`` and ``wb_mem_map["main_ram"]``.
        tosbus_fifo: FIFO written by this module (``writable``/``we``/``din``).
        fromsbus_fifo: FIFO read by this module (``readable``/``re``/``dout``).
        fromsbus_req_fifo: FIFO written by this module (``writable``/``we``/``din``).
        burst_size: number of 32-bit words in one block.

    Attributes:
        wishbone_r_master, wishbone_w_master: block-wide (``burst_size * 32`` bits)
            Wishbone interfaces driven by the FSMs.
        wishbone_r_slave, wishbone_w_slave: ``soc.bus.data_width``-wide interfaces
            on the other side of a ``wishbone.Converter`` from the matching master.
    """
    def __init__(self, soc, tosbus_fifo, fromsbus_fifo, fromsbus_req_fifo, burst_size = 8):
        self.wishbone_r_slave = wishbone.Interface(data_width=soc.bus.data_width)
        self.wishbone_w_slave = wishbone.Interface(data_width=soc.bus.data_width)
        self.tosbus_fifo = tosbus_fifo
        self.fromsbus_fifo = fromsbus_fifo
        self.fromsbus_req_fifo = fromsbus_req_fifo

        data_width = burst_size * 4        # block size in bytes
        data_width_bits = burst_size * 32  # block size in bits
        blk_addr_width = 32 - log2_int(data_width) # 27 for burst_size == 8

        self.wishbone_r_master = wishbone.Interface(data_width=data_width_bits)
        self.wishbone_w_master = wishbone.Interface(data_width=data_width_bits)

        # Width adaptation between the block-wide masters and the SoC-wide interfaces.
        self.submodules += wishbone.Converter(self.wishbone_r_master, self.wishbone_r_slave)
        self.submodules += wishbone.Converter(self.wishbone_w_master, self.wishbone_w_slave)

        # Build-time diagnostics, to cross-check the FIFO widths chosen by the caller.
        print("ExchangeWithMem: data_width = {}, data_width_bits = {}, blk_addr_width = {}\n".format(data_width, data_width_bits, blk_addr_width))
        print("ExchangeWithMem: tosbus_fifo width = {}, fromsbus_fifo width = {}, fromsbus_req_fifo width = {}\n".format(len(tosbus_fifo.din), len(fromsbus_fifo.dout), len(fromsbus_req_fifo.din)))

        # Working copies of the addresses for the transfer in progress.
        local_r_addr = Signal(blk_addr_width)  # current block address
        dma_r_addr = Signal(32)                # current host DMA address

        max_block_bits=16  # only the low 16 bits of blk_cnt carry the block count

        # CSRConstant do not seem to appear in the CSR Map, but the values need to be
        # accessible to the OS driver, hence read-only CSRStatus registers instead.
        self.blk_size = CSRStatus(32) # report the block size to the SW layer
        self.blk_base = CSRStatus(32) # report where the blk starts
        self.comb += self.blk_size.status.eq(data_width)
        self.comb += self.blk_base.status.eq(soc.wb_mem_map["main_ram"] >> log2_int(data_width))

        self.blk_addr = CSRStorage(32, description = "SDRAM Block address to read/write from Wishbone memory (block of size {})".format(data_width))
        self.dma_addr = CSRStorage(32, description = "Host Base address where to write/read data (i.e. SPARC Virtual addr)")
        self.blk_cnt = CSRStorage(32, write_from_dev=True, description = "How many blk to read/write (max 2^{}-1); bit 31 is RD".format(max_block_bits), reset = 0)
        self.last_blk = CSRStatus(32, description = "Last Blk addr finished on WB side")
        self.last_dma = CSRStatus(32, description = "Last DMA addr finished on WB side")
        self.blk_rem = CSRStatus(32, description = "How many block remaining; bit 31 is RD", reset = 0)
        self.dma_status = CSRStatus(32, description = "Status register")
        self.wr_tosdram = CSRStatus(32, description = "Last address written to SDRAM")

        self.submodules.req_r_fsm = req_r_fsm = FSM(reset_state="Reset")
        self.submodules.req_w_fsm = req_w_fsm = FSM(reset_state="Reset")

        # dma_status bit layout (bits not listed are never driven):
        #   0      read FSM not in Idle
        #   1      write FSM not in Idle
        #   2      fromsbus_fifo has data waiting to be written to memory
        #   9      write FSM is in WaitForAck
        #   16-20  wishbone_w_master cyc / stb / we / ack / err
        #   24-28  wishbone_r_master cyc / stb / we / ack / err
        # Bit 8 used to be tied to req_w_fsm.ongoing("ReqToMemory"), a state the
        # write FSM never defines; the bit could only read 0, so it is left undriven.
        status = self.dma_status.status
        self.comb += [
            status[0:1].eq(~req_r_fsm.ongoing("Idle")),
            status[1:2].eq(~req_w_fsm.ongoing("Idle")),
            status[2:3].eq(self.fromsbus_fifo.readable),

            status[9:10].eq(req_w_fsm.ongoing("WaitForAck")),

            status[16:17].eq(self.wishbone_w_master.cyc),
            status[17:18].eq(self.wishbone_w_master.stb),
            status[18:19].eq(self.wishbone_w_master.we),
            status[19:20].eq(self.wishbone_w_master.ack),
            status[20:21].eq(self.wishbone_w_master.err),

            status[24:25].eq(self.wishbone_r_master.cyc),
            status[25:26].eq(self.wishbone_r_master.stb),
            status[26:27].eq(self.wishbone_r_master.we),
            status[27:28].eq(self.wishbone_r_master.ack),
            status[28:29].eq(self.wishbone_r_master.err),
        ]

        def block_done(next_state):
            # Bookkeeping shared by both transfer directions once one block has been
            # handed over: publish progress, count the block, then either finish
            # (clearing blk_cnt from the device side) or step to the next block.
            return [
                NextValue(self.last_blk.status, local_r_addr),
                NextValue(self.last_dma.status, dma_r_addr),
                NextValue(self.blk_rem.status, self.blk_rem.status - 1),
                If(self.blk_rem.status[0:max_block_bits] <= 1,
                    self.blk_cnt.we.eq(1), ## auto-reset
                    self.blk_cnt.dat_w.eq(0),
                    NextState("Idle"),
                ).Else(
                    NextValue(local_r_addr, local_r_addr + 1),
                    NextValue(dma_r_addr, dma_r_addr + data_width),
                    NextState(next_state),
                ),
            ]

        req_r_fsm.act("Reset",
            NextState("Idle")
        )
        req_r_fsm.act("Idle",
            # Triggered by the stored count rather than by the blk_cnt write strobe
            # (which might be too transient); this is why blk_cnt has to be cleared
            # by hardware at the end of the transfer.
            If(self.blk_cnt.storage[0:max_block_bits] != 0,
                NextValue(local_r_addr, self.blk_addr.storage),
                NextValue(dma_r_addr, self.dma_addr.storage),
                NextValue(self.blk_rem.status, Cat(self.blk_cnt.storage[0:max_block_bits], Signal(32-max_block_bits, reset = 0))),
                If(self.blk_cnt.storage[31:32],
                    # RD bit set: queue requests, data will come back via fromsbus_fifo
                    NextState("QueueReqToMemory")
                ).Else(
                    # RD bit clear: fetch blocks over Wishbone and push them to tosbus_fifo
                    NextState("ReqFromMemory")
                )
            )
        )
        req_r_fsm.act("ReqFromMemory",
            # Wait for any previous ack to be released before starting a new cycle.
            If(~self.wishbone_r_master.ack,
                NextValue(self.wishbone_r_master.cyc, 1),
                NextValue(self.wishbone_r_master.stb, 1),
                NextValue(self.wishbone_r_master.sel, 2**len(self.wishbone_r_master.sel)-1),
                NextValue(self.wishbone_r_master.we, 0),
                NextValue(self.wishbone_r_master.adr, local_r_addr),
                NextState("WaitForData")
            )
        )
        req_r_fsm.act("WaitForData",
            # The block is only consumed when the FIFO can take it in the same cycle.
            If(self.wishbone_r_master.ack &
               self.tosbus_fifo.writable,
                NextValue(self.wishbone_r_master.cyc, 0),
                NextValue(self.wishbone_r_master.stb, 0),
                tosbus_fifo.we.eq(1),
                tosbus_fifo.din.eq(Cat(dma_r_addr, self.wishbone_r_master.dat_r)),
                *block_done("ReqFromMemory")
            )
        )
        req_r_fsm.act("QueueReqToMemory",
            If(self.fromsbus_req_fifo.writable,
                self.fromsbus_req_fifo.we.eq(1),
                self.fromsbus_req_fifo.din.eq(Cat(local_r_addr, dma_r_addr)),
                *block_done("QueueReqToMemory")
            )
        )

        req_w_fsm.act("Reset",
            NextState("Idle")
        )
        req_w_fsm.act("Idle",
            # Pop one {block address, data} entry and start the Wishbone write.
            If(self.fromsbus_fifo.readable &
               ~self.wishbone_w_master.ack,
                self.fromsbus_fifo.re.eq(1),
                NextValue(self.wishbone_w_master.cyc, 1),
                NextValue(self.wishbone_w_master.stb, 1),
                NextValue(self.wishbone_w_master.sel, 2**len(self.wishbone_w_master.sel)-1),
                NextValue(self.wishbone_w_master.we, 1),
                NextValue(self.wishbone_w_master.adr, self.fromsbus_fifo.dout[0:blk_addr_width]),
                NextValue(self.wishbone_w_master.dat_w, self.fromsbus_fifo.dout[blk_addr_width:(blk_addr_width + data_width_bits)]),
                NextValue(self.wr_tosdram.status, self.fromsbus_fifo.dout[0:blk_addr_width]),
                NextState("WaitForAck")
            )
        )
        req_w_fsm.act("WaitForAck",
            If(self.wishbone_w_master.ack,
                NextValue(self.wishbone_w_master.cyc, 0),
                NextValue(self.wishbone_w_master.stb, 0),
                NextState("Idle"),
            )
        )
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index f795598..d447122 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -197,8 +197,8 @@ class SBusFPGA(SoCCore): self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) - self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_master) - self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_master) + self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_slave) + self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_slave) self.submodules.ddrphy = s7ddrphy.A7DDRPHY(platform.request("ddram"), memtype = "DDR3", From 2f5b4eecfbeabc82c8a42201f799830093d2d4e3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 02:52:37 -0400 Subject: [PATCH 39/78] blk dev/dk support for sbusfpga_sdram --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 459 ++++++++++++++++-- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h | 3 +- 2 files changed, 412 insertions(+), 50 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 035e98b..76fb285 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -43,6 +43,14 @@ __KERNEL_RCSID(0, "$NetBSD$"); #include +#include +#include +#include + +#include +#include +#include + #include #include @@ -56,6 +64,19 @@ CFATTACH_DECL_NEW(sbusfpga_sdram, sizeof(struct sbusfpga_sdram_softc), dev_type_open(sbusfpga_sdram_open); dev_type_close(sbusfpga_sdram_close); dev_type_ioctl(sbusfpga_sdram_ioctl); 
+dev_type_strategy(sbusfpga_sdram_strategy); +dev_type_size(sbusfpga_sdram_size); + +const struct bdevsw sbusfpga_sdram_bdevsw = { + .d_open = sbusfpga_sdram_open, + .d_close = sbusfpga_sdram_close, + .d_strategy = sbusfpga_sdram_strategy, + .d_ioctl = sbusfpga_sdram_ioctl, + .d_dump = nodump, + .d_psize = sbusfpga_sdram_size, + .d_discard = nodiscard, + .d_flag = D_DISK +}; const struct cdevsw sbusfpga_sdram_cdevsw = { .d_open = sbusfpga_sdram_open, @@ -72,14 +93,59 @@ const struct cdevsw sbusfpga_sdram_cdevsw = { .d_flag = 0 }; +static void sbusfpga_sdram_set_geometry(struct sbusfpga_sdram_softc *sc); +static void sbusfpga_sdram_minphys(struct buf *); +static int sbusfpga_sdram_diskstart(device_t self, struct buf *bp); + +struct dkdriver sbusfpga_sdram_dkdriver = { + .d_strategy = sbusfpga_sdram_strategy, + .d_minphys = sbusfpga_sdram_minphys, + .d_diskstart = sbusfpga_sdram_diskstart +}; + extern struct cfdriver sbusfpga_sdram_cd; + +static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data); +static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data); + int sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { - //struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, minor(dev)); - int err = 0; + struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, minor(dev)); + int err = 0, err2 = 0; + + if (sc == NULL) { + aprint_error("%s:%d: sc == NULL! 
giving up\n", __PRETTY_FUNCTION__, __LINE__); + return (ENXIO); + } - switch (cmd) { + switch (cmd) { + /* case VNDIOCCLR: */ + /* case VNDIOCCLR50: */ + case DIOCGDINFO: + case DIOCSDINFO: + case DIOCWDINFO: + case DIOCGPARTINFO: + case DIOCKLABEL: + case DIOCWLABEL: + case DIOCCACHESYNC: +#ifdef __HAVE_OLD_DISKLABEL + case ODIOCGDINFO: + case ODIOCSDINFO: + case ODIOCWDINFO: + case ODIOCGDEFLABEL: +#endif + case DIOCDWEDGE: + case DIOCAWEDGE: + case DIOCLWEDGES: + case DIOCRMWEDGES: + case DIOCMWEDGES: + case DIOCGWEDGEINFO: + case DIOCGDEFLABEL: + err2 = dk_ioctl(&sc->dk, dev, cmd, data, flag, l); + if (err2 != EPASSTHROUGH) + err = err2; + break; default: err = EINVAL; break; @@ -88,15 +154,43 @@ sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l } int -sbusfpga_sdram_open(dev_t dev, int flags, int mode, struct lwp *l) +sbusfpga_sdram_open(dev_t dev, int flag, int fmt, struct lwp *l) { - return (0); + struct sbusfpga_sdram_softc *sd = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); + struct dk_softc *dksc; + int error; + + if (sd == NULL) { + aprint_error("%s:%d: sd == NULL! giving up\n", __PRETTY_FUNCTION__, __LINE__); + return (ENXIO); + } else { + aprint_normal("%s:%d: open device, part is %d\n", __PRETTY_FUNCTION__, __LINE__, DISKPART(dev)); + } + dksc = &sd->dk; + + if (!device_is_active(dksc->sc_dev)) { + return (ENODEV); + } + + error = dk_open(dksc, dev, flag, fmt, l); + + return error; } int -sbusfpga_sdram_close(dev_t dev, int flags, int mode, struct lwp *l) +sbusfpga_sdram_close(dev_t dev, int flag, int fmt, struct lwp *l) { - return (0); + struct sbusfpga_sdram_softc *sd = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); + struct dk_softc *dksc; + + if (sd == NULL) { + aprint_error("%s:%d: sd == NULL! 
giving up\n", __PRETTY_FUNCTION__, __LINE__); + return (ENXIO); + } + + dksc = &sd->dk; + + return dk_close(dksc, dev, flag, fmt, l); } int @@ -130,7 +224,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) sc->sc_bustag = sa->sa_bustag; sc->sc_dmatag = sa->sa_dmatag; - sc->sc_dev = self; + sc->dk.sc_dev = self; aprint_normal("\n"); @@ -235,8 +329,168 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) aprint_error_dev(self, "DMA-MEMTEST failed for SDRAM\n"); return; } + + /* we seem OK hardware-wise */ + + + dk_init(&sc->dk, self, DKTYPE_FLASH); + disk_init(&sc->dk.sc_dkdev, device_xname(sc->dk.sc_dev), &sbusfpga_sdram_dkdriver); + dk_attach(&sc->dk); + disk_attach(&sc->dk.sc_dkdev); + sbusfpga_sdram_set_geometry(sc); + + bufq_alloc(&sc->dk.sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK); /* needed ? */ + + /* + aprint_normal_dev(self, "sc->dk.sc_dkdev.dk_blkshift = %d\n", sc->dk.sc_dkdev.dk_blkshift); + aprint_normal_dev(self, "sc->dk.sc_dkdev.dk_byteshift = %d\n", sc->dk.sc_dkdev.dk_byteshift); + aprint_normal_dev(self, "sc->dk.sc_dkdev.dk_label = %p\n", sc->dk.sc_dkdev.dk_label); + aprint_normal_dev(self, "sc->dk.sc_dkdev.dk_cpulabel = %p\n", sc->dk.sc_dkdev.dk_cpulabel); + */ } +void +sbusfpga_sdram_strategy(struct buf *bp) +{ + struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(bp->b_dev)); + + dk_strategy(&sc->dk, bp); +} + +static void sbusfpga_sdram_set_geometry(struct sbusfpga_sdram_softc *sc) { + struct dk_softc *dksc = &sc->dk; + struct disk_geom *dg = &dksc->sc_dkdev.dk_geom; + + memset(dg, 0, sizeof(*dg)); + + dg->dg_secsize = 512; + dg->dg_nsectors = 32; + dg->dg_ntracks = 32; + dg->dg_ncylinders = 512; + dg->dg_secpercyl = dg->dg_nsectors * dg->dg_ntracks; + dg->dg_secperunit = 524288; //dg->dg_secpercyl * dg->dg_ncylinders; + dg->dg_pcylinders = 512; + dg->dg_sparespertrack = 0; + dg->dg_sparespercyl = 0; + + disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, "sbusfpga_sdram"); +} 
+ + +int +sbusfpga_sdram_size(dev_t dev) { + return 524288; +} + +static void +sbusfpga_sdram_minphys(struct buf *bp) +{ + if (bp->b_bcount > (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512)) + bp->b_bcount = (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512); +} + + +static int +sbusfpga_sdram_diskstart(device_t self, struct buf *bp) +{ + struct sbusfpga_sdram_softc *sc = device_private(self); + int err = 0; + if (sc == NULL) { + aprint_error("%s:%d: sc == NULL! giving up\n", __PRETTY_FUNCTION__, __LINE__); + err = EINVAL; + goto done; + } + aprint_normal_dev(sc->dk.sc_dev, "%s:%d: part %d\n", __PRETTY_FUNCTION__, __LINE__, DISKPART(bp->b_dev)); + aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bflags = 0x%08x\n", __PRETTY_FUNCTION__, __LINE__, bp->b_flags); + aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bufsize = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bufsize); + aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno); + aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); + aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bcount = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bcount); + + bp->b_resid = bp->b_bcount; + + if (bp->b_bcount == 0) { + goto done; + } + + if (bp->b_flags & B_READ) { + unsigned char* data = bp->b_data; + daddr_t blk = bp->b_rawblkno; + /* struct partition *p = NULL; */ + + /* if (DISKPART(bp->b_dev) != RAW_PART) { */ + /* if ((err = bounds_check_with_label(&sc->dk.sc_dkdev, bp, 0)) <= 0) { */ + /* aprint_error("%s:%d: bounds_check_with_label -> %d\n", __PRETTY_FUNCTION__, __LINE__, err); */ + /* bp->b_resid = bp->b_bcount; */ + /* goto done; */ + /* } */ + /* p = &sc->dk.sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; */ + /* blk = bp->b_blkno + p->p_offset; */ + /* } */ + + while (bp->b_resid >= 512 && !err) { + u_int32_t blkcnt = bp->b_resid / 512; + + if (blkcnt > (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512)) + blkcnt = 
(SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512); + + if (blk+blkcnt <= 524288) { + err = sbusfpga_sdram_read_block(sc, blk, blkcnt, data); + } else { + aprint_error("%s:%d: blk = %lld read out of range! giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); + err = EINVAL; + } + blk += blkcnt; + data += 512 * blkcnt; + bp->b_resid -= 512 * blkcnt; + } + } else { + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: part %d\n", __PRETTY_FUNCTION__, __LINE__, DISKPART(bp->b_dev)); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bflags = 0x%08x\n", __PRETTY_FUNCTION__, __LINE__, bp->b_flags); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bufsize = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bufsize); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bcount = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bcount); */ + unsigned char* data = bp->b_data; + daddr_t blk = bp->b_rawblkno; + /* struct partition *p = NULL; */ + + /* if (DISKPART(bp->b_dev) != RAW_PART) { */ + /* if (bounds_check_with_label(&sc->dk.sc_dkdev, bp, 0) <= 0) { */ + /* bp->b_resid = bp->b_bcount; */ + /* goto done; */ + /* } */ + /* p = &sc->dk.sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; */ + /* blk = bp->b_blkno + p->p_offset; */ + /* } */ + + while (bp->b_resid >= 512 && !err) { + u_int32_t blkcnt = bp->b_resid / 512; + + if (blkcnt > (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512)) + blkcnt = (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512); + + if (blk+blkcnt <= 524288) { + err = sbusfpga_sdram_write_block(sc, blk, blkcnt, data); + } else { + aprint_error("%s:%d: blk = %lld write out of range! 
giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); + err = EINVAL; + } + blk += blkcnt; + data += 512 * blkcnt; + bp->b_resid -= 512 * blkcnt; + } + } + + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_resid = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_resid); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_error = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_error); */ + + done: + biodone(bp); + return err; +} + + #define CONFIG_CSR_DATA_WIDTH 32 // define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle #define CSR_LEDS_BASE @@ -257,24 +511,24 @@ int dma_init(struct sbusfpga_sdram_softc *sc) { sc->dma_blk_size = exchange_with_mem_blk_size_read(sc); sc->dma_blk_base = exchange_with_mem_blk_base_read(sc); - aprint_normal_dev(sc->sc_dev, "DMA: HW -> block size is %d, base address is 0x%08x\n", sc->dma_blk_size, sc->dma_blk_base * sc->dma_blk_size); + aprint_normal_dev(sc->dk.sc_dev, "DMA: HW -> block size is %d, base address is 0x%08x\n", sc->dma_blk_size, sc->dma_blk_base * sc->dma_blk_size); /* Allocate a dmamap */ if (bus_dmamap_create(sc->sc_dmatag, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 1, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &sc->sc_dmamap) != 0) { - aprint_error_dev(sc->sc_dev, "DMA map create failed\n"); + aprint_error_dev(sc->dk.sc_dev, "DMA map create failed\n"); return 0; } else { - aprint_normal_dev(sc->sc_dev, "dmamap: %lu %lu %d (%p)\n", sc->sc_dmamap->dm_maxsegsz, sc->sc_dmamap->dm_mapsize, sc->sc_dmamap->dm_nsegs, sc->sc_dmatag->_dmamap_load); + aprint_normal_dev(sc->dk.sc_dev, "dmamap: %lu %lu %d (%p)\n", sc->sc_dmamap->dm_maxsegsz, sc->sc_dmamap->dm_mapsize, sc->sc_dmamap->dm_nsegs, sc->sc_dmatag->_dmamap_load); } if (bus_dmamem_alloc(sc->sc_dmatag, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 64, 64, &sc->sc_segs, 1, &sc->sc_rsegs, BUS_DMA_NOWAIT | BUS_DMA_STREAMING)) { - aprint_error_dev(sc->sc_dev, "cannot allocate DVMA memory"); + aprint_error_dev(sc->dk.sc_dev, "cannot allocate DVMA memory"); 
bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); return 0; } if (bus_dmamem_map(sc->sc_dmatag, &sc->sc_segs, 1, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, &sc->sc_dma_kva, BUS_DMA_NOWAIT)) { - aprint_error_dev(sc->sc_dev, "cannot allocate DVMA address"); + aprint_error_dev(sc->dk.sc_dev, "cannot allocate DVMA address"); bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); return 0; @@ -282,14 +536,14 @@ dma_init(struct sbusfpga_sdram_softc *sc) { if (bus_dmamap_load(sc->sc_dmatag, sc->sc_dmamap, sc->sc_dma_kva, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, /* kernel space */ NULL, BUS_DMA_NOWAIT | BUS_DMA_STREAMING | BUS_DMA_WRITE)) { - aprint_error_dev(sc->sc_dev, "cannot load dma map"); + aprint_error_dev(sc->dk.sc_dev, "cannot load dma map"); bus_dmamem_unmap(sc->sc_dmatag, &sc->sc_dma_kva, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ); bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); return 0; } - aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernal address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); + aprint_normal_dev(sc->dk.sc_dev, "DMA: SW -> kernal address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); return 1; } @@ -305,14 +559,14 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { unsigned int blkcnt ; int count; - aprint_normal_dev(sc->sc_dev, "Initializing DMA buffer.\n"); + aprint_normal_dev(sc->dk.sc_dev, "Initializing DMA buffer.\n"); val = 0xDEADBEEF; for (int i = 0 ; i < testdatasize/sizeof(unsigned long) ; i++) { val = lfsr(32, val); kva_ulong[i] = val; } - aprint_normal_dev(sc->sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); if (sc->sc_bufsiz_mmap > 0) { int idx = blkn * sc->dma_blk_size / sizeof(unsigned long), x; @@ -323,20 +577,20 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { count = 0; for (x = idx ; x < bound; x++) { unsigned 
long data = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_mmap, x*sizeof(unsigned long)); - aprint_normal_dev(sc->sc_dev, "Prior to write [mmap] at %d: 0x%08lx\n", x, data); + aprint_normal_dev(sc->dk.sc_dev, "Prior to write [mmap] at %d: 0x%08lx\n", x, data); } } } bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_PREREAD); - aprint_normal_dev(sc->sc_dev, "Starting DMA Write-to-Sdram.\n"); + aprint_normal_dev(sc->dk.sc_dev, "Starting DMA Write-to-Sdram.\n"); exchange_with_mem_blk_addr_write(sc, blkn + sc->dma_blk_base); exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); exchange_with_mem_blk_cnt_write(sc, 0x80000000 | (testdatasize / sc->dma_blk_size)); - aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram started, polling\n"); + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram started, polling\n"); bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_POSTREAD); @@ -344,7 +598,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { count = 0; while (((blkcnt = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { - aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram ongoing (%u, status 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram ongoing (%u, status 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), @@ -354,7 +608,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } if (blkcnt) { - aprint_error_dev(sc->sc_dev, "DMA Write-to-Sdram didn't finish ? (%u, status 0x%08x, 0x%08x, 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", + aprint_error_dev(sc->dk.sc_dev, "DMA Write-to-Sdram didn't finish ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_dma_read(sc), @@ -363,7 +617,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { exchange_with_mem_wr_tosdram_read(sc)); return 0; } else { - aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram done (status 0x%08x, 0x%08x, 0x%08x, 0x%08x, last phys addr written 0x%08x)\n", + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram done (status 0x%08x, 0x%08x, 0x%08x, 0x%08x, last phys addr written 0x%08x)\n", exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), exchange_with_mem_last_dma_read(sc), @@ -373,20 +627,20 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { count = 0; while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { - aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", blkcnt); + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", blkcnt); count ++; delay(500); } if (blkcnt & 0x3) { - aprint_error_dev(sc->sc_dev, "DMA Write-to-Sdram can't reach SDRAM ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, + aprint_error_dev(sc->dk.sc_dev, "DMA Write-to-Sdram can't reach SDRAM ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), exchange_with_mem_last_dma_read(sc), exchange_with_mem_blk_rem_read(sc)); return 0; } else { - aprint_normal_dev(sc->sc_dev, "DMA Write-to-Sdram has reached SDRAM (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram has reached SDRAM (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), exchange_with_mem_last_dma_read(sc), @@ -405,7 +659,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { unsigned long data = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_mmap, x*sizeof(unsigned long)); val = lfsr(32, val); if (val != data) { - aprint_error_dev(sc->sc_dev, "Read-after-write [mmap] error at %d: 0x%08lx vs. 0x%08lx (0x%08lx)\n", x, data, val, val ^ data); + aprint_error_dev(sc->dk.sc_dev, "Read-after-write [mmap] error at %d: 0x%08lx vs. 
0x%08lx (0x%08lx)\n", x, data, val, val ^ data); count ++; } } @@ -415,17 +669,17 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { for (int i = 0 ; i < testdatasize/sizeof(unsigned long) ; i++) { kva_ulong[i] = 0x0c0ffee0; } - aprint_normal_dev(sc->sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_PREWRITE); - aprint_normal_dev(sc->sc_dev, "Starting DMA Read-from-Sdram.\n"); + aprint_normal_dev(sc->dk.sc_dev, "Starting DMA Read-from-Sdram.\n"); exchange_with_mem_blk_addr_write(sc, blkn + sc->dma_blk_base); exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); exchange_with_mem_blk_cnt_write(sc, 0x00000000 | (testdatasize / sc->dma_blk_size)); - aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram started, polling\n"); + aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram started, polling\n"); bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_POSTWRITE); @@ -433,13 +687,13 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { count = 0; while (((blkcnt = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { - aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram ongoing (%u, status 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc)); + aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram ongoing (%u, status 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc)); count ++; delay(500); } if (blkcnt) { - aprint_error_dev(sc->sc_dev, "DMA Read-from-Sdram didn't finish ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + aprint_error_dev(sc->dk.sc_dev, "DMA Read-from-Sdram didn't finish ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), @@ -447,7 +701,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { exchange_with_mem_blk_rem_read(sc)); return 0; } else { - aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram done (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram done (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), exchange_with_mem_last_dma_read(sc), @@ -456,22 +710,22 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { count = 0; while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { - aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram hasn't reached memory yet (status 0x%08x)\n", blkcnt); + aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram hasn't reached memory yet (status 0x%08x)\n", blkcnt); count ++; delay(500); } - aprint_normal_dev(sc->sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); if (blkcnt & 0x3) { - aprint_error_dev(sc->sc_dev, "DMA Read-from-Sdram can't reach memory ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, + aprint_error_dev(sc->dk.sc_dev, "DMA Read-from-Sdram can't reach memory ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), exchange_with_mem_last_dma_read(sc), exchange_with_mem_blk_rem_read(sc)); return 0; } else { - aprint_normal_dev(sc->sc_dev, "DMA Read-from-Sdram has reached memory (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram has reached memory (status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), exchange_with_mem_last_dma_read(sc), @@ -483,7 +737,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { for (int i = 0 ; i < testdatasize/sizeof(unsigned long) && count < 10; i++) { val = lfsr(32, val); if (kva_ulong[i] != val) { - aprint_error_dev(sc->sc_dev, "Read-after-write error at %d: 0x%08lx vs. 0x%08lx (0x%08lx)\n", i, kva_ulong[i], val, val ^ kva_ulong[i]); + aprint_error_dev(sc->dk.sc_dev, "Read-after-write error at %d: 0x%08lx vs. 0x%08lx (0x%08lx)\n", i, kva_ulong[i], val, val ^ kva_ulong[i]); count ++; } } @@ -495,6 +749,113 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } +static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data) { + int res = 0; + int count; + unsigned int check; + + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, blkcnt * 512, BUS_DMASYNC_PREWRITE); + + exchange_with_mem_blk_addr_write(sc, sc->dma_blk_base + (block * 512 / sc->dma_blk_size) ); + exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); + exchange_with_mem_blk_cnt_write(sc, 0x00000000 | (blkcnt * 512 / sc->dma_blk_size) ); + + delay(500); + + count = 0; + while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { + count ++; + delay(500); + } + + if (check) { + aprint_error_dev(sc->dk.sc_dev, "DMA didn't finish ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", + check & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_wr_tosdram_read(sc)); + return ENXIO; + } + + count = 0; + while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); + count ++; + delay(500); + } + + if (check & 0x3) { + aprint_error_dev(sc->dk.sc_dev, "DMA can't reach memory/SDRAM ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + check & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + return ENXIO; + } + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, blkcnt * 512, BUS_DMASYNC_POSTWRITE); + + memcpy(data, sc->sc_dma_kva, blkcnt * 512); + + return res; +} + + +static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data) { + int res = 0; + int count; + unsigned int check; + + memcpy(sc->sc_dma_kva, data, blkcnt * 512); + + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, blkcnt * 512, BUS_DMASYNC_PREREAD); + + exchange_with_mem_blk_addr_write(sc, sc->dma_blk_base + (block * 512 / sc->dma_blk_size) ); + exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); + exchange_with_mem_blk_cnt_write(sc, 0x80000000 | (blkcnt * 512 / sc->dma_blk_size) ); + + delay(500); + + count = 0; + while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { + count ++; + delay(500); + } + + if (check) { + aprint_error_dev(sc->dk.sc_dev, "DMA didn't finish ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, lastblk req 0x%08x, last phys addr written 0x%08x)\n", + check & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_wr_tosdram_read(sc)); + return ENXIO; + } + + count = 0; + while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); + count ++; + delay(500); + } + + if (check & 0x3) { + aprint_error_dev(sc->dk.sc_dev, "DMA can't reach memory/SDRAM ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", + check & 0x0000FFFF, + exchange_with_mem_dma_status_read(sc), + exchange_with_mem_last_blk_read(sc), + exchange_with_mem_last_dma_read(sc), + exchange_with_mem_blk_rem_read(sc)); + return ENXIO; + } + bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, blkcnt * 512, BUS_DMASYNC_POSTREAD); + + return res; +} + /* auto-generated sdram_phy.h + sc */ #define DFII_CONTROL_SEL 0x01 #define DFII_CONTROL_CKE 0x02 @@ -1035,7 +1396,7 @@ sdram_software_control_on(struct sbusfpga_sdram_softc *sc) previous = sdram_dfii_control_read(sc); if (previous != (0x02 | 0x04 | 0x08)) { sdram_dfii_control_write(sc, (0x02 | 0x04 | 0x08)); - aprint_normal_dev(sc->sc_dev, "Switching SDRAM to software control.\n"); + aprint_normal_dev(sc->dk.sc_dev, "Switching SDRAM to software control.\n"); } } static void @@ -1045,7 +1406,7 @@ sdram_software_control_off(struct sbusfpga_sdram_softc *sc) previous = sdram_dfii_control_read(sc); if (previous != (0x01)) { sdram_dfii_control_write(sc, (0x01)); - aprint_normal_dev(sc->sc_dev, "Switching SDRAM to hardware control.\n"); + aprint_normal_dev(sc->dk.sc_dev, "Switching SDRAM to hardware control.\n"); } } __attribute__((unused)) static void @@ -1140,7 +1501,7 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s int delay, 
delay_mid, delay_range; int delay_min = -1, delay_max = -1; if (show_long) - aprint_normal_dev(sc->sc_dev, "m%d: |", module); + aprint_normal_dev(sc->dk.sc_dev, "m%d: |", module); delay = 0; rst_delay(sc, module); while (1) { @@ -1180,7 +1541,7 @@ sdram_leveling_center_module (struct sbusfpga_sdram_softc *sc, int module, int s delay_max = delay; } if (show_long) - aprint_normal_dev(sc->sc_dev, "| "); + aprint_normal_dev(sc->dk.sc_dev, "| "); delay_mid = (delay_min + delay_max) / 2 % 32; delay_range = (delay_max - delay_min) / 2; if (show_short) { @@ -1237,7 +1598,7 @@ sdram_read_leveling_scan_module (struct sbusfpga_sdram_softc *sc, int module, in unsigned int errors; score = 0; if (show) - aprint_normal_dev(sc->sc_dev, " m%d, b%02d: |", module, bitslip); + aprint_normal_dev(sc->dk.sc_dev, " m%d, b%02d: |", module, bitslip); sdram_read_leveling_rst_delay(sc, module); for (i = 0; i < 32; i++) { int working; @@ -1281,7 +1642,7 @@ sdram_read_leveling(struct sbusfpga_sdram_softc *sc) break; sdram_read_leveling_inc_bitslip(sc, module); } - aprint_normal_dev(sc->sc_dev, " best: m%d, b%02d ", module, best_bitslip); + aprint_normal_dev(sc->dk.sc_dev, " best: m%d, b%02d ", module, best_bitslip); sdram_read_leveling_rst_bitslip(sc, module); for (bitslip = 0; bitslip < best_bitslip; bitslip++) sdram_read_leveling_inc_bitslip(sc, module); @@ -1329,9 +1690,9 @@ sdram_write_latency_calibration(struct sbusfpga_sdram_softc *sc) else bitslip = _sdram_write_leveling_bitslips[module]; if (bitslip == -1) - aprint_normal_dev(sc->sc_dev, "m%d:- ", module); + aprint_normal_dev(sc->dk.sc_dev, "m%d:- ", module); else - aprint_normal_dev(sc->sc_dev, "m%d:%d ", module, bitslip); + aprint_normal_dev(sc->dk.sc_dev, "m%d:%d ", module, bitslip); ddrphy_dly_sel_write(sc, 1 << module); ddrphy_wdly_dq_bitslip_rst_write(sc, 1); for (i = 0; i < bitslip; i++) { @@ -1350,9 +1711,9 @@ sdram_leveling(struct sbusfpga_sdram_softc *sc) sdram_read_leveling_rst_delay(sc, module); 
sdram_read_leveling_rst_bitslip(sc, module); } - aprint_normal_dev(sc->sc_dev, "Write latency calibration:\n"); + aprint_normal_dev(sc->dk.sc_dev, "Write latency calibration:\n"); sdram_write_latency_calibration(sc); - aprint_normal_dev(sc->sc_dev, "Read leveling:\n"); + aprint_normal_dev(sc->dk.sc_dev, "Read leveling:\n"); sdram_read_leveling(sc); sdram_software_control_off(sc); return 1; @@ -1362,7 +1723,7 @@ sdram_init(struct sbusfpga_sdram_softc *sc) { ddrphy_rdphase_write(sc, 2); ddrphy_wrphase_write(sc, 3); - aprint_normal_dev(sc->sc_dev, "Initializing SDRAM @0x%08lx...\n", 0x80000000L); + aprint_normal_dev(sc->dk.sc_dev, "Initializing SDRAM @0x%08lx...\n", 0x80000000L); sdram_software_control_on(sc); ddrphy_rst_write(sc, 1); cdelay (1000); diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h index 3c24afe..7fc0852 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h @@ -30,7 +30,8 @@ #define _SBUSFPGA_SDRAM_H_ struct sbusfpga_sdram_softc { - device_t sc_dev; /* us as a device */ + struct dk_softc dk; + /* device_t sc_dev; */ /* us as a device */ /* in dk */ u_int sc_rev; /* revision */ int sc_node; /* PROM node ID */ int sc_burst; /* DVMA burst size in effect */ From c5e9a025b4eaf1a5d7ee04f768e0fa06f042d41c Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 05:08:37 -0400 Subject: [PATCH 40/78] switch sdram access from Wishbone to dedicated port, seems to be more reliable --- sbus-to-ztex-gateware-migen/netbsd_csr.h | 2 +- sbus-to-ztex-gateware-migen/prom_csr.fth | 2 +- .../sbus_to_fpga_blk_dma.py | 160 ++++++++++-------- .../sbus_to_fpga_soc.py | 39 +++-- 4 files changed, 116 insertions(+), 87 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index f7a1606..96ba456 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ 
b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-17 11:01:08 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-18 04:58:11 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth index d9d0973..d0b8e76 100644 --- a/sbus-to-ztex-gateware-migen/prom_csr.fth +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -5,6 +5,6 @@ h# 42000 constant sbusfpga_csraddr_exchange_with_mem h# 43000 constant sbusfpga_csraddr_sdram h# 80000 constant sbusfpga_regionaddr_usb_host_ctrl h# 0 constant sbusfpga_regionaddr_prom -h# fc000000 constant sbusfpga_regionaddr_usb_fake_dma h# 80000000 constant sbusfpga_regionaddr_main_ram +h# fc000000 constant sbusfpga_regionaddr_usb_fake_dma h# 40000 constant sbusfpga_regionaddr_csr diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index 5695cf6..4b25926 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -9,22 +9,29 @@ from litex.soc.interconnect import wishbone # width of fromsbus_fifo is 'blk_addr_width' + 'burst_size * 32' (blk_addr + data) # the blk_addr does the round-trip to accompany the data class ExchangeWithMem(Module, AutoCSR): - def __init__(self, soc, tosbus_fifo, fromsbus_fifo, fromsbus_req_fifo, burst_size = 8): - self.wishbone_r_slave = wishbone.Interface(data_width=soc.bus.data_width) - self.wishbone_w_slave = wishbone.Interface(data_width=soc.bus.data_width) + def __init__(self, soc, tosbus_fifo, fromsbus_fifo, fromsbus_req_fifo, dram_dma_writer, dram_dma_reader, burst_size = 8): + #self.wishbone_r_slave = 
wishbone.Interface(data_width=soc.bus.data_width) + #self.wishbone_w_slave = wishbone.Interface(data_width=soc.bus.data_width) self.tosbus_fifo = tosbus_fifo self.fromsbus_fifo = fromsbus_fifo self.fromsbus_req_fifo = fromsbus_req_fifo + self.dram_dma_writer = dram_dma_writer + self.dram_dma_reader = dram_dma_reader data_width = burst_size * 4 data_width_bits = burst_size * 32 blk_addr_width = 32 - log2_int(data_width) # 27 for burst_size == 8 - - self.wishbone_r_master = wishbone.Interface(data_width=data_width_bits) - self.wishbone_w_master = wishbone.Interface(data_width=data_width_bits) - self.submodules += wishbone.Converter(self.wishbone_r_master, self.wishbone_r_slave) - self.submodules += wishbone.Converter(self.wishbone_w_master, self.wishbone_w_slave) + assert(len(self.dram_dma_writer.sink.data == data_width_bits)) + assert(len(self.dram_dma_reader.source.data == data_width_bits)) + assert(len(self.dram_dma_writer.sink.address == blk_addr_width)) + assert(len(self.dram_dma_reader.sink.address == blk_addr_width)) + + #self.wishbone_r_master = wishbone.Interface(data_width=data_width_bits) + #self.wishbone_w_master = wishbone.Interface(data_width=data_width_bits) + + #self.submodules += wishbone.Converter(self.wishbone_r_master, self.wishbone_r_slave) + #self.submodules += wishbone.Converter(self.wishbone_w_master, self.wishbone_w_slave) print("ExchangeWithMem: data_width = {}, data_width_bits = {}, blk_addr_width = {}\n".format(data_width, data_width_bits, blk_addr_width)) print("ExchangeWithMem: tosbus_fifo width = {}, fromsbus_fifo width = {}, fromsbus_req_fifo width = {}\n".format(len(tosbus_fifo.din), len(fromsbus_fifo.dout), len(fromsbus_req_fifo.din))) @@ -49,14 +56,14 @@ class ExchangeWithMem(Module, AutoCSR): self.comb += self.blk_size.status.eq(data_width) self.comb += self.blk_base.status.eq(soc.wb_mem_map["main_ram"] >> log2_int(data_width)) - self.blk_addr = CSRStorage(32, description = "SDRAM Block address to read/write from Wishbone memory 
(block of size {})".format(data_width)) - self.dma_addr = CSRStorage(32, description = "Host Base address where to write/read data (i.e. SPARC Virtual addr)") - self.blk_cnt = CSRStorage(32, write_from_dev=True, description = "How many blk to read/write (max 2^{}-1); bit 31 is RD".format(max_block_bits), reset = 0) - self.last_blk = CSRStatus(32, description = "Last Blk addr finished on WB side") - self.last_dma = CSRStatus(32, description = "Last DMA addr finished on WB side") - self.blk_rem = CSRStatus(32, description = "How many block remaining; bit 31 is RD", reset = 0) + self.blk_addr = CSRStorage(32, description = "SDRAM Block address to read/write from Wishbone memory (block of size {})".format(data_width)) + self.dma_addr = CSRStorage(32, description = "Host Base address where to write/read data (i.e. SPARC Virtual addr)") + self.blk_cnt = CSRStorage(32, write_from_dev=True, description = "How many blk to read/write (max 2^{}-1); bit 31 is RD".format(max_block_bits), reset = 0) + self.last_blk = CSRStatus(32, description = "Last Blk addr finished on WB side") + self.last_dma = CSRStatus(32, description = "Last DMA addr finished on WB side") + self.blk_rem = CSRStatus(32, description = "How many block remaining; bit 31 is RD", reset = 0) self.dma_status = CSRStatus(32, description = "Status register") - self.wr_tosdram = CSRStatus(32, description = "Last address written to SDRAM") + self.wr_tosdram = CSRStatus(32, description = "Last address written to SDRAM") self.submodules.req_r_fsm = req_r_fsm = FSM(reset_state="Reset") self.submodules.req_w_fsm = req_w_fsm = FSM(reset_state="Reset") @@ -68,17 +75,17 @@ class ExchangeWithMem(Module, AutoCSR): self.comb += self.dma_status.status[8:9].eq(req_w_fsm.ongoing("ReqToMemory")) self.comb += self.dma_status.status[9:10].eq(req_w_fsm.ongoing("WaitForAck")) - self.comb += self.dma_status.status[16:17].eq(self.wishbone_w_master.cyc) # show the WB iface status (W) - self.comb += 
self.dma_status.status[17:18].eq(self.wishbone_w_master.stb) - self.comb += self.dma_status.status[18:19].eq(self.wishbone_w_master.we) - self.comb += self.dma_status.status[19:20].eq(self.wishbone_w_master.ack) - self.comb += self.dma_status.status[20:21].eq(self.wishbone_w_master.err) + #self.comb += self.dma_status.status[16:17].eq(self.wishbone_w_master.cyc) # show the WB iface status (W) + #self.comb += self.dma_status.status[17:18].eq(self.wishbone_w_master.stb) + #self.comb += self.dma_status.status[18:19].eq(self.wishbone_w_master.we) + #self.comb += self.dma_status.status[19:20].eq(self.wishbone_w_master.ack) + #self.comb += self.dma_status.status[20:21].eq(self.wishbone_w_master.err) - self.comb += self.dma_status.status[24:25].eq(self.wishbone_r_master.cyc) # show the WB iface status (R) - self.comb += self.dma_status.status[25:26].eq(self.wishbone_r_master.stb) - self.comb += self.dma_status.status[26:27].eq(self.wishbone_r_master.we) - self.comb += self.dma_status.status[27:28].eq(self.wishbone_r_master.ack) - self.comb += self.dma_status.status[28:29].eq(self.wishbone_r_master.err) + #self.comb += self.dma_status.status[24:25].eq(self.wishbone_r_master.cyc) # show the WB iface status (R) + #self.comb += self.dma_status.status[25:26].eq(self.wishbone_r_master.stb) + #self.comb += self.dma_status.status[26:27].eq(self.wishbone_r_master.we) + #self.comb += self.dma_status.status[27:28].eq(self.wishbone_r_master.ack) + #self.comb += self.dma_status.status[28:29].eq(self.wishbone_r_master.err) req_r_fsm.act("Reset", NextState("Idle") @@ -99,35 +106,31 @@ class ExchangeWithMem(Module, AutoCSR): ) ) req_r_fsm.act("ReqFromMemory", - If(~self.wishbone_r_master.ack, - NextValue(self.wishbone_r_master.cyc, 1), - NextValue(self.wishbone_r_master.stb, 1), - NextValue(self.wishbone_r_master.sel, 2**len(self.wishbone_r_master.sel)-1), - NextValue(self.wishbone_r_master.we, 0), - NextValue(self.wishbone_r_master.adr, local_r_addr), - NextState("WaitForData") - ) + 
self.dram_dma_reader.sink.address.eq(local_r_addr), + self.dram_dma_reader.sink.valid.eq(1), + If(self.dram_dma_reader.sink.ready, + NextState("WaitForData") + ) ) req_r_fsm.act("WaitForData", - If(self.wishbone_r_master.ack & - self.tosbus_fifo.writable, - NextValue(self.wishbone_r_master.cyc, 0), - NextValue(self.wishbone_r_master.stb, 0), - tosbus_fifo.we.eq(1), - tosbus_fifo.din.eq(Cat(dma_r_addr, self.wishbone_r_master.dat_r)), - NextValue(self.last_blk.status, local_r_addr), - NextValue(self.last_dma.status, dma_r_addr), - NextValue(self.blk_rem.status, self.blk_rem.status - 1), - If(self.blk_rem.status[0:max_block_bits] <= 1, - self.blk_cnt.we.eq(1), ## auto-reset - self.blk_cnt.dat_w.eq(0), - NextState("Idle"), - ).Else( - NextValue(local_r_addr, local_r_addr + 1), - NextValue(dma_r_addr, dma_r_addr + data_width), - NextState("ReqFromMemory"), - ) - ) + If(self.dram_dma_reader.source.valid & + self.tosbus_fifo.writable, + self.tosbus_fifo.we.eq(1), + self.tosbus_fifo.din.eq(Cat(dma_r_addr, self.dram_dma_reader.source.data)), + self.dram_dma_reader.source.ready.eq(1), + NextValue(self.last_blk.status, local_r_addr), + NextValue(self.last_dma.status, dma_r_addr), + NextValue(self.blk_rem.status, self.blk_rem.status - 1), + If(self.blk_rem.status[0:max_block_bits] <= 1, + self.blk_cnt.we.eq(1), ## auto-reset + self.blk_cnt.dat_w.eq(0), + NextState("Idle"), + ).Else( + NextValue(local_r_addr, local_r_addr + 1), + NextValue(dma_r_addr, dma_r_addr + data_width), + NextState("ReqFromMemory"), + ) + ) ) req_r_fsm.act("QueueReqToMemory", If(self.fromsbus_req_fifo.writable, @@ -150,27 +153,42 @@ class ExchangeWithMem(Module, AutoCSR): ) +# req_w_fsm.act("Reset", +# NextState("Idle") +# ) +# req_w_fsm.act("Idle", +# If(self.fromsbus_fifo.readable & +# ~self.wishbone_w_master.ack, +# self.fromsbus_fifo.re.eq(1), +# NextValue(self.wishbone_w_master.cyc, 1), +# NextValue(self.wishbone_w_master.stb, 1), +# NextValue(self.wishbone_w_master.sel, 
2**len(self.wishbone_w_master.sel)-1), +# NextValue(self.wishbone_w_master.we, 1), +# NextValue(self.wishbone_w_master.adr, self.fromsbus_fifo.dout[0:blk_addr_width]), +# NextValue(self.wishbone_w_master.dat_w, self.fromsbus_fifo.dout[blk_addr_width:(blk_addr_width + data_width_bits)]), +# NextValue(self.wr_tosdram.status, self.fromsbus_fifo.dout[0:blk_addr_width]), +# NextState("WaitForAck") +# ) +# ) +# req_w_fsm.act("WaitForAck", +# If(self.wishbone_w_master.ack, +# NextValue(self.wishbone_w_master.cyc, 0), +# NextValue(self.wishbone_w_master.stb, 0), +# NextState("Idle"), +# ) +# ) + req_w_fsm.act("Reset", NextState("Idle") ) req_w_fsm.act("Idle", - If(self.fromsbus_fifo.readable & - ~self.wishbone_w_master.ack, - self.fromsbus_fifo.re.eq(1), - NextValue(self.wishbone_w_master.cyc, 1), - NextValue(self.wishbone_w_master.stb, 1), - NextValue(self.wishbone_w_master.sel, 2**len(self.wishbone_w_master.sel)-1), - NextValue(self.wishbone_w_master.we, 1), - NextValue(self.wishbone_w_master.adr, self.fromsbus_fifo.dout[0:blk_addr_width]), - NextValue(self.wishbone_w_master.dat_w, self.fromsbus_fifo.dout[blk_addr_width:(blk_addr_width + data_width_bits)]), - NextValue(self.wr_tosdram.status, self.fromsbus_fifo.dout[0:blk_addr_width]), - NextState("WaitForAck") - ) - ) - req_w_fsm.act("WaitForAck", - If(self.wishbone_w_master.ack, - NextValue(self.wishbone_w_master.cyc, 0), - NextValue(self.wishbone_w_master.stb, 0), - NextState("Idle"), - ) + If(self.fromsbus_fifo.readable, + self.dram_dma_writer.sink.address.eq(self.fromsbus_fifo.dout[0:blk_addr_width]), + self.dram_dma_writer.sink.data.eq(self.fromsbus_fifo.dout[blk_addr_width:(blk_addr_width + data_width_bits)]), + self.dram_dma_writer.sink.valid.eq(1), + NextValue(self.wr_tosdram.status, self.fromsbus_fifo.dout[0:blk_addr_width]), + If(self.dram_dma_writer.sink.ready, + self.fromsbus_fifo.re.eq(1) + ) + ) ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py 
index d447122..9b243ba 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -16,8 +16,10 @@ from migen.genlib.fifo import * from litedram.modules import MT41J128M16 from litedram.phy import s7ddrphy -from sbus_to_fpga_fsm import *; -from sbus_to_fpga_blk_dma import *; +from sbus_to_fpga_fsm import * +from sbus_to_fpga_blk_dma import * + +from litedram.frontend.dma import * import sbus_to_fpga_export; @@ -156,6 +158,15 @@ class SBusFPGA(SoCCore): #getattr(self,"prom").mem.init = prom_data #getattr(self,"prom").mem.depth = 2**14 + self.submodules.ddrphy = s7ddrphy.A7DDRPHY(platform.request("ddram"), + memtype = "DDR3", + nphases = 4, + sys_clk_freq = sys_clk_freq) + self.add_sdram("sdram", + phy = self.ddrphy, + module = MT41J128M16(sys_clk_freq, "1:4"), + l2_cache_size = 0, + ) # don't enable anything on the SBus side for 20 seconds after power up # this avoids FPGA initialization messing with the cold boot process # requires us to reset the SPARCstation afterward so the FPGA board @@ -178,10 +189,20 @@ class SBusFPGA(SoCCore): self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+burst_size*32), depth=4)) self.submodules.fromsbus_req_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+32), depth=16)) + self.submodules.dram_dma_writer = LiteDRAMDMAWriter(port=self.sdram.crossbar.get_port(mode="write", data_width=burst_size*32), + fifo_depth=4, + fifo_buffered=True) + + self.submodules.dram_dma_reader = LiteDRAMDMAReader(port=self.sdram.crossbar.get_port(mode="read", data_width=burst_size*32), + fifo_depth=4, + fifo_buffered=True) + self.submodules.exchange_with_mem = ExchangeWithMem(soc=self, tosbus_fifo=self.tosbus_fifo, fromsbus_fifo=self.fromsbus_fifo, fromsbus_req_fifo=self.fromsbus_req_fifo, + dram_dma_writer=self.dram_dma_writer, + 
dram_dma_reader=self.dram_dma_reader, burst_size=burst_size) _sbus_bus = SBusFPGABus(platform=self.platform, @@ -197,18 +218,8 @@ class SBusFPGA(SoCCore): self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) - self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_slave) - self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_slave) - - self.submodules.ddrphy = s7ddrphy.A7DDRPHY(platform.request("ddram"), - memtype = "DDR3", - nphases = 4, - sys_clk_freq = sys_clk_freq) - self.add_sdram("sdram", - phy = self.ddrphy, - module = MT41J128M16(sys_clk_freq, "1:4"), - l2_cache_size = 0, - ) + #self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_slave) + #self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_slave) #self.add_sdcard() From 023e84b7348997da8802651854345fb03376025f Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 08:14:57 -0400 Subject: [PATCH 41/78] swap on sbusfpga_sdram seems to work, but hogs the bus --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 78 +++++++++++++------ 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 76fb285..cee869e 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -111,7 +111,7 @@ static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_i int sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) { - struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, minor(dev)); + struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, 
DISKUNIT(dev)); int err = 0, err2 = 0; if (sc == NULL) { @@ -338,8 +338,40 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) dk_attach(&sc->dk); disk_attach(&sc->dk.sc_dkdev); sbusfpga_sdram_set_geometry(sc); - + bufq_alloc(&sc->dk.sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK); /* needed ? */ + { + struct disklabel *lp = sc->dk.sc_dkdev.dk_label; + struct cpu_disklabel *clp = sc->dk.sc_dkdev.dk_cpulabel; + memset(lp, 0, sizeof(struct disklabel)); + memset(clp, 0, sizeof(struct cpu_disklabel)); + + lp->d_type = DKTYPE_FLASH; + lp->d_secsize = 512; + lp->d_nsectors = 4; + lp->d_ntracks = 2; + lp->d_ncylinders = 65536; + lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; + lp->d_secperunit = lp->d_secpercyl * lp->d_ncylinders; + lp->d_rpm = 3600; + + strncpy(lp->d_typename, "sdramdisk", sizeof(lp->d_typename)); + strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); + lp->d_interleave = 0; + + lp->d_partitions[0].p_offset = lp->d_secpercyl * lp->d_secsize; + lp->d_partitions[0].p_size = lp->d_secpercyl * (lp->d_ncylinders - 1); + lp->d_partitions[0].p_fstype = FS_SWAP; + + lp->d_partitions[RAW_PART].p_offset = 0; + lp->d_partitions[RAW_PART].p_size = lp->d_secpercyl * lp->d_ncylinders; + lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; + lp->d_npartitions = RAW_PART + 1; + + lp->d_magic = DISKMAGIC; + lp->d_magic2 = DISKMAGIC; + lp->d_checksum = dkcksum(lp); + } /* aprint_normal_dev(self, "sc->dk.sc_dkdev.dk_blkshift = %d\n", sc->dk.sc_dkdev.dk_blkshift); @@ -364,12 +396,12 @@ static void sbusfpga_sdram_set_geometry(struct sbusfpga_sdram_softc *sc) { memset(dg, 0, sizeof(*dg)); dg->dg_secsize = 512; - dg->dg_nsectors = 32; - dg->dg_ntracks = 32; - dg->dg_ncylinders = 512; + dg->dg_nsectors = 2; + dg->dg_ntracks = 4; + dg->dg_ncylinders = 65536; dg->dg_secpercyl = dg->dg_nsectors * dg->dg_ntracks; - dg->dg_secperunit = 524288; //dg->dg_secpercyl * dg->dg_ncylinders; - dg->dg_pcylinders = 512; + dg->dg_secperunit = dg->dg_secpercyl * 
dg->dg_ncylinders; + dg->dg_pcylinders = 65536; dg->dg_sparespertrack = 0; dg->dg_sparespercyl = 0; @@ -400,12 +432,12 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) err = EINVAL; goto done; } - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: part %d\n", __PRETTY_FUNCTION__, __LINE__, DISKPART(bp->b_dev)); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bflags = 0x%08x\n", __PRETTY_FUNCTION__, __LINE__, bp->b_flags); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bufsize = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bufsize); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); - aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bcount = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bcount); + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: part %d\n", __PRETTY_FUNCTION__, __LINE__, DISKPART(bp->b_dev)); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bflags = 0x%08x\n", __PRETTY_FUNCTION__, __LINE__, bp->b_flags); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bufsize = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bufsize); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_blkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_blkno); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_rawblkno = %lld\n", __PRETTY_FUNCTION__, __LINE__, bp->b_rawblkno); */ + /* aprint_normal_dev(sc->dk.sc_dev, "%s:%d: bp->b_bcount = %d\n", __PRETTY_FUNCTION__, __LINE__, bp->b_bcount); */ bp->b_resid = bp->b_bcount; @@ -760,12 +792,12 @@ static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_in exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); exchange_with_mem_blk_cnt_write(sc, 0x00000000 | (blkcnt * 512 / sc->dma_blk_size) ); - delay(500); + delay(100); count = 0; - while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { + 
while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 50)) { count ++; - delay(500); + delay(100); } if (check) { @@ -780,10 +812,10 @@ static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_in } count = 0; - while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 50)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); count ++; - delay(500); + delay(100); } if (check & 0x3) { @@ -816,12 +848,12 @@ static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_i exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); exchange_with_mem_blk_cnt_write(sc, 0x80000000 | (blkcnt * 512 / sc->dma_blk_size) ); - delay(500); + delay(100); count = 0; - while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { + while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 50)) { count ++; - delay(500); + delay(100); } if (check) { @@ -836,10 +868,10 @@ static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_i } count = 0; - while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 50)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); count ++; - delay(500); + delay(100); } if (check & 0x3) { From 38e3431c7fdc27d595abbca0256c415392cd9674 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 10:19:53 -0400 Subject: [PATCH 42/78] cleaning up some stuff, disable USB Host for testing SDRAM disk --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 20 +++--- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h | 4 +- sbus-to-ztex-gateware-migen/netbsd_csr.h | 63 ++++++++++--------- 
sbus-to-ztex-gateware-migen/prom_migen.fth | 2 +- .../sbus_to_fpga_blk_dma.py | 7 ++- .../sbus_to_fpga_fsm.py | 35 +++++++++-- 6 files changed, 85 insertions(+), 46 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index cee869e..92b351f 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -350,7 +350,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) lp->d_secsize = 512; lp->d_nsectors = 4; lp->d_ntracks = 2; - lp->d_ncylinders = 65536; + lp->d_ncylinders = sc->dma_real_mem_size / (lp->d_secsize * lp->d_nsectors * lp->d_ntracks); lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; lp->d_secperunit = lp->d_secpercyl * lp->d_ncylinders; lp->d_rpm = 3600; @@ -398,10 +398,10 @@ static void sbusfpga_sdram_set_geometry(struct sbusfpga_sdram_softc *sc) { dg->dg_secsize = 512; dg->dg_nsectors = 2; dg->dg_ntracks = 4; - dg->dg_ncylinders = 65536; + dg->dg_ncylinders = sc->dma_real_mem_size / (dg->dg_secsize * dg->dg_nsectors * dg->dg_ntracks); dg->dg_secpercyl = dg->dg_nsectors * dg->dg_ntracks; dg->dg_secperunit = dg->dg_secpercyl * dg->dg_ncylinders; - dg->dg_pcylinders = 65536; + dg->dg_pcylinders = dg->dg_ncylinders; dg->dg_sparespertrack = 0; dg->dg_sparespercyl = 0; @@ -411,7 +411,8 @@ static void sbusfpga_sdram_set_geometry(struct sbusfpga_sdram_softc *sc) { int sbusfpga_sdram_size(dev_t dev) { - return 524288; + struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); + return sc->dma_real_mem_size / 512; } static void @@ -466,7 +467,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) if (blkcnt > (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512)) blkcnt = (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512); - if (blk+blkcnt <= 524288) { + if (blk+blkcnt <= (sc->dma_real_mem_size / 512)) { err = sbusfpga_sdram_read_block(sc, blk, blkcnt, data); } else { aprint_error("%s:%d: blk = %lld read out 
of range! giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); @@ -502,7 +503,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) if (blkcnt > (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512)) blkcnt = (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512); - if (blk+blkcnt <= 524288) { + if (blk+blkcnt <= (sc->dma_real_mem_size / 512)) { err = sbusfpga_sdram_write_block(sc, blk, blkcnt, data); } else { aprint_error("%s:%d: blk = %lld write out of range! giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); @@ -543,7 +544,12 @@ int dma_init(struct sbusfpga_sdram_softc *sc) { sc->dma_blk_size = exchange_with_mem_blk_size_read(sc); sc->dma_blk_base = exchange_with_mem_blk_base_read(sc); - aprint_normal_dev(sc->dk.sc_dev, "DMA: HW -> block size is %d, base address is 0x%08x\n", sc->dma_blk_size, sc->dma_blk_base * sc->dma_blk_size); + sc->dma_mem_size = exchange_with_mem_mem_size_read(sc); + sc->dma_real_mem_size = sc->dma_mem_size * sc->dma_blk_size; + aprint_normal_dev(sc->dk.sc_dev, "DMA: HW -> block size is %d, base address is 0x%08x (%d MiB)\n", + sc->dma_blk_size, + sc->dma_blk_base * sc->dma_blk_size, + sc->dma_real_mem_size / 1048576); /* Allocate a dmamap */ if (bus_dmamap_create(sc->sc_dmatag, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 1, SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &sc->sc_dmamap) != 0) { diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h index 7fc0852..2539b0d 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.h @@ -47,6 +47,8 @@ struct sbusfpga_sdram_softc { /* specific of the DMA engine */ u_int dma_blk_size; u_int dma_blk_base; + u_int dma_mem_size; /* in blk_size */ + u_int dma_real_mem_size; /* precomputed in bytes */ /* DMA kernel structures */ bus_dma_tag_t sc_dmatag; bus_dmamap_t sc_dmamap; @@ -55,6 +57,6 @@ struct sbusfpga_sdram_softc { void * sc_dma_kva; }; -#define SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ (4*1024) +#define 
SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ (64*1024) #endif /* _SBUSFPGA_SDRAM_H_ */ diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 96ba456..3b9549d 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-18 04:58:11 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-18 09:29:51 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -142,54 +142,59 @@ static inline uint32_t exchange_with_mem_blk_size_read(struct sbusfpga_sdram_sof static inline uint32_t exchange_with_mem_blk_base_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x4L); } -#define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x8L) -#define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_SIZE 1 -static inline uint32_t exchange_with_mem_blk_addr_read(struct sbusfpga_sdram_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_MEM_SIZE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x8L) +#define CSR_EXCHANGE_WITH_MEM_MEM_SIZE_SIZE 1 +static inline uint32_t exchange_with_mem_mem_size_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x8L); } -static inline void exchange_with_mem_blk_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x8L, v); -} -#define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0xcL) -#define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_SIZE 1 -static inline uint32_t exchange_with_mem_dma_addr_read(struct sbusfpga_sdram_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0xcL) +#define 
CSR_EXCHANGE_WITH_MEM_BLK_ADDR_SIZE 1 +static inline uint32_t exchange_with_mem_blk_addr_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0xcL); } -static inline void exchange_with_mem_dma_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void exchange_with_mem_blk_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0xcL, v); } -#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x10L) -#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_SIZE 1 -static inline uint32_t exchange_with_mem_blk_cnt_read(struct sbusfpga_sdram_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x10L) +#define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_SIZE 1 +static inline uint32_t exchange_with_mem_dma_addr_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x10L); } -static inline void exchange_with_mem_blk_cnt_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void exchange_with_mem_dma_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x10L, v); } -#define CSR_EXCHANGE_WITH_MEM_LAST_BLK_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x14L) -#define CSR_EXCHANGE_WITH_MEM_LAST_BLK_SIZE 1 -static inline uint32_t exchange_with_mem_last_blk_read(struct sbusfpga_sdram_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x14L) +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_SIZE 1 +static inline uint32_t exchange_with_mem_blk_cnt_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x14L); } -#define CSR_EXCHANGE_WITH_MEM_LAST_DMA_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x18L) -#define CSR_EXCHANGE_WITH_MEM_LAST_DMA_SIZE 1 -static inline uint32_t exchange_with_mem_last_dma_read(struct 
sbusfpga_sdram_softc *sc) { +static inline void exchange_with_mem_blk_cnt_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x14L, v); +} +#define CSR_EXCHANGE_WITH_MEM_LAST_BLK_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x18L) +#define CSR_EXCHANGE_WITH_MEM_LAST_BLK_SIZE 1 +static inline uint32_t exchange_with_mem_last_blk_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x18L); } -#define CSR_EXCHANGE_WITH_MEM_BLK_REM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x1cL) -#define CSR_EXCHANGE_WITH_MEM_BLK_REM_SIZE 1 -static inline uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_sdram_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_LAST_DMA_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x1cL) +#define CSR_EXCHANGE_WITH_MEM_LAST_DMA_SIZE 1 +static inline uint32_t exchange_with_mem_last_dma_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x1cL); } -#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x20L) -#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_SIZE 1 -static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_sdram_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_BLK_REM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x20L) +#define CSR_EXCHANGE_WITH_MEM_BLK_REM_SIZE 1 +static inline uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_sdram_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x20L); } -#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x24L) +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x24L) +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_sdram_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x24L); +} +#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_ADDR 
(CSR_EXCHANGE_WITH_MEM_BASE + 0x28L) #define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_SIZE 1 static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_sdram_softc *sc) { - return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x24L); + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x28L); } #endif // CSR_EXCHANGE_WITH_MEM_BASE diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index 6b0821d..d07a0cb 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -41,7 +41,7 @@ finish-device new-device \ Absolute minimal stuff; name & registers def. -" generic-ohci" device-name +" DISABLED-generic-ohci" device-name \ USB registers are in the device space, not the CSR space my-address sbusfpga_regionaddr_usb_host_ctrl + my-space h# 1000 reg diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index 4b25926..b8d7f23 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -53,8 +53,10 @@ class ExchangeWithMem(Module, AutoCSR): #self.blk_base = CSRConstant(value=soc.wb_mem_map["main_ram"] >> log2_int(data_width)) # report where the blk starts self.blk_size = CSRStatus(32) # report the block size to the SW layer self.blk_base = CSRStatus(32) # report where the blk starts + self.mem_size = CSRStatus(32) # report how much memory we have self.comb += self.blk_size.status.eq(data_width) self.comb += self.blk_base.status.eq(soc.wb_mem_map["main_ram"] >> log2_int(data_width)) + self.comb += self.mem_size.status.eq((256 * 1024 * 1024) >> log2_int(data_width)) # is it already available from mem_regions ? 
self.blk_addr = CSRStorage(32, description = "SDRAM Block address to read/write from Wishbone memory (block of size {})".format(data_width)) self.dma_addr = CSRStorage(32, description = "Host Base address where to write/read data (i.e. SPARC Virtual addr)") @@ -72,8 +74,9 @@ class ExchangeWithMem(Module, AutoCSR): self.comb += self.dma_status.status[1:2].eq(~req_w_fsm.ongoing("Idle")) # Write FSM Busy self.comb += self.dma_status.status[2:3].eq(self.fromsbus_fifo.readable) # Some data available to write to memory - self.comb += self.dma_status.status[8:9].eq(req_w_fsm.ongoing("ReqToMemory")) - self.comb += self.dma_status.status[9:10].eq(req_w_fsm.ongoing("WaitForAck")) + self.comb += self.dma_status.status[8:9].eq(req_r_fsm.ongoing("ReqFromMemory")) + self.comb += self.dma_status.status[9:10].eq(req_r_fsm.ongoing("WaitForData")) + self.comb += self.dma_status.status[10:11].eq(req_r_fsm.ongoing("QueueReqToMemory")) #self.comb += self.dma_status.status[16:17].eq(self.wishbone_w_master.cyc) # show the WB iface status (W) #self.comb += self.dma_status.status[17:18].eq(self.wishbone_w_master.stb) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 280685c..658203b 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -306,11 +306,11 @@ class SBusFPGABus(Module): #led6 = platform.request("user_led", 6) #led7 = platform.request("user_led", 7) - led0123 = Signal(4) - self.sync += platform.request("user_led", 0).eq(led0123[0]) - self.sync += platform.request("user_led", 1).eq(led0123[1]) - self.sync += platform.request("user_led", 2).eq(led0123[2]) - self.sync += platform.request("user_led", 3).eq(led0123[3]) + #led0123 = Signal(4) + #self.sync += platform.request("user_led", 0).eq(led0123[0]) + #self.sync += platform.request("user_led", 1).eq(led0123[1]) + #self.sync += platform.request("user_led", 2).eq(led0123[2]) + #self.sync += 
platform.request("user_led", 3).eq(led0123[3]) #self.sync += platform.request("user_led", 0).eq(self.wishbone_master.cyc) #self.sync += platform.request("user_led", 1).eq(self.wishbone_master.stb) @@ -354,6 +354,29 @@ class SBusFPGABus(Module): self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) + self.sync += platform.request("user_led", 0).eq(slave_fsm.ongoing("Master_Translation")) + self.sync += platform.request("user_led", 1).eq(slave_fsm.ongoing("Master_Read") | + slave_fsm.ongoing("Master_Read_Ack") | + slave_fsm.ongoing("Master_Read_Finish") | + slave_fsm.ongoing("Master_Write") | + slave_fsm.ongoing("Master_Write_Final")) + self.sync += platform.request("user_led", 2).eq(slave_fsm.ongoing("Slave_Do_Read") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Data") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Wishbone") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord_Wait_For_Data") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord_Wait_For_Wishbone") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte_Wait_For_Data") | + slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte_Wait_For_Wishbone")) + self.sync += platform.request("user_led", 3).eq(slave_fsm.ongoing("Slave_Ack_Reg_Write_Burst") | + slave_fsm.ongoing("Slave_Ack_Reg_Write_Final") | + slave_fsm.ongoing("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone") | + slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord") | + slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") | + slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte") | + slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone")) self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) self.sync += platform.request("user_led", 7).eq(master_data_src_fromsbus_fifo) @@ -680,7 +703,7 @@ class 
SBusFPGABus(Module): NextValue(burst_limit_m1, 0), ## only single word for now NextValue(master_size, SIZ_WORD), NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), - NextValue(led0123, self.wishbone_slave.sel) + #NextValue(led0123, self.wishbone_slave.sel) ] }), # NextValue(master_data, self.wishbone_slave.dat_w), From 5094d1d213490a2dcdedbb65ba1b8ddf051cb7b7 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 12:44:48 -0400 Subject: [PATCH 43/78] re-enable USB in PROM, make sure the SDRAM request don't inadvertently kill the USB request --- sbus-to-ztex-gateware-migen/netbsd_csr.h | 2 +- sbus-to-ztex-gateware-migen/prom_migen.fth | 2 +- .../sbus_to_fpga_fsm.py | 44 ++++++++++--------- .../sbus_to_fpga_soc.py | 3 ++ 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 3b9549d..534c79e 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-18 09:29:51 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-18 12:35:05 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index d07a0cb..6b0821d 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -41,7 +41,7 @@ finish-device new-device \ Absolute minimal stuff; name & registers def. 
-" DISABLED-generic-ohci" device-name +" generic-ohci" device-name \ USB registers are in the device space, not the CSR space my-address sbusfpga_regionaddr_usb_host_ctrl + my-space h# 1000 reg diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 658203b..40333e8 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -297,10 +297,10 @@ class SBusFPGABus(Module): #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.cyc) - #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.stb) - #self.sync += platform.request("user_led", 6).eq(self.wishbone_slave.we) - #self.sync += platform.request("user_led", 7).eq(self.wishbone_slave.ack) - #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.err) + self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.stb) + self.sync += platform.request("user_led", 6).eq(self.wishbone_slave.we) + self.sync += platform.request("user_led", 7).eq(self.wishbone_slave.ack) + #self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.err) #led4 = platform.request("user_led", 4) #led5 = platform.request("user_led", 5) #led6 = platform.request("user_led", 6) @@ -345,21 +345,22 @@ class SBusFPGABus(Module): self.master_read_buffer_done = Array(Signal() for a in range(4)) self.master_read_buffer_read = Array(Signal() for a in range(4)) self.master_read_buffer_start = Signal() + + #self.sync += platform.request("user_led", 1).eq(self.master_read_buffer_start) - self.master_write_buffer_data = Array(Signal(32) for a in range(4)) - self.master_write_buffer_addr = Signal(28) - self.master_write_buffer_todo = Array(Signal() for a in range(4)) - self.master_write_buffer_start = Signal() + #self.master_write_buffer_data = Array(Signal(32) for a in range(4)) + #self.master_write_buffer_addr 
= Signal(28) + #self.master_write_buffer_todo = Array(Signal() for a in range(4)) + #self.master_write_buffer_start = Signal() self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") - self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) - self.sync += platform.request("user_led", 0).eq(slave_fsm.ongoing("Master_Translation")) - self.sync += platform.request("user_led", 1).eq(slave_fsm.ongoing("Master_Read") | - slave_fsm.ongoing("Master_Read_Ack") | - slave_fsm.ongoing("Master_Read_Finish") | - slave_fsm.ongoing("Master_Write") | - slave_fsm.ongoing("Master_Write_Final")) + #self.sync += platform.request("user_led", 0).eq(slave_fsm.ongoing("Master_Translation")) + #self.sync += platform.request("user_led", 1).eq(slave_fsm.ongoing("Master_Read") | + # slave_fsm.ongoing("Master_Read_Ack") | + # slave_fsm.ongoing("Master_Read_Finish") | + # slave_fsm.ongoing("Master_Write") | + # slave_fsm.ongoing("Master_Write_Final")) self.sync += platform.request("user_led", 2).eq(slave_fsm.ongoing("Slave_Do_Read") | slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst") | slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Data") | @@ -378,8 +379,9 @@ class SBusFPGABus(Module): slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte") | slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone")) - self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) - self.sync += platform.request("user_led", 7).eq(master_data_src_fromsbus_fifo) + #self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) + #self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) + #self.sync += platform.request("user_led", 7).eq(master_data_src_fromsbus_fifo) slave_fsm.act("Reset", #NextValue(self.led_display.value, 0x0000000000), @@ -634,7 +636,6 @@ class SBusFPGABus(Module): (wishbone_slave_timeout == 0), ## sel == 0 so nothing to write, don't acquire the SBus NextValue(self.wishbone_slave.ack, 1), - 
NextValue(wishbone_slave_timeout, wishbone_default_timeout), ).Elif(SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & @@ -1283,7 +1284,9 @@ class SBusFPGABus(Module): ), NextValue(burst_counter, burst_counter + 1), If(burst_counter == burst_limit_m1, - NextValue(self.master_read_buffer_start, 0), + If(~master_data_src_fromsbus_fifo, + NextValue(self.master_read_buffer_start, 0), + ), NextState("Master_Read_Finish") ).Else( Case(SBUS_3V3_ACKs_i, { @@ -1460,7 +1463,8 @@ class SBusFPGABus(Module): # ##### Slave read buffering FSM #### last_read_word_idx = Signal(2) self.submodules.wishbone_slave_read_buffering_fsm = wishbone_slave_read_buffering_fsm = FSM(reset_state="Reset") - #self.sync += led4.eq(self.master_read_buffer_start) + self.sync += platform.request("user_led", 0).eq(~wishbone_slave_read_buffering_fsm.ongoing("Idle")) + self.sync += platform.request("user_led", 1).eq(self.master_read_buffer_done[last_read_word_idx]) wishbone_slave_read_buffering_fsm.act("Reset", NextState("Idle") ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 9b243ba..78f4152 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -184,6 +184,9 @@ class SBusFPGA(SoCCore): self.submodules.wishbone_master_sbus = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="sbus", cd_slave="sys") self.submodules.wishbone_slave_sys = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_sbus, cd_master="sys", cd_slave="sbus") + # SPARCstation 20 slave interface to the main memory are limited to 32-bytes burst (32-bits wide, 8 word long) + # burst_size=16 should work on Ultra systems, but then they probably should go for 64-bits ET as well... 
+ # Older systems are probably limited to burst_size=4, (it should always be available) burst_size=8 self.submodules.tosbus_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=(32+burst_size*32), depth=4)) self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+burst_size*32), depth=4)) From 9d88808b4f3dd53c89fa198ec1843866a616d18a Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 13:02:43 -0400 Subject: [PATCH 44/78] SBus-OHCI driver --- NetBSD/9.0/usr/src/sys/dev/sbus/ohci_sbus.c | 197 ++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/ohci_sbus.c diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/ohci_sbus.c b/NetBSD/9.0/usr/src/sys/dev/sbus/ohci_sbus.c new file mode 100644 index 0000000..aaeb78a --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/ohci_sbus.c @@ -0,0 +1,197 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 1998, 2021 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Lennart Augustsson (lennart@augustsson.net) at + * Carlstedt Research & Technology. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +struct ohci_sbus_softc { + ohci_softc_t sc; + void *sc_ih; + int sc_node; + int sc_burst; +}; + +static int +ohci_sbus_match(device_t parent, cfdata_t match, void *aux) +{ + struct sbus_attach_args *sa = (struct sbus_attach_args *)aux; + /* generic-ohci is the default name, from device-tree */ + if (strcmp("generic-ohci", sa->sa_name) == 0) + return 1; + /* usb is the OFW name, qualified by device-type */ + const char* type = prom_getpropstring(sa->sa_node, "device-type"); + if (type != NULL && (strcmp("ohci", type) == 0)) + return 1; + return 0; +} + +static void +ohci_sbus_attach(device_t parent, device_t self, void *aux) +{ + struct ohci_sbus_softc *sc = device_private(self); + struct sbus_attach_args *sa = (struct sbus_attach_args *)aux; + struct sbus_softc *sbsc = device_private(parent); + int sbusburst; + + sc->sc.sc_dev = self; + sc->sc.sc_bus.ub_hcpriv = sc; + sc->sc.iot = sa->sa_bustag; + sc->sc.sc_size = sa->sa_size; + + /* **** SBus specific */ + sc->sc_node = sa->sa_node; + /* + * Get transfer burst size from PROM + */ 
+ sbusburst = sbsc->sc_burst; + if (sbusburst == 0) + sbusburst = SBUS_BURST_32 - 1; /* 1->16 */ + sc->sc_burst = prom_getpropint(sc->sc_node, "burst-sizes", -1); + if (sc->sc_burst == -1) + /* take SBus burst sizes */ + sc->sc_burst = sbusburst; + /* Clamp at parent's burst sizes */ + sc->sc_burst &= sbusburst; + + if (0) { /* in PCI there's a test for some specific controller */ + sc->sc.sc_flags = OHCIF_SUPERIO; + } + + /* check if memory space access is enabled */ + /* CHECKME: not needed ? */ + + /* Map I/O registers */ + if (sbus_bus_map(sc->sc.iot, sa->sa_slot, sa->sa_offset, sc->sc.sc_size, + BUS_SPACE_MAP_LINEAR, &sc->sc.ioh) != 0) { + aprint_error_dev(self, ": cannot map registers\n"); + return; + } + + aprint_normal_dev(self, "nid 0x%x, bustag %p (0x%zx @ 0x%08lx), burst 0x%x (parent 0x%0x)\n", + sc->sc_node, + sc->sc.iot, + (size_t)sc->sc.sc_size, + sc->sc.ioh, + sc->sc_burst, + sbsc->sc_burst); + + /* we're SPECIAL!!! */ + /* sc->sc.sc_endian = OHCI_BIG_ENDIAN; */ + + /* Disable interrupts, so we don't get any spurious ones. */ + bus_space_write_4(sc->sc.iot, sc->sc.ioh, OHCI_INTERRUPT_DISABLE, + OHCI_ALL_INTRS); + + sc->sc.sc_bus.ub_dmatag = sa->sa_dmatag; + /* sc->sc.sc_bus.ub_dmatag = (void*)((char*)sc->sc.ioh + 0x10000); */ + + /* Enable the device. */ + /* CHECKME: not needed ? */ + + /* Map and establish the interrupt. 
*/ + if (sa->sa_nintr != 0) { + sc->sc_ih = bus_intr_establish(sc->sc.iot, sa->sa_pri, + IPL_NET, ohci_intr, sc); // checkme: interrupt priority + if (sc->sc_ih == NULL) { + aprint_error_dev(self, "couldn't establish interrupt (%d)\n", sa->sa_nintr); + } else + aprint_normal_dev(self, "interrupting at %d / %d / %d\n", sa->sa_nintr, sa->sa_pri, IPL_NET); + } else { + aprint_error_dev(self, "no interrupt defined in PROM\n"); + goto fail; + } + + int err = ohci_init(&sc->sc); + if (err) { + aprint_error_dev(self, "init failed, error=%d\n", err); + goto fail; + } + + if (!pmf_device_register1(self, ohci_suspend, ohci_resume, + ohci_shutdown)) + aprint_error_dev(self, "couldn't establish power handler\n"); + + /* Attach usb device. */ + sc->sc.sc_child = config_found(self, &sc->sc.sc_bus, usbctlprint); + return; + +fail: + /* should we unmap ? */ + return; +} + +static int +ohci_sbus_detach(device_t self, int flags) +{ + struct ohci_sbus_softc *sc = device_private(self); + int rv; + + rv = ohci_detach(&sc->sc, flags); + if (rv) + return rv; + + pmf_device_deregister(self); + + ohci_shutdown(self, flags); + + /* Disable interrupts, so we don't get any spurious ones. */ + bus_space_write_4(sc->sc.iot, sc->sc.ioh, + OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS); + + /* can we disestablish the interrupt ? */ + /* can we unmap the registers ? 
*/ + return 0; +} + +CFATTACH_DECL3_NEW(ohci_sbus, sizeof(struct ohci_sbus_softc), + ohci_sbus_match, ohci_sbus_attach, ohci_sbus_detach, ohci_activate, NULL, + ohci_childdet, DVF_DETACH_SHUTDOWN); From 22e13886c0a3faf929b6e571517d8678005d5506 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 13:03:18 -0400 Subject: [PATCH 45/78] update READMEs --- README.md | 26 +++++++++++++++++--------- sbus-to-ztex-gateware/README.md | 9 +++++++++ 2 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 sbus-to-ztex-gateware/README.md diff --git a/README.md b/README.md index 8fd0df9..9ff3923 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,9 @@ To save on PCB cost, the board is smaller than a 'true' SBus board; the hardware ## Current status -2021-03-21: The adapter board seems to work fine in two different SS20. Currently the embedded PROM code exposes three devices in the FPGA: +2021-07-18: The old VHDL gateware has been replaced by a new Migen-based gateware, see below for details. -* "RDOL,cryptoengine": exposes a (way too large) polynomial multiplier to implement GCM mode and a AES block. Currently used to implement DMA-based acceleration of AES-256-CBC through /dev/crypto. Unfortunately OpenSSL doesn't support AES-256-GCM in the cryptodev engine, and disagree with NetBSD's /dev/crypto on how to implement AES-256-CTR. And the default SSH cannot use cryptodev, it closes all file descriptors after cryptodev has opened /dev/crypto... still WiP. - -* "RDOL,trng": exposes a 5 MHz counter (didn't realize the SS20 already had a good counter) and a so-far-not-true TRNG (implemented by a PRNG). The 'true' random generators I've found make Vivado screams very loudly when synthesizing... anyway both works fine in NetBSD 9.0 as a timecounter and an entropy source (which a PRNG really isn't, I know). still WiP. - -* "RDOL,sdcard": trying to expose the micro-sd card slot as a storage device, at first using SPI mode. 
So far reading seems to work, and NetBSD can see a Sun disklabel on the micro-sd card if it has been partitioned that way. Mounting a FAT filesystem read-only now works (with very little testing as of yet). Writing not working yet. Very much WiP. +Short version: the board enables a 256 MiB SDRAM disk (for fast swapping) and a USB OHCI host controller (for USB peripherals). ## The hardware @@ -28,9 +24,21 @@ The custom board is a SBus-compliant (I hope...) board, designed to receive a [Z The PCB was designed with Kicad 5.0 -## The gateware +## The gateware (Migen) -Directory 'sbus-to-ztex-gateware' +The gateware was rewritten from scratch in the Migen language, chosen because that's what [Litex](https://github.com/enjoy-digital/litex/) uses. +It implements a simple CPU-less Litex SoC built around a Wishbone bus, with a bridge between the SBus and the Wishbone. + +A ROM, a SDRAM controller (litedram to the on-board DDR3) and an USB OHCI (host controller, using the Litex wrapper around the [SpinalHDL](https://github.com/SpinalHDL/SpinalHDL) implementation) are connected to that bus. +Master access to the SBus by the host are routed to the Wishbone to access the various CSRs / control registers of the devices. + +The USB OHCI DMA is bridged from the Wishbone to the SBus by having the physical addresses of the Wishbone (that match the virtual addresses from NetBSD DVMA allocations) to the bridge. Reads are buffered by block of 16 bytes; currently writes are unbuffered (and somewhat slow, as they need a full SBus master cycle for every transaction of 32 bits or less). The standard NetBSD OHCI driver is used, with just a small custom SBus-OHCI driver mirroring the PCI-OHCI one. + +The SDRAM has its own custom DMA controller, using native Litedram DMA to the memory, and some FIFO to/from the SBus. A custom NetBSD driver exposes it as a drive on which you can swap. It might also be usable as a 'fast', volatile disk, but I haven't tried that yet. 
+ +## The gateware (VHDL, obsolete) + +Directory 'sbus-to-ztex-gateware', this is obsolete and replaced by the Migen gateware above. The function embedded in the FPGA currently includes the PROM, lighting Led to display a 32-bits value, and a GHASH MAC (128 polynomial accumulator, used for the AES-GCM encryption scheme). The device is a fairly basic scale, but should be able to read from the PROM and read/write from the GCM space with any kind of SBus burst (1, 2, 4, 8 or 16 words). @@ -40,5 +48,5 @@ The gateware is currently synthesized with Vivado 2020.1 Directory 'NetBSD' -Some basic drivers for NetBSD 9.0/sparc to enable the deviced as described above. +Some basic drivers for NetBSD 9.0/sparc to enable the devices as described above. diff --git a/sbus-to-ztex-gateware/README.md b/sbus-to-ztex-gateware/README.md new file mode 100644 index 0000000..738cf27 --- /dev/null +++ b/sbus-to-ztex-gateware/README.md @@ -0,0 +1,9 @@ +## Current status + +2021-03-21: The adapter board seems to work fine in two different SS20. Currently the embedded PROM code exposes three devices in the FPGA: + +* "RDOL,cryptoengine": exposes a (way too large) polynomial multiplier to implement GCM mode and a AES block. Currently used to implement DMA-based acceleration of AES-256-CBC through /dev/crypto. Unfortunately OpenSSL doesn't support AES-256-GCM in the cryptodev engine, and disagree with NetBSD's /dev/crypto on how to implement AES-256-CTR. And the default SSH cannot use cryptodev, it closes all file descriptors after cryptodev has opened /dev/crypto... still WiP. + +* "RDOL,trng": exposes a 5 MHz counter (didn't realize the SS20 already had a good counter) and a so-far-not-true TRNG (implemented by a PRNG). The 'true' random generators I've found make Vivado screams very loudly when synthesizing... anyway both works fine in NetBSD 9.0 as a timecounter and an entropy source (which a PRNG really isn't, I know). still WiP. 
+ +* "RDOL,sdcard": trying to expose the micro-sd card slot as a storage device, at first using SPI mode. So far reading seems to work, and NetBSD can see a Sun disklabel on the micro-sd card if it has been partitioned that way. Mounting a FAT filesystem read-only now works (with very little testing as of yet). Writing not working yet. Very much WiP. From c258b75c57c299b40eee794cc531f2ce68fdc27f Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 18 Jul 2021 13:06:50 -0400 Subject: [PATCH 46/78] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ff3923..7f6d26c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## Goal -The goal of this repository is to be able to interface a modern (2020 era) [FPGA](https://en.wikipedia.org/wiki/Field-programmable_gate_array) with a [SBus](https://en.wikipedia.org/wiki/SBus) host. SBus was widely used in SPARCstation and compatibles system in the first halt of the 90s. It was progressively displaced by PCI from the mid-90s onward, and is thoroughly obsolete. +The goal of this repository is to be able to interface a modern (2020 era) [FPGA](https://en.wikipedia.org/wiki/Field-programmable_gate_array) with a [SBus](https://en.wikipedia.org/wiki/SBus) host. SBus was widely used in SPARCstation and compatibles system in the first half of the 90s. It was progressively displaced by PCI from the mid-90s onward, and is thoroughly obsolete. So unless you're a retrocomputing enthusiast with such a machine, this is useless. To be honest, even if you are such an enthusiast, it's probably not that useful... 
From aea1865b7b55ae58c371188eba867f7f93a51476 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Tue, 20 Jul 2021 07:45:51 -0400 Subject: [PATCH 47/78] commit the neorv32trgn-based trng --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 39 ++ .../9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c | 207 ++++++++++ .../9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h | 43 ++ .../neorv32_trng_patched.vhd | 382 ++++++++++++++++++ sbus-to-ztex-gateware-migen/netbsd_csr.h | 104 +++-- sbus-to-ztex-gateware-migen/prom_csr.fth | 1 + sbus-to-ztex-gateware-migen/prom_migen.fth | 41 ++ .../sbus_to_fpga_export.py | 12 +- .../sbus_to_fpga_soc.py | 3 + .../sbus_to_fpga_trng.py | 94 +++++ 10 files changed, 877 insertions(+), 49 deletions(-) create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h create mode 100644 sbus-to-ztex-gateware-migen/neorv32_trng_patched.vhd create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_trng.py diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 92b351f..d91d394 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -55,6 +55,8 @@ __KERNEL_RCSID(0, "$NetBSD$"); #include + #include + int sbusfpga_sdram_match(device_t, cfdata_t, void *); void sbusfpga_sdram_attach(device_t, device_t, void *); @@ -446,6 +448,30 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) goto done; } + /* + { + paddr_t pap; + pmap_t pk = pmap_kernel(); + if (pmap_extract(pk, (vaddr_t)bp->b_data, &pap)) { + aprint_normal_dev(sc->dk.sc_dev, "KVA %p mapped to PA 0x%08lx\n", bp->b_data, pap); + if (bp->b_bcount > 4096) { + u_int32_t np = (bp->b_bcount + 4095) / 4096; + u_int32_t pn; + for (pn = 1 ; pn < np ; pn ++) { + paddr_t papn; + if (pmap_extract(pk, (vaddr_t)bp->b_data + pn * 4096, &papn)) { + if (papn != (pap + pn * 4096)) + break; + } else break; + } + 
aprint_normal_dev(sc->dk.sc_dev, "And we have %u out %u consecutive PA pages\n", pn, np); + } + } else { + aprint_normal_dev(sc->dk.sc_dev, "KVA %p not mapped\n", bp->b_data); + } + } + */ + if (bp->b_flags & B_READ) { unsigned char* data = bp->b_data; daddr_t blk = bp->b_rawblkno; @@ -527,18 +553,31 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) #define CONFIG_CSR_DATA_WIDTH 32 // define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle #define CSR_LEDS_BASE +//#define CSR_DDRPHY_BASE +//#define CSR_SDRAM_BASE +//#define CSR_EXCHANGE_WITH_MEM_BASE #define CSR_SDBLOCK2MEM_BASE #define CSR_SDCORE_BASE #define CSR_SDIRQ_BASE #define CSR_SDMEM2BLOCK_BASE #define CSR_SDPHY_BASE +#define CSR_TRNG_BASE + +/* grrr */ +#define sbusfpga_exchange_with_mem_softc sbusfpga_sdram_softc +#define sbusfpga_ddrphy_softc sbusfpga_sdram_softc + #include "dev/sbus/litex_csr.h" #undef CSR_LEDS_BASE +//#undef CSR_DDRPHY_BASE +//#undef CSR_SDRAM_BASE +//#undef CSR_EXCHANGE_WITH_MEM_BASE #undef CSR_SDBLOCK2MEM_BASE #undef CSR_SDCORE_BASE #undef CSR_SDIRQ_BASE #undef CSR_SDMEM2BLOCK_BASE #undef CSR_SDPHY_BASE +#undef CSR_TRNG_BASE int dma_init(struct sbusfpga_sdram_softc *sc) { diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c new file mode 100644 index 0000000..d67fde8 --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c @@ -0,0 +1,207 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +#include + +int sbusfpga_trng_print(void *, const char *); +int sbusfpga_trng_match(device_t, cfdata_t, void *); +void sbusfpga_trng_attach(device_t, device_t, void *); + +CFATTACH_DECL_NEW(sbusfpga_trng, sizeof(struct sbusfpga_trng_softc), + sbusfpga_trng_match, sbusfpga_trng_attach, NULL, NULL); + +dev_type_open(sbusfpga_trng_open); +dev_type_close(sbusfpga_trng_close); +dev_type_ioctl(sbusfpga_trng_ioctl); + + + +const struct cdevsw sbusfpga_trng_cdevsw = { + .d_open = sbusfpga_trng_open, + .d_close = sbusfpga_trng_close, + .d_read = noread, + .d_write = nowrite, + .d_ioctl = noioctl, + .d_stop = nostop, + .d_tty = notty, + .d_poll = nopoll, + .d_mmap = nommap, + .d_kqfilter = nokqfilter, + .d_discard = nodiscard, + .d_flag = 0 +}; + +extern struct cfdriver sbusfpga_trng_cd; +int +sbusfpga_trng_open(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_trng_close(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_trng_print(void *aux, const char *busname) +{ + + sbus_print(aux, busname); + return (UNCONF); +} + +int +sbusfpga_trng_match(device_t parent, cfdata_t cf, void *aux) +{ + struct sbus_attach_args *sa = (struct sbus_attach_args *)aux; + + return (strcmp("RDOL,neorv32trng", sa->sa_name) == 0); +} + +#define CONFIG_CSR_DATA_WIDTH 32 +// define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle +#define CSR_LEDS_BASE +#define CSR_DDRPHY_BASE +#define CSR_EXCHANGE_WITH_MEM_BASE +#define CSR_SDRAM_BASE +#define CSR_SDBLOCK2MEM_BASE +#define CSR_SDCORE_BASE +#define CSR_SDIRQ_BASE +#define CSR_SDMEM2BLOCK_BASE +#define CSR_SDPHY_BASE +//#define CSR_TRNG_BASE +#include "dev/sbus/litex_csr.h" +#undef CSR_LEDS_BASE +#undef CSR_DDRPHY_BASE +#undef CSR_EXCHANGE_WITH_MEM_BASE +#undef 
CSR_SDRAM_BASE +#undef CSR_SDBLOCK2MEM_BASE +#undef CSR_SDCORE_BASE +#undef CSR_SDIRQ_BASE +#undef CSR_SDMEM2BLOCK_BASE +#undef CSR_SDPHY_BASE +//#undef CSR_TRNG_BASE +/* Entropy callback (installed via rndsource_setcb): feeds the TRNG data CSR to the pool 4 bytes per non-zero read until nbytes are gathered; a zero word counts as a failed read and too many of them abort the request. */ +static void +sbusfpga_trng_getentropy(size_t nbytes, void *cookie) { + struct sbusfpga_trng_softc *sc = cookie; + size_t dbytes = 0; + int failure = 0; + while (dbytes < nbytes) { /* compare directly: (nbytes - dbytes) is size_t and wraps when nbytes is not a multiple of 4 */ + u_int32_t data = trng_data_read(sc); + if (data) { + rnd_add_data_sync(&sc->sc_rndsource, &data, 4, 32); // 32 is perhaps optimistic + dbytes += 4; + } else { + failure ++; + if (failure > (1+(dbytes/4))) { // something going on + aprint_normal_dev(sc->sc_dev, "out of entropy after %zu / %zu bytes\n", dbytes, nbytes); + return; + } + delay(1); + } + } + aprint_normal_dev(sc->sc_dev, "gathered %zu bytes\n", dbytes); +} + +/* + * Attach all the sub-devices we can find + */ +void +sbusfpga_trng_attach(device_t parent, device_t self, void *aux) +{ + struct sbus_attach_args *sa = aux; + struct sbusfpga_trng_softc *sc = device_private(self); + struct sbus_softc *sbsc = device_private(parent); + int node; + int sbusburst; + + sc->sc_bustag = sa->sa_bustag; + sc->sc_dev = self; + + if (sbus_bus_map(sc->sc_bustag, sa->sa_slot, sa->sa_offset, sa->sa_size, + BUS_SPACE_MAP_LINEAR, &sc->sc_bhregs_trng) != 0) { + aprint_error(": cannot map registers\n"); + return; + } + + //sc->sc_buffer = bus_space_vaddr(sc->sc_bustag, sc->sc_bhregs_trng); + sc->sc_bufsiz = sa->sa_size; + + node = sc->sc_node = sa->sa_node; + + /* + * Get transfer burst size from PROM + */ + sbusburst = sbsc->sc_burst; + if (sbusburst == 0) + sbusburst = SBUS_BURST_32 - 1; /* 1->16 */ + + sc->sc_burst = prom_getpropint(node, "burst-sizes", -1); + if (sc->sc_burst == -1) + /* take SBus burst sizes */ + sc->sc_burst = sbusburst; + + /* Clamp at parent's burst sizes */ + sc->sc_burst &= sbusburst; + + aprint_normal("\n"); + aprint_normal_dev(self, "nid 0x%x, bustag %p, burst 0x%x (parent 0x%0x)\n", + sc->sc_node, + sc->sc_bustag, + sc->sc_burst, +
sbsc->sc_burst); + + trng_ctrl_write(sc, 0x02); // start the TRNG + + rndsource_setcb(&sc->sc_rndsource, sbusfpga_trng_getentropy, sc); + rnd_attach_source(&sc->sc_rndsource, device_xname(self), RND_TYPE_RNG, RND_FLAG_HASCB | RND_FLAG_COLLECT_VALUE); +} diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h new file mode 100644 index 0000000..365b367 --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h @@ -0,0 +1,43 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RDFPGA_TRNG_H_ +#define _RDFPGA_TRNG_H_ + +struct sbusfpga_trng_softc { + device_t sc_dev; /* us as a device */ + u_int sc_rev; /* revision */ + int sc_node; /* PROM node ID */ + int sc_burst; /* DVMA burst size in effect */ + bus_space_tag_t sc_bustag; /* bus tag */ + bus_space_handle_t sc_bhregs_trng; /* bus handle */ + int sc_bufsiz; /* Size of buffer */ + struct krndsource sc_rndsource; +}; + +#endif /* _RDFPGA_TRNG_H_ */ diff --git a/sbus-to-ztex-gateware-migen/neorv32_trng_patched.vhd b/sbus-to-ztex-gateware-migen/neorv32_trng_patched.vhd new file mode 100644 index 0000000..5934a41 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/neorv32_trng_patched.vhd @@ -0,0 +1,382 @@ +-- # THIS IS NOT THE ORIGINAL FILE +-- # THIS WAS MODIFIED TO EXPOSE THE TRNG IN LITEX +-- # See the link in the copyright header to find the original file +-- +-- +-- ################################################################################################# +-- # << NEORV32 - True Random Number Generator (TRNG) >> # +-- # ********************************************************************************************* # +-- # This unit implements a *true* random number generator which uses several ring oscillators as # +-- # entropy source. The outputs of all chains are XORed and de-biased using a John von Neumann # +-- # randomness extractor. The de-biased signal is further processed by a simple LFSR for improved # +-- # whitening. # +-- # ********************************************************************************************* # +-- # BSD 3-Clause License # +-- # # +-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. # +-- # # +-- # Redistribution and use in source and binary forms, with or without modification, are # +-- # permitted provided that the following conditions are met: # +-- # # +-- # 1. Redistributions of source code must retain the above copyright notice, this list of # +-- # conditions and the following disclaimer. # +-- # # +-- # 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of # +-- # conditions and the following disclaimer in the documentation and/or other materials # +-- # provided with the distribution. # +-- # # +-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to # +-- # endorse or promote products derived from this software without specific prior written # +-- # permission. # +-- # # +-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS # +-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # +-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # +-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # +-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # +-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED # +-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # +-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # +-- # OF THE POSSIBILITY OF SUCH DAMAGE. 
# +-- # ********************************************************************************************* # +-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting # +-- ################################################################################################# + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library neorv32; +-- use neorv32.neorv32_package.all; + +entity neorv32_trng is + port ( + -- host access -- + clk_i : in std_ulogic; -- global clock line +-- addr_i : in std_ulogic_vector(31 downto 0); -- address + rden_i : in std_ulogic; -- read enable + wren_i : in std_ulogic; -- write enable + data_i : in std_ulogic_vector(31 downto 0); -- data in + data_o : out std_ulogic_vector(31 downto 0)--; -- data out +-- ack_o : out std_ulogic -- transfer acknowledge + ); +end neorv32_trng; + +architecture neorv32_trng_rtl of neorv32_trng is + + -- Advanced Configuration -------------------------------------------------------------------------------- + constant num_roscs_c : natural := 4; -- total number of ring oscillators + constant num_inv_start_c : natural := 5; -- number of inverters in FIRST ring oscillator (has to be odd) + constant num_inv_inc_c : natural := 2; -- number of inverters increment for each next ring oscillator (has to be even) + constant lfsr_en_c : boolean := true; -- use LFSR-based post-processing + constant lfsr_taps_c : std_ulogic_vector(7 downto 0) := "10111000"; -- Fibonacci post-processing LFSR feedback taps + -- ------------------------------------------------------------------------------------------------------- + + -- control register bits -- + constant ctrl_data_lsb_c : natural := 0; -- r/-: Random data byte LSB + constant ctrl_data_msb_c : natural := 7; -- r/-: Random data byte MSB + -- + constant ctrl_en_c : natural := 30; -- r/w: TRNG enable + constant ctrl_valid_c : natural := 31; -- r/-: Output data valid + + -- IO space: module base address -- +-- constant hi_abb_c : 
natural := index_size_f(io_size_c)-1; -- high address boundary bit +-- constant lo_abb_c : natural := index_size_f(trng_size_c); -- low address boundary bit + +-- copy/pasted from the rtl/core/neorv32_package.vhd file; XOR-reduces all bits of a, returns '0' for a null range (guard uses <= so a one-element vector is reduced too) + function xor_reduce_f(a : std_ulogic_vector) return std_ulogic is + variable tmp_v : std_ulogic; + begin + tmp_v := '0'; + if (a'low <= a'high) then -- not null range? + for i in a'low to a'high loop + tmp_v := tmp_v xor a(i); + end loop; -- i + end if; + return tmp_v; + end function xor_reduce_f; + + + -- Component: Ring-Oscillator -- + component neorv32_trng_ring_osc + generic ( + NUM_INV : natural := 16 -- number of inverters in chain + ); + port ( + clk_i : in std_ulogic; + enable_i : in std_ulogic; -- enable chain input + enable_o : out std_ulogic; -- enable chain output + data_o : out std_ulogic -- sync random bit + ); + end component; + + -- access control -- + signal acc_en : std_ulogic; -- module access enable +-- signal wren : std_ulogic; -- full word write enable +-- signal rden : std_ulogic; -- read enable + + -- ring-oscillator array -- + signal osc_array_en_in : std_ulogic_vector(num_roscs_c-1 downto 0); + signal osc_array_en_out : std_ulogic_vector(num_roscs_c-1 downto 0); + signal osc_array_data : std_ulogic_vector(num_roscs_c-1 downto 0); + + -- von-Neumann de-biasing -- + type debiasing_t is record + sreg : std_ulogic_vector(1 downto 0); + state : std_ulogic; -- process de-biasing every second cycle + valid : std_ulogic; -- de-biased data valid + data : std_ulogic; -- de-biased data + end record; + signal debiasing : debiasing_t; + + -- (post-)processing core -- + type processing_t is record + enable : std_ulogic; -- TRNG enable flag + cnt : std_ulogic_vector(3 downto 0); -- bit counter + sreg : std_ulogic_vector(7 downto 0); -- data shift register + output : std_ulogic_vector(7 downto 0); -- output register + valid : std_ulogic; -- data output valid flag + end record; + signal processing : processing_t; + +begin + + -- Sanity
Checks -------------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + assert not (num_roscs_c = 0) report "NEORV32 PROCESSOR CONFIG ERROR: TRNG - Total number of ring-oscillators has to be >0." severity error; + assert not ((num_inv_start_c mod 2) = 0) report "NEORV32 PROCESSOR CONFIG ERROR: TRNG - Number of inverters in first ring has to be odd." severity error; + assert not ((num_inv_inc_c mod 2) /= 0) report "NEORV32 PROCESSOR CONFIG ERROR: TRNG - Number of inverters increment for each next ring has to be even." severity error; + + + -- Access Control ------------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- +-- acc_en <= '1' when (addr_i(hi_abb_c downto lo_abb_c) = trng_base_c(hi_abb_c downto lo_abb_c)) else '0'; +-- wren <= acc_en and wren_i; +-- rden <= acc_en and rden_i; + + -- Read/Write Access ---------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + rw_access: process(clk_i) + begin + if rising_edge(clk_i) then +-- ack_o <= wren_i or rden_i; + -- write access: latch the TRNG enable bit from data_i -- + if (wren_i = '1') then + processing.enable <= data_i(ctrl_en_c); + end if; + -- read access: data_o is only updated on rden_i and otherwise keeps its last value -- +-- data_o <= (others => '0'); -- NOTE(review): with this default disabled, data_o bits 29..8 are never assigned in this process; confirm the wrapper ignores them + if (rden_i = '1') then + data_o(ctrl_data_msb_c downto ctrl_data_lsb_c) <= processing.output; + data_o(ctrl_en_c) <= processing.enable; + data_o(ctrl_valid_c) <= processing.valid; + end if; + end if; + end process rw_access; + + + -- Entropy Source ------------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + neorv32_trng_ring_osc_inst: + for i in 0 to num_roscs_c-1 generate + neorv32_trng_ring_osc_inst_i: neorv32_trng_ring_osc +
generic map ( + NUM_INV => num_inv_start_c + (i*num_inv_inc_c) -- number of inverters in chain + ) + port map ( + clk_i => clk_i, + enable_i => osc_array_en_in(i), + enable_o => osc_array_en_out(i), + data_o => osc_array_data(i) + ); + end generate; + + -- RO enable chain -- + array_intercon: process(processing.enable, osc_array_en_out) + begin + for i in 0 to num_roscs_c-1 loop + if (i = 0) then -- start of enable chain + osc_array_en_in(i) <= processing.enable; + else + osc_array_en_in(i) <= osc_array_en_out(i-1); + end if; + end loop; -- i + end process array_intercon; + + + -- John von Neumann De-Biasing ------------------------------------------------------------ + -- ------------------------------------------------------------------------------------------- + neumann_debiasing_sync: process(clk_i) + begin + if rising_edge(clk_i) then + debiasing.sreg <= debiasing.sreg(debiasing.sreg'left-1 downto 0) & xor_reduce_f(osc_array_data); + debiasing.state <= (not debiasing.state) and osc_array_en_out(num_roscs_c-1); -- start toggling when last RO is enabled -> process in every second cycle + end if; + end process neumann_debiasing_sync; + + -- Edge detector -- + neumann_debiasing_comb: process(debiasing) + variable tmp_v : std_ulogic_vector(2 downto 0); + begin + -- check groups of two non-overlapping bits from the input stream + tmp_v := debiasing.state & debiasing.sreg; + case tmp_v is + when "101" => debiasing.valid <= '1'; debiasing.data <= '1'; -- rising edge -> '1' + when "110" => debiasing.valid <= '1'; debiasing.data <= '0'; -- falling edge -> '0' + when others => debiasing.valid <= '0'; debiasing.data <= '0'; -- no valid data + end case; + end process neumann_debiasing_comb; + + + -- Processing Core ------------------------------------------------------------------------ + -- ------------------------------------------------------------------------------------------- + processing_core: process(clk_i) + begin + if rising_edge(clk_i) then + -- sample random 
data bit and apply post-processing -- + if (processing.enable = '0') then + processing.cnt <= (others => '0'); + processing.sreg <= (others => '0'); + elsif (debiasing.valid = '1') then -- valid random sample? + if (processing.cnt = "1000") then + processing.cnt <= (others => '0'); + else + processing.cnt <= std_ulogic_vector(unsigned(processing.cnt) + 1); + end if; + if (lfsr_en_c = true) then -- LFSR post-processing + processing.sreg <= processing.sreg(processing.sreg'left-1 downto 0) & ((not xor_reduce_f(processing.sreg and lfsr_taps_c)) xnor debiasing.data); + else -- NO post-processing + processing.sreg <= processing.sreg(processing.sreg'left-1 downto 0) & debiasing.data; + end if; + end if; + + -- data output register -- + if (processing.cnt = "1000") then + processing.output <= processing.sreg; + end if; + + -- data ready/valid flag -- + if (processing.cnt = "1000") then -- new sample ready? + processing.valid <= '1'; + elsif (processing.enable = '0') or (rden_i = '1') then -- clear when deactivated or on data read + processing.valid <= '0'; + end if; + end if; + end process processing_core; + + +end neorv32_trng_rtl; + + +-- ############################################################################################################################ +-- ############################################################################################################################ + + +-- ################################################################################################# +-- # << NEORV32 - True Random Number Generator (TRNG) - Ring-Oscillator-Based Entropy Source >> # +-- # ********************************************************************************************* # +-- # An inverter chain (ring oscillator) is used as entropy source. # +-- # The inverter chain is constructed as an "asynchronous" LFSR. The single inverters are # +-- # connected via latches that are used to enable/disable the TRNG. 
Also, these latches are used # +-- # as additional delay element. By using unique enable signals for each latch, the synthesis # +-- # tool cannot "optimize" (=remove) any of the inverters out of the design. Furthermore, the # +-- # latches prevent the synthesis tool from detecting combinatorial loops. # +-- # ********************************************************************************************* # +-- # BSD 3-Clause License # +-- # # +-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. # +-- # # +-- # Redistribution and use in source and binary forms, with or without modification, are # +-- # permitted provided that the following conditions are met: # +-- # # +-- # 1. Redistributions of source code must retain the above copyright notice, this list of # +-- # conditions and the following disclaimer. # +-- # # +-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of # +-- # conditions and the following disclaimer in the documentation and/or other materials # +-- # provided with the distribution. # +-- # # +-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to # +-- # endorse or promote products derived from this software without specific prior written # +-- # permission. # +-- # # +-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS # +-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # +-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE # +-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # +-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # +-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED # +-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # +-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # +-- # OF THE POSSIBILITY OF SUCH DAMAGE. # +-- # ********************************************************************************************* # +-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting # +-- ################################################################################################# + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library neorv32; +-- use neorv32.neorv32_package.all; + +entity neorv32_trng_ring_osc is + generic ( + NUM_INV : natural := 15 -- number of inverters in chain + ); + port ( + clk_i : in std_ulogic; + enable_i : in std_ulogic; -- enable chain input + enable_o : out std_ulogic; -- enable chain output + data_o : out std_ulogic -- sync random bit + ); +end neorv32_trng_ring_osc; + +architecture neorv32_trng_ring_osc_rtl of neorv32_trng_ring_osc is + + signal inv_chain : std_ulogic_vector(NUM_INV-1 downto 0); -- oscillator chain + signal enable_sreg : std_ulogic_vector(NUM_INV-1 downto 0); -- enable shift register + signal sync_ff : std_ulogic_vector(1 downto 0); -- output signal synchronizer + +begin + + -- Ring Oscillator ------------------------------------------------------------------------ + -- ------------------------------------------------------------------------------------------- + ring_osc: process(enable_i, enable_sreg, inv_chain) + begin + -- Using individual enable signals for each inverter - derived from a shift register - to prevent the 
synthesis tool + -- from removing all but one inverter (since they implement "logical identical functions"). + -- This also allows to make the TRNG platform independent. + for i in 0 to NUM_INV-1 loop -- inverters in chain + if (enable_i = '0') then -- start with a defined state (latch reset) + inv_chain(i) <= '0'; + elsif (enable_sreg(i) = '1') then + -- here we have the inverter chain -- + if (i = NUM_INV-1) then -- left-most inverter? + inv_chain(i) <= not inv_chain(0); + else + inv_chain(i) <= not inv_chain(i+1); + end if; + end if; + end loop; -- i + end process ring_osc; + + + -- Control -------------------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + ctrl_unit: process(clk_i) + begin + if rising_edge(clk_i) then + enable_sreg <= enable_sreg(enable_sreg'left-1 downto 0) & enable_i; -- activate right-most inverter first + sync_ff <= sync_ff(0) & inv_chain(0); -- synchronize to prevent metastability + end if; + end process ctrl_unit; + + -- output for "enable chain" -- + enable_o <= enable_sreg(enable_sreg'left); + + -- rnd output -- + data_o <= sync_ff(1); + + +end neorv32_trng_ring_osc_rtl; diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 534c79e..cf7ec66 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-18 12:35:05 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-20 07:32:43 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -12,10 +12,10 @@ #define CSR_LEDS_BASE (CSR_BASE + 0x0L) #define CSR_LEDS_OUT_ADDR (CSR_LEDS_BASE + 0x0L) #define CSR_LEDS_OUT_SIZE 1 -static inline 
uint32_t leds_out_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t leds_out_read(struct sbusfpga_leds_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_leds, 0x0L); } -static inline void leds_out_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void leds_out_write(struct sbusfpga_leds_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_leds, 0x0L, v); } #endif // CSR_LEDS_BASE @@ -25,106 +25,106 @@ static inline void leds_out_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { #define CSR_DDRPHY_BASE (CSR_BASE + 0x1000L) #define CSR_DDRPHY_RST_ADDR (CSR_DDRPHY_BASE + 0x0L) #define CSR_DDRPHY_RST_SIZE 1 -static inline uint32_t ddrphy_rst_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_rst_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x0L); } -static inline void ddrphy_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_rst_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x0L, v); } #define CSR_DDRPHY_HALF_SYS8X_TAPS_ADDR (CSR_DDRPHY_BASE + 0x4L) #define CSR_DDRPHY_HALF_SYS8X_TAPS_SIZE 1 -static inline uint32_t ddrphy_half_sys8x_taps_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_half_sys8x_taps_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x4L); } -static inline void ddrphy_half_sys8x_taps_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_half_sys8x_taps_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x4L, v); } #define CSR_DDRPHY_WLEVEL_EN_ADDR (CSR_DDRPHY_BASE + 0x8L) #define CSR_DDRPHY_WLEVEL_EN_SIZE 1 -static inline uint32_t ddrphy_wlevel_en_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_wlevel_en_read(struct 
sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x8L); } -static inline void ddrphy_wlevel_en_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_wlevel_en_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x8L, v); } #define CSR_DDRPHY_WLEVEL_STROBE_ADDR (CSR_DDRPHY_BASE + 0xcL) #define CSR_DDRPHY_WLEVEL_STROBE_SIZE 1 -static inline uint32_t ddrphy_wlevel_strobe_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_wlevel_strobe_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0xcL); } -static inline void ddrphy_wlevel_strobe_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_wlevel_strobe_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0xcL, v); } #define CSR_DDRPHY_DLY_SEL_ADDR (CSR_DDRPHY_BASE + 0x10L) #define CSR_DDRPHY_DLY_SEL_SIZE 1 -static inline uint32_t ddrphy_dly_sel_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_dly_sel_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x10L); } -static inline void ddrphy_dly_sel_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_dly_sel_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x10L, v); } #define CSR_DDRPHY_RDLY_DQ_RST_ADDR (CSR_DDRPHY_BASE + 0x14L) #define CSR_DDRPHY_RDLY_DQ_RST_SIZE 1 -static inline uint32_t ddrphy_rdly_dq_rst_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_rdly_dq_rst_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x14L); } -static inline void ddrphy_rdly_dq_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_rdly_dq_rst_write(struct 
sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x14L, v); } #define CSR_DDRPHY_RDLY_DQ_INC_ADDR (CSR_DDRPHY_BASE + 0x18L) #define CSR_DDRPHY_RDLY_DQ_INC_SIZE 1 -static inline uint32_t ddrphy_rdly_dq_inc_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_rdly_dq_inc_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x18L); } -static inline void ddrphy_rdly_dq_inc_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_rdly_dq_inc_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x18L, v); } #define CSR_DDRPHY_RDLY_DQ_BITSLIP_RST_ADDR (CSR_DDRPHY_BASE + 0x1cL) #define CSR_DDRPHY_RDLY_DQ_BITSLIP_RST_SIZE 1 -static inline uint32_t ddrphy_rdly_dq_bitslip_rst_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_rdly_dq_bitslip_rst_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x1cL); } -static inline void ddrphy_rdly_dq_bitslip_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_rdly_dq_bitslip_rst_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x1cL, v); } #define CSR_DDRPHY_RDLY_DQ_BITSLIP_ADDR (CSR_DDRPHY_BASE + 0x20L) #define CSR_DDRPHY_RDLY_DQ_BITSLIP_SIZE 1 -static inline uint32_t ddrphy_rdly_dq_bitslip_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_rdly_dq_bitslip_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x20L); } -static inline void ddrphy_rdly_dq_bitslip_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_rdly_dq_bitslip_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x20L, v); } #define CSR_DDRPHY_WDLY_DQ_BITSLIP_RST_ADDR 
(CSR_DDRPHY_BASE + 0x24L) #define CSR_DDRPHY_WDLY_DQ_BITSLIP_RST_SIZE 1 -static inline uint32_t ddrphy_wdly_dq_bitslip_rst_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_wdly_dq_bitslip_rst_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x24L); } -static inline void ddrphy_wdly_dq_bitslip_rst_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_wdly_dq_bitslip_rst_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x24L, v); } #define CSR_DDRPHY_WDLY_DQ_BITSLIP_ADDR (CSR_DDRPHY_BASE + 0x28L) #define CSR_DDRPHY_WDLY_DQ_BITSLIP_SIZE 1 -static inline uint32_t ddrphy_wdly_dq_bitslip_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_wdly_dq_bitslip_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x28L); } -static inline void ddrphy_wdly_dq_bitslip_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_wdly_dq_bitslip_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x28L, v); } #define CSR_DDRPHY_RDPHASE_ADDR (CSR_DDRPHY_BASE + 0x2cL) #define CSR_DDRPHY_RDPHASE_SIZE 1 -static inline uint32_t ddrphy_rdphase_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_rdphase_read(struct sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x2cL); } -static inline void ddrphy_rdphase_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_rdphase_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x2cL, v); } #define CSR_DDRPHY_WRPHASE_ADDR (CSR_DDRPHY_BASE + 0x30L) #define CSR_DDRPHY_WRPHASE_SIZE 1 -static inline uint32_t ddrphy_wrphase_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t ddrphy_wrphase_read(struct 
sbusfpga_ddrphy_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x30L); } -static inline void ddrphy_wrphase_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void ddrphy_wrphase_write(struct sbusfpga_ddrphy_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_ddrphy, 0x30L, v); } #endif // CSR_DDRPHY_BASE @@ -134,66 +134,66 @@ static inline void ddrphy_wrphase_write(struct sbusfpga_sdram_softc *sc, uint32_ #define CSR_EXCHANGE_WITH_MEM_BASE (CSR_BASE + 0x2000L) #define CSR_EXCHANGE_WITH_MEM_BLK_SIZE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x0L) #define CSR_EXCHANGE_WITH_MEM_BLK_SIZE_SIZE 1 -static inline uint32_t exchange_with_mem_blk_size_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_blk_size_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x0L); } #define CSR_EXCHANGE_WITH_MEM_BLK_BASE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x4L) #define CSR_EXCHANGE_WITH_MEM_BLK_BASE_SIZE 1 -static inline uint32_t exchange_with_mem_blk_base_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_blk_base_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x4L); } #define CSR_EXCHANGE_WITH_MEM_MEM_SIZE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x8L) #define CSR_EXCHANGE_WITH_MEM_MEM_SIZE_SIZE 1 -static inline uint32_t exchange_with_mem_mem_size_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_mem_size_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x8L); } #define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0xcL) #define CSR_EXCHANGE_WITH_MEM_BLK_ADDR_SIZE 1 -static inline uint32_t exchange_with_mem_blk_addr_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t 
exchange_with_mem_blk_addr_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0xcL); } -static inline void exchange_with_mem_blk_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void exchange_with_mem_blk_addr_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0xcL, v); } #define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x10L) #define CSR_EXCHANGE_WITH_MEM_DMA_ADDR_SIZE 1 -static inline uint32_t exchange_with_mem_dma_addr_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_dma_addr_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x10L); } -static inline void exchange_with_mem_dma_addr_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void exchange_with_mem_dma_addr_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x10L, v); } #define CSR_EXCHANGE_WITH_MEM_BLK_CNT_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x14L) #define CSR_EXCHANGE_WITH_MEM_BLK_CNT_SIZE 1 -static inline uint32_t exchange_with_mem_blk_cnt_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_blk_cnt_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x14L); } -static inline void exchange_with_mem_blk_cnt_write(struct sbusfpga_sdram_softc *sc, uint32_t v) { +static inline void exchange_with_mem_blk_cnt_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x14L, v); } #define CSR_EXCHANGE_WITH_MEM_LAST_BLK_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x18L) #define CSR_EXCHANGE_WITH_MEM_LAST_BLK_SIZE 1 -static inline uint32_t 
exchange_with_mem_last_blk_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_last_blk_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x18L); } #define CSR_EXCHANGE_WITH_MEM_LAST_DMA_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x1cL) #define CSR_EXCHANGE_WITH_MEM_LAST_DMA_SIZE 1 -static inline uint32_t exchange_with_mem_last_dma_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_last_dma_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x1cL); } #define CSR_EXCHANGE_WITH_MEM_BLK_REM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x20L) #define CSR_EXCHANGE_WITH_MEM_BLK_REM_SIZE 1 -static inline uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x20L); } #define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x24L) #define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_SIZE 1 -static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x24L); } #define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x28L) #define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_SIZE 1 -static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_sdram_softc *sc) { +static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x28L); } #endif // CSR_EXCHANGE_WITH_MEM_BASE @@ -467,4 +467,22 @@ static inline uint32_t sdram_dfii_pi3_rddata_read(struct 
sbusfpga_sdram_softc *s } #endif // CSR_SDRAM_BASE +/* trng */ +#ifndef CSR_TRNG_BASE +#define CSR_TRNG_BASE (CSR_BASE + 0x4000L) +#define CSR_TRNG_CTRL_ADDR (CSR_TRNG_BASE + 0x0L) +#define CSR_TRNG_CTRL_SIZE 1 +static inline uint32_t trng_ctrl_read(struct sbusfpga_trng_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_trng, 0x0L); +} +static inline void trng_ctrl_write(struct sbusfpga_trng_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_trng, 0x0L, v); +} +#define CSR_TRNG_DATA_ADDR (CSR_TRNG_BASE + 0x4L) +#define CSR_TRNG_DATA_SIZE 1 +static inline uint32_t trng_data_read(struct sbusfpga_trng_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_trng, 0x4L); +} +#endif // CSR_TRNG_BASE + #endif diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth index d0b8e76..44c9655 100644 --- a/sbus-to-ztex-gateware-migen/prom_csr.fth +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -3,6 +3,7 @@ h# 40000 constant sbusfpga_csraddr_leds h# 41000 constant sbusfpga_csraddr_ddrphy h# 42000 constant sbusfpga_csraddr_exchange_with_mem h# 43000 constant sbusfpga_csraddr_sdram +h# 44000 constant sbusfpga_csraddr_trng h# 80000 constant sbusfpga_regionaddr_usb_host_ctrl h# 0 constant sbusfpga_regionaddr_prom h# 80000000 constant sbusfpga_regionaddr_main_ram diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index 6b0821d..1170c1a 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -5,6 +5,7 @@ fload prom_csr.fth \ fload v2compat.fth +\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ LEDs \ Absolute minimal stuff; name & registers def. 
" RDOL,led" device-name my-address sbusfpga_csraddr_leds + my-space h# 4 reg @@ -38,6 +39,7 @@ my-space constant my-sbus-space \ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer \ The OpenFirmware tokenizer does accept the 'clean' syntax finish-device +\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ USB OHCI new-device \ Absolute minimal stuff; name & registers def. @@ -86,6 +88,7 @@ my-reset! \ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer \ The OpenFirmware tokenizer does accept the 'clean' syntax finish-device +\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ SDRAM new-device \ Absolute minimal stuff; name & registers def. @@ -129,4 +132,42 @@ my-space constant my-sbus-space \ init! + +\ OpenBIOS tokenizer won't accept finish-device without new-device +\ Cheat by using the tokenizer so we can do OpenBoot 2.x siblings +\ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer +\ The OpenFirmware tokenizer does accept the 'clean' syntax +finish-device +\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ TRNG +new-device + +\ Absolute minimal stuff; name & registers def. +" RDOL,neorv32trng" device-name + +my-address sbusfpga_csraddr_trng + my-space h# 8 reg +\ we don't support ET or HWORD +h# 7d xdrint " slave-burst-sizes" attribute +h# 7d xdrint " burst-sizes" attribute + +headers +-1 instance value trng-virt +my-address constant my-sbus-address +my-space constant my-sbus-space + +: map-in ( adr space size -- virt ) " map-in" $call-parent ; +: map-out ( virt size -- ) " map-out" $call-parent ; + +: map-in-trng ( -- ) my-sbus-address sbusfpga_csraddr_trng + my-sbus-space h# 8 map-in is trng-virt ; +: map-out-trng ( -- ) trng-virt h# 8 map-out ; + +\ external + +: disabletrng! 
( -- ) + map-in-trng + 1 trng-virt l! ( pattern virt -- ) + map-out-trng +; + +disabletrng! + end0 diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py index 3859bb6..e6a2f1a 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_export.py @@ -43,7 +43,7 @@ def _get_rw_functions_c(name, csr_name, reg_base, area_base, nwords, busword, al stride = alignment//8; if with_access_functions: - r += "static inline {} {}_read(struct sbusfpga_sdram_softc *sc) {{\n".format(ctype, reg_name) + r += "static inline {} {}_read(struct sbusfpga_{}_softc *sc) {{\n".format(ctype, reg_name, name) if nwords > 1: r += "\t{} r = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_{}, {}L);\n".format(ctype, name, hex(reg_base - area_base)) for sub in range(1, nwords): @@ -54,7 +54,7 @@ def _get_rw_functions_c(name, csr_name, reg_base, area_base, nwords, busword, al r += "\treturn bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_{}, {}L);\n}}\n".format(name, hex(reg_base - area_base)) if not read_only: - r += "static inline void {}_write(struct sbusfpga_sdram_softc *sc, {} v) {{\n".format(reg_name, ctype) + r += "static inline void {}_write(struct sbusfpga_{}_softc *sc, {} v) {{\n".format(reg_name, name, ctype) for sub in range(nwords): shift = (nwords-sub-1)*busword if shift: @@ -102,18 +102,18 @@ def get_csr_header(regions, constants, csr_base=None, with_access_functions=True if with_access_functions and csr.size <= 32: # FIXME: Implement extract/read functions for csr.size > 32-bit. 
reg_name = name + "_" + csr.name.lower() field_name = reg_name + "_" + field.name.lower() - r += "static inline uint32_t " + field_name + "_extract(struct sbusfpga_sdram_softc *sc, uint32_t oldword) {\n" + r += "static inline uint32_t " + field_name + "_extract(struct sbusfpga_" + name + "_softc *sc, uint32_t oldword) {\n" r += "\tuint32_t mask = ((1 << " + size + ")-1);\n" r += "\treturn ( (oldword >> " + offset + ") & mask );\n}\n" - r += "static inline uint32_t " + field_name + "_read(struct sbusfpga_sdram_softc *sc) {\n" + r += "static inline uint32_t " + field_name + "_read(struct sbusfpga_" + name + "_softc *sc) {\n" r += "\tuint32_t word = " + reg_name + "_read(sc);\n" r += "\treturn " + field_name + "_extract(sc, word);\n" r += "}\n" if not getattr(csr, "read_only", False): - r += "static inline uint32_t " + field_name + "_replace(struct sbusfpga_sdram_softc *sc, uint32_t oldword, uint32_t plain_value) {\n" + r += "static inline uint32_t " + field_name + "_replace(struct sbusfpga_" + name + "_softc *sc, uint32_t oldword, uint32_t plain_value) {\n" r += "\tuint32_t mask = ((1 << " + size + ")-1);\n" r += "\treturn (oldword & (~(mask << " + offset + "))) | (mask & plain_value)<< " + offset + " ;\n}\n" - r += "static inline void " + field_name + "_write(struct sbusfpga_sdram_softc *sc, uint32_t plain_value) {\n" + r += "static inline void " + field_name + "_write(struct sbusfpga_" + name + "_softc *sc, uint32_t plain_value) {\n" r += "\tuint32_t oldword = " + reg_name + "_read(sc);\n" r += "\tuint32_t newword = " + field_name + "_replace(sc, oldword, plain_value);\n" r += "\t" + reg_name + "_write(sc, newword);\n" diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 78f4152..e8faeb7 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -18,6 +18,7 @@ from litedram.phy import s7ddrphy from sbus_to_fpga_fsm import * from 
sbus_to_fpga_blk_dma import * +from sbus_to_fpga_trng import * from litedram.frontend.dma import * @@ -226,6 +227,8 @@ class SBusFPGA(SoCCore): #self.add_sdcard() + self.submodules.trng = NeoRV32TrngWrapper(platform=platform) + def main(): parser = argparse.ArgumentParser(description="SbusFPGA") parser.add_argument("--build", action="store_true", help="Build bitstream") diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_trng.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_trng.py new file mode 100644 index 0000000..5db0f8e --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_trng.py @@ -0,0 +1,94 @@ +from migen import * +from migen.genlib.fifo import * +from litex.soc.interconnect.csr import * + +class NeoRV32TrngWrapper(Module, AutoCSR): + def __init__(self, platform): + self.add_sources(platform) + + rden_i = Signal() + wren_i = Signal() + data_i = Signal(32) + data_o = Signal(32) + + self.ctrl = CSRStorage(32, description = "CTRL register; bit 0 : disable ; bit 1 : enable") + self.data = CSRStatus(32, description = "Rnd Data or 0") + + self.submodules.ctrl_fsm = ctrl_fsm = FSM(reset_state = "Reset") + ctrl_fsm.act("Reset", + NextState("Idle") + ) + ctrl_fsm.act("Idle", + If(self.ctrl.re, # someone has written control + If(self.ctrl.storage[0], + data_i.eq(0), + wren_i.eq(1), + ).Elif(self.ctrl.storage[1], + data_i.eq(0xffffffff), + wren_i.eq(1), + ) + ), + If(self.data.we, # someone has read the data, reset so that the same value is never read twice + NextValue(self.data.status, 0), + ) + ) + + # fill out an intermediate buffer, one byte every 11 cycles + # then copy the 4 bytes to data CST and do it all over again + buf = Array(Signal(8) for a in range(4)) + idx = Signal(2) + cnt = Signal(4) + self.submodules.upd_fsm = upd_fsm = FSM(reset_state = "Reset") + upd_fsm.act("Reset", + NextValue(cnt, 11), + NextValue(idx, 0), + NextState("ByteWait") + ) + upd_fsm.act("ByteWait", + If(cnt == 0, + rden_i.eq(1), + NextState("ByteWrite"), + ).Else( + 
NextValue(cnt, cnt - 1) + ) + ) + upd_fsm.act("ByteWrite", + If (data_o[31] & data_o[30], + NextValue(buf[idx], data_o[0:8]), + NextValue(cnt, 11), + NextValue(idx, idx + 1), + If(idx == 3, + NextState("Copy"), + ).Else( + NextState("ByteWait"), + ) + ).Else( # try again + NextValue(cnt, 11), + NextState("ByteWait"), + ) + ) + upd_fsm.act("Copy", + NextValue(self.data.status, Cat(buf[0], buf[1], buf[2], buf[3])), + NextValue(buf[0], 0), + NextValue(buf[1], 0), + NextValue(buf[2], 0), + NextValue(buf[3], 0), + NextState("ByteWait") + ) + + + + + self.specials += Instance(self.get_netlist_name(), + i_clk_i = ClockSignal("sys"), + i_rden_i = rden_i, + i_wren_i = wren_i, + i_data_i = data_i, + o_data_o = data_o) + + def get_netlist_name(self): + return "neorv32_trng" + + def add_sources(self, platform): + platform.add_source("neorv32_trng_patched.vhd", "vhdl") + From b8e9211a7783d2db229a4be13aafecb8ebe41a81 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 24 Jul 2021 11:42:53 -0400 Subject: [PATCH 48/78] driver update --- NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c | 8 ++++++-- NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c index d67fde8..6218781 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c @@ -109,6 +109,7 @@ sbusfpga_trng_match(device_t parent, cfdata_t cf, void *aux) #define CONFIG_CSR_DATA_WIDTH 32 // define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle #define CSR_LEDS_BASE +#define CSR_CURVE25519ENGINE_BASE #define CSR_DDRPHY_BASE #define CSR_EXCHANGE_WITH_MEM_BASE #define CSR_SDRAM_BASE @@ -120,6 +121,7 @@ sbusfpga_trng_match(device_t parent, cfdata_t cf, void *aux) //#define CSR_TRNG_BASE #include "dev/sbus/litex_csr.h" #undef CSR_LEDS_BASE +#undef CSR_CURVE25519ENGINE_BASE #undef 
CSR_DDRPHY_BASE #undef CSR_EXCHANGE_WITH_MEM_BASE #undef CSR_SDRAM_BASE @@ -135,7 +137,7 @@ sbusfpga_trng_getentropy(size_t nbytes, void *cookie) { struct sbusfpga_trng_softc *sc = cookie; size_t dbytes = 0; int failure = 0; - while ((nbytes - dbytes) > 0) { + while (nbytes > dbytes) { u_int32_t data = trng_data_read(sc); if (data) { rnd_add_data_sync(&sc->sc_rndsource, &data, 4, 32); // 32 is perhaps optimistic @@ -148,8 +150,10 @@ sbusfpga_trng_getentropy(size_t nbytes, void *cookie) { } delay(1); } + if (((dbytes%32)==0) && (nbytes > dbytes)) + delay(1); // let the hardware breathes if the OS needs a lof of bytes } - aprint_normal_dev(sc->sc_dev, "gathered %zd bytes\n", dbytes); + aprint_normal_dev(sc->sc_dev, "gathered %zd bytes [%d]\n", dbytes, failure); } /* diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h index 365b367..86557de 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.h @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef _RDFPGA_TRNG_H_ -#define _RDFPGA_TRNG_H_ +#ifndef _SBUSFPGA_TRNG_H_ +#define _SBUSFPGA_TRNG_H_ struct sbusfpga_trng_softc { device_t sc_dev; /* us as a device */ @@ -40,4 +40,4 @@ struct sbusfpga_trng_softc { struct krndsource sc_rndsource; }; -#endif /* _RDFPGA_TRNG_H_ */ +#endif /* _SBUSFPGA_TRNG_H_ */ From ef215942c063397def9ad627fab12755c18ebcb3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 24 Jul 2021 11:43:12 -0400 Subject: [PATCH 49/78] bytes not blocks --- NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c b/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c index 8cee48e..c8968ba 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/rdfpga_sdcard.c @@ -617,8 +617,8 @@ rdfpga_sdcard_size(dev_t dev) { static void rdfpga_sdcard_minphys(struct buf *bp) { - if (bp->b_bcount > (RDFPGA_SDCARD_VAL_DMA_MAX_SZ/512)) - bp->b_bcount = (RDFPGA_SDCARD_VAL_DMA_MAX_SZ/512); + if (bp->b_bcount > RDFPGA_SDCARD_VAL_DMA_MAX_SZ) + bp->b_bcount = RDFPGA_SDCARD_VAL_DMA_MAX_SZ; } static int From acda04f456acc4a5f2c98ef8a3b943aa1445f57d Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 24 Jul 2021 11:53:05 -0400 Subject: [PATCH 50/78] SDRAM driver now complete enough to sunlabel/newfs --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index d91d394..ee8a3c8 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -65,6 +65,8 @@ CFATTACH_DECL_NEW(sbusfpga_sdram, sizeof(struct sbusfpga_sdram_softc), dev_type_open(sbusfpga_sdram_open); dev_type_close(sbusfpga_sdram_close); +dev_type_read(sbusfpga_sdram_read); +dev_type_write(sbusfpga_sdram_write); 
dev_type_ioctl(sbusfpga_sdram_ioctl); dev_type_strategy(sbusfpga_sdram_strategy); dev_type_size(sbusfpga_sdram_size); @@ -83,8 +85,8 @@ const struct bdevsw sbusfpga_sdram_bdevsw = { const struct cdevsw sbusfpga_sdram_cdevsw = { .d_open = sbusfpga_sdram_open, .d_close = sbusfpga_sdram_close, - .d_read = noread, - .d_write = nowrite, + .d_read = sbusfpga_sdram_read, + .d_write = sbusfpga_sdram_write, .d_ioctl = sbusfpga_sdram_ioctl, .d_stop = nostop, .d_tty = notty, @@ -120,7 +122,8 @@ sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l aprint_error("%s:%d: sc == NULL! giving up\n", __PRETTY_FUNCTION__, __LINE__); return (ENXIO); } - + +#if 0 switch (cmd) { /* case VNDIOCCLR: */ /* case VNDIOCCLR50: */ @@ -152,6 +155,13 @@ sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l err = EINVAL; break; } +#else + err2 = dk_ioctl(&sc->dk, dev, cmd, data, flag, l); + if (err2 != EPASSTHROUGH) + err = err2; + else + err = ENOTTY; +#endif return(err); } @@ -195,6 +205,18 @@ sbusfpga_sdram_close(dev_t dev, int flag, int fmt, struct lwp *l) return dk_close(dksc, dev, flag, fmt, l); } +int +sbusfpga_sdram_read(dev_t dev, struct uio *uio, int flags) +{ + return physio(sbusfpga_sdram_strategy, NULL, dev, B_READ, sbusfpga_sdram_minphys, uio); +} + +int +sbusfpga_sdram_write(dev_t dev, struct uio *uio, int flags) +{ + return physio(sbusfpga_sdram_strategy, NULL, dev, B_WRITE, sbusfpga_sdram_minphys, uio); +} + int sbusfpga_sdram_match(device_t parent, cfdata_t cf, void *aux) { @@ -342,7 +364,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) sbusfpga_sdram_set_geometry(sc); bufq_alloc(&sc->dk.sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK); /* needed ? 
*/ - { + if (0) { struct disklabel *lp = sc->dk.sc_dkdev.dk_label; struct cpu_disklabel *clp = sc->dk.sc_dkdev.dk_cpulabel; memset(lp, 0, sizeof(struct disklabel)); @@ -420,8 +442,8 @@ sbusfpga_sdram_size(dev_t dev) { static void sbusfpga_sdram_minphys(struct buf *bp) { - if (bp->b_bcount > (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512)) - bp->b_bcount = (SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ/512); + if (bp->b_bcount > SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ) + bp->b_bcount = SBUSFPGA_SDRAM_VAL_DMA_MAX_SZ; } @@ -498,6 +520,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) } else { aprint_error("%s:%d: blk = %lld read out of range! giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); err = EINVAL; + break; } blk += blkcnt; data += 512 * blkcnt; @@ -534,6 +557,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) } else { aprint_error("%s:%d: blk = %lld write out of range! giving up\n", __PRETTY_FUNCTION__, __LINE__, blk); err = EINVAL; + break; } blk += blkcnt; data += 512 * blkcnt; @@ -553,6 +577,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) #define CONFIG_CSR_DATA_WIDTH 32 // define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle #define CSR_LEDS_BASE +#define CSR_CURVE25519ENGINE_BASE //#define CSR_DDRPHY_BASE //#define CSR_SDRAM_BASE //#define CSR_EXCHANGE_WITH_MEM_BASE @@ -569,6 +594,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) #include "dev/sbus/litex_csr.h" #undef CSR_LEDS_BASE +#undef CSR_CURVE25519ENGINE_BASE //#undef CSR_DDRPHY_BASE //#undef CSR_SDRAM_BASE //#undef CSR_EXCHANGE_WITH_MEM_BASE From 6aa47345508279c80233484028f4b80996805074 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 25 Jul 2021 02:56:43 -0400 Subject: [PATCH 51/78] tune delays --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 54 +++++++++++++------ 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index ee8a3c8..37a7427 
100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -293,6 +293,7 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) } else { aprint_normal_dev(self, "DMA registers @ %p\n", (void*)sc->sc_bhregs_exchange_with_mem); } +#if 0 if (sa->sa_nreg >= 4) { /* if we map some of the memory itself */ /* normally disabled, it's a debug feature */ @@ -311,6 +312,9 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) } else { sc->sc_bufsiz_mmap = 0; } +#else + sc->sc_bufsiz_mmap = 0; +#endif sc->sc_bufsiz_ddrphy = sa->sa_reg[0].oa_size; sc->sc_bufsiz_sdram = sa->sa_reg[1].oa_size; @@ -651,6 +655,10 @@ dma_init(struct sbusfpga_sdram_softc *sc) { return 1; } +/* tuned on my SPARCstation 20 with 25 MHz SBus & 2*SM61 */ +/* asynchronous would be better ... */ +#define DEF_BLK_DELAY 14 + static inline unsigned long lfsr (unsigned long bits, unsigned long prev); int @@ -671,6 +679,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); +#if 0 if (sc->sc_bufsiz_mmap > 0) { int idx = blkn * sc->dma_blk_size / sizeof(unsigned long), x; int bound = sc->sc_bufsiz_mmap / sizeof(unsigned long); @@ -684,6 +693,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } } } +#endif bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_PREREAD); @@ -697,7 +707,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_POSTREAD); - delay(500); + delay(DEF_BLK_DELAY * 8); count = 0; while (((blkcnt = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { @@ -707,7 +717,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { exchange_with_mem_last_blk_read(sc), exchange_with_mem_wr_tosdram_read(sc)); count ++; - delay(500); + delay(DEF_BLK_DELAY); } if (blkcnt) { @@ -732,7 +742,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { while ((((blkcnt = 
exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", blkcnt); count ++; - delay(500); + delay(DEF_BLK_DELAY); } if (blkcnt & 0x3) { @@ -750,6 +760,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { exchange_with_mem_blk_rem_read(sc)); } +#if 0 if (sc->sc_bufsiz_mmap > 0) { int idx = blkn * sc->dma_blk_size / sizeof(unsigned long), x; int bound = sc->sc_bufsiz_mmap / sizeof(unsigned long); @@ -768,6 +779,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } } } +#endif for (int i = 0 ; i < testdatasize/sizeof(unsigned long) ; i++) { kva_ulong[i] = 0x0c0ffee0; @@ -786,13 +798,13 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_POSTWRITE); - delay(500); + delay(DEF_BLK_DELAY * 8); count = 0; while (((blkcnt = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 10)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram ongoing (%u, status 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc)); count ++; - delay(500); + delay(DEF_BLK_DELAY); } if (blkcnt) { @@ -815,7 +827,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram hasn't reached memory yet (status 0x%08x)\n", blkcnt); count ++; - delay(500); + delay(DEF_BLK_DELAY); } aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); @@ -863,12 +875,12 @@ static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_in exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); exchange_with_mem_blk_cnt_write(sc, 0x00000000 | (blkcnt * 512 / sc->dma_blk_size) ); - delay(100); + delay(DEF_BLK_DELAY * blkcnt); count = 0; - while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 50)) { + while (((check = 
exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < (4*blkcnt))) { count ++; - delay(100); + delay(DEF_BLK_DELAY); } if (check) { @@ -881,12 +893,17 @@ static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_in exchange_with_mem_wr_tosdram_read(sc)); return ENXIO; } +#if 0 + else { + aprint_normal_dev(sc->dk.sc_dev, "DMA READ finish for %d blk in %d attempts.\n", blkcnt, count); + } +#endif count = 0; - while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 50)) { + while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < blkcnt)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); count ++; - delay(100); + delay(DEF_BLK_DELAY); } if (check & 0x3) { @@ -919,12 +936,12 @@ static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_i exchange_with_mem_dma_addr_write(sc, sc->sc_dmamap->dm_segs[0].ds_addr); exchange_with_mem_blk_cnt_write(sc, 0x80000000 | (blkcnt * 512 / sc->dma_blk_size) ); - delay(100); + delay(DEF_BLK_DELAY * blkcnt); count = 0; - while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < 50)) { + while (((check = exchange_with_mem_blk_cnt_read(sc)) != 0) && (count < (4*blkcnt))) { count ++; - delay(100); + delay(DEF_BLK_DELAY); } if (check) { @@ -937,12 +954,17 @@ static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_i exchange_with_mem_wr_tosdram_read(sc)); return ENXIO; } +#if 0 + else { + aprint_normal_dev(sc->dk.sc_dev, "DMA WRITE finish for %d blk in %d attempts.\n", blkcnt, count); + } +#endif count = 0; - while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 50)) { + while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < blkcnt)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); count ++; - delay(100); + delay(DEF_BLK_DELAY); } if (check & 0x3) { 
From 0c5e7504538e364c63ac3aeebbb901ebd27124ce Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 25 Jul 2021 06:32:48 -0400 Subject: [PATCH 52/78] Avoid some potential race conditions --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 30 ++++++++++--------- .../sbus_to_fpga_blk_dma.py | 12 ++++++++ 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 37a7427..0431fc7 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -609,6 +609,8 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) #undef CSR_SDPHY_BASE #undef CSR_TRNG_BASE +#define DMA_STATUS_CHECK_BITS (0x01F) + int dma_init(struct sbusfpga_sdram_softc *sc) { sc->dma_blk_size = exchange_with_mem_blk_size_read(sc); @@ -666,7 +668,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { unsigned long *kva_ulong = (unsigned long*)sc->sc_dma_kva; unsigned long val; unsigned int blkn = 0; // 113; - unsigned int testdatasize = 4096; + const unsigned int testdatasize = 4096; unsigned int blkcnt ; int count; @@ -677,7 +679,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { val = lfsr(32, val); kva_ulong[i] = val; } - aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + aprint_normal_dev(sc->dk.sc_dev, "First / last value: 0x%08lx 0x%08lx\n", kva_ulong[0], kva_ulong[(testdatasize/sizeof(unsigned long))-1]); #if 0 if (sc->sc_bufsiz_mmap > 0) { @@ -739,13 +741,13 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } count = 0; - while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & DMA_STATUS_CHECK_BITS) != 0) && (count < 10)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", blkcnt); count ++; delay(DEF_BLK_DELAY); } - if (blkcnt & 0x3) { + if (blkcnt & 
DMA_STATUS_CHECK_BITS) { aprint_error_dev(sc->dk.sc_dev, "DMA Write-to-Sdram can't reach SDRAM ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), @@ -784,7 +786,7 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { for (int i = 0 ; i < testdatasize/sizeof(unsigned long) ; i++) { kva_ulong[i] = 0x0c0ffee0; } - aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + aprint_normal_dev(sc->dk.sc_dev, "First / last value: 0x%08lx 0x%08lx\n", kva_ulong[0], kva_ulong[(testdatasize/sizeof(unsigned long))-1]); bus_dmamap_sync(sc->sc_dmatag, sc->sc_dmamap, 0, 4096, BUS_DMASYNC_PREWRITE); @@ -824,15 +826,15 @@ dma_memtest(struct sbusfpga_sdram_softc *sc) { } count = 0; - while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < 10)) { + while ((((blkcnt = exchange_with_mem_dma_status_read(sc)) & DMA_STATUS_CHECK_BITS) != 0) && (count < 10)) { aprint_normal_dev(sc->dk.sc_dev, "DMA Read-from-Sdram hasn't reached memory yet (status 0x%08x)\n", blkcnt); count ++; delay(DEF_BLK_DELAY); } - aprint_normal_dev(sc->dk.sc_dev, "First value: 0x%08lx\n", kva_ulong[0]); + aprint_normal_dev(sc->dk.sc_dev, "First /last value: 0x%08lx 0x%08lx\n", kva_ulong[0], kva_ulong[(testdatasize/sizeof(unsigned long))-1]); - if (blkcnt & 0x3) { + if (blkcnt & DMA_STATUS_CHECK_BITS) { aprint_error_dev(sc->dk.sc_dev, "DMA Read-from-Sdram can't reach memory ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", blkcnt & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), exchange_with_mem_last_blk_read(sc), @@ -900,13 +902,13 @@ static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_in #endif count = 0; - while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < blkcnt)) { - aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); + while ((((check = exchange_with_mem_dma_status_read(sc)) & DMA_STATUS_CHECK_BITS) != 0) && (count < blkcnt)) { + //aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); count ++; delay(DEF_BLK_DELAY); } - if (check & 0x3) { + if (check & DMA_STATUS_CHECK_BITS) { aprint_error_dev(sc->dk.sc_dev, "DMA can't reach memory/SDRAM ? (%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", check & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), @@ -961,13 +963,13 @@ static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_i #endif count = 0; - while ((((check = exchange_with_mem_dma_status_read(sc)) & 0x3) != 0) && (count < blkcnt)) { - aprint_normal_dev(sc->dk.sc_dev, "DMA Write-to-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); + while ((((check = exchange_with_mem_dma_status_read(sc)) & DMA_STATUS_CHECK_BITS) != 0) && (count < blkcnt)) { + //aprint_normal_dev(sc->dk.sc_dev, "DMA Read_from-Sdram hasn't reached SDRAM yet (status 0x%08x)\n", check); count ++; delay(DEF_BLK_DELAY); } - if (check & 0x3) { + if (check & DMA_STATUS_CHECK_BITS) { aprint_error_dev(sc->dk.sc_dev, "DMA can't reach memory/SDRAM ? 
(%u, status 0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", check & 0x0000FFFF, exchange_with_mem_dma_status_read(sc), diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index b8d7f23..828f0f5 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -1,5 +1,6 @@ from migen import * from migen.genlib.fifo import * +from migen.genlib.cdc import PulseSynchronizer from litex.soc.interconnect.csr import * from litex.soc.interconnect import wishbone @@ -70,9 +71,20 @@ class ExchangeWithMem(Module, AutoCSR): self.submodules.req_r_fsm = req_r_fsm = FSM(reset_state="Reset") self.submodules.req_w_fsm = req_w_fsm = FSM(reset_state="Reset") + # this could use CSRFields... self.comb += self.dma_status.status[0:1].eq(~req_r_fsm.ongoing("Idle")) # Read FSM Busy self.comb += self.dma_status.status[1:2].eq(~req_w_fsm.ongoing("Idle")) # Write FSM Busy self.comb += self.dma_status.status[2:3].eq(self.fromsbus_fifo.readable) # Some data available to write to memory + self.submodules.fromsbus_req_fifo_readable_sync = PulseSynchronizer("sbus", "sys") + fromsbus_req_fifo_readable_in_sys = Signal() + self.comb += self.fromsbus_req_fifo_readable_sync.i.eq(self.fromsbus_req_fifo.readable) + self.comb += fromsbus_req_fifo_readable_in_sys.eq(self.fromsbus_req_fifo_readable_sync.o) + self.comb += self.dma_status.status[3:4].eq(fromsbus_req_fifo_readable_in_sys) # we still have outstanding requests + self.submodules.tosbus_fifo_readable_sync = PulseSynchronizer("sbus", "sys") + tosbus_fifo_readable_in_sys = Signal() + self.comb += self.tosbus_fifo_readable_sync.i.eq(self.tosbus_fifo.readable) + self.comb += tosbus_fifo_readable_in_sys.eq(self.tosbus_fifo_readable_sync.o) + self.comb += self.dma_status.status[4:5].eq(tosbus_fifo_readable_in_sys) # there's still data to be sent to memory; this will drop before the last SBus Master Cycle is finished, but then the SBus is 
busy so the host won't be able to read the status before the cycle is finished so we're good self.comb += self.dma_status.status[8:9].eq(req_r_fsm.ongoing("ReqFromMemory")) self.comb += self.dma_status.status[9:10].eq(req_r_fsm.ongoing("WaitForData")) From 42c508688547073bd6245da5e6bb11fcaec165f3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 25 Jul 2021 06:52:05 -0400 Subject: [PATCH 53/78] Trying to integrate Bestrusted's Curve25519 engine ; trivial program works but not after a few repetition :-( --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 615 +++++ .../sys/dev/sbus/sbusfpga_curve25519engine.h | 49 + sbus-to-ztex-gateware-migen/engine.py | 1996 +++++++++++++++++ .../engine_code/Cargo.toml | 21 + .../engine_code/engine_code.rs | 296 +++ sbus-to-ztex-gateware-migen/netbsd_csr.h | 431 +++- sbus-to-ztex-gateware-migen/prom_csr.fth | 10 +- sbus-to-ztex-gateware-migen/prom_migen.fth | 55 +- .../sbus_to_fpga_fsm.py | 30 +- .../sbus_to_fpga_soc.py | 77 +- sbus-to-ztex-gateware-migen/ztex213_sbus.py | 2 +- 11 files changed, 3541 insertions(+), 41 deletions(-) create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h create mode 100644 sbus-to-ztex-gateware-migen/engine.py create mode 100644 sbus-to-ztex-gateware-migen/engine_code/Cargo.toml create mode 100644 sbus-to-ztex-gateware-migen/engine_code/engine_code.rs diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c new file mode 100644 index 0000000..8bbe0fe --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -0,0 +1,615 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +int sbusfpga_curve25519engine_print(void *, const char *); +int sbusfpga_curve25519engine_match(device_t, cfdata_t, void *); +void sbusfpga_curve25519engine_attach(device_t, device_t, void *); + +CFATTACH_DECL_NEW(sbusfpga_c29e, sizeof(struct sbusfpga_curve25519engine_softc), + sbusfpga_curve25519engine_match, sbusfpga_curve25519engine_attach, NULL, NULL); + +dev_type_open(sbusfpga_curve25519engine_open); +dev_type_close(sbusfpga_curve25519engine_close); +dev_type_ioctl(sbusfpga_curve25519engine_ioctl); + + + +const struct cdevsw sbusfpga_c29e_cdevsw = { + .d_open = sbusfpga_curve25519engine_open, + .d_close = sbusfpga_curve25519engine_close, + .d_read = noread, + .d_write = nowrite, + .d_ioctl = sbusfpga_curve25519engine_ioctl, + .d_stop = nostop, + .d_tty = notty, + .d_poll = nopoll, + .d_mmap = nommap, + .d_kqfilter = nokqfilter, + .d_discard = nodiscard, + .d_flag = 0 +}; + +extern struct cfdriver sbusfpga_c29e_cd; + +struct sbusfpga_curve25519engine_montgomeryjob { + uint32_t x0_u[8]; + uint32_t x0_w[8]; + uint32_t x1_u[8]; + uint32_t x1_w[8]; + uint32_t affine_u[8]; + uint32_t scalar[8]; +}; + +static int init_program(struct sbusfpga_curve25519engine_softc *sc); +static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); +static int start_job(struct sbusfpga_curve25519engine_softc *sc); +static int wait_job(struct sbusfpga_curve25519engine_softc *sc); +static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); + +#define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) + +int +sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp 
*l) +{ + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; + int err = 0; + + if (!sc->initialized) { + if (init_program(sc)) { + return ENXIO; + } else { + sc->initialized = 1; + } + } + switch (cmd) { + case SBUSFPGA_DO_MONTGOMERYJOB: { + err = write_inputs(sc, job, 0); + if (err) + return err; + err = start_job(sc); + if (err) + return err; + delay(10); + err = wait_job(sc); + if (err) + return err; + err = read_outputs(sc, job, 0); + if (err) + return err; + } + break; + default: + err = EINVAL; + break; + } + return(err); +} + +int +sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_curve25519engine_close(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_curve25519engine_print(void *aux, const char *busname) +{ + + sbus_print(aux, busname); + return (UNCONF); +} + +int +sbusfpga_curve25519engine_match(device_t parent, cfdata_t cf, void *aux) +{ + struct sbus_attach_args *sa = (struct sbus_attach_args *)aux; + + return (strcmp("betrustedc25519e", sa->sa_name) == 0); +} + +#if 0 +static const uint32_t program[192] = {0x00480800, + 0x007407cc, + 0x007c07cb, + 0x0049d483, + 0x0079b643, + 0x0079e482, + 0x00659783, + 0x006db783, + 0x0079c683, + 0x0079e482, + 0x0069a783, + 0x0071c783, + 0x00480740, + 0x00500640, + 0x00540680, + 0x005806c0, + 0x005c0700, + 0x00015505, + 0x00780008, + 0x0001e006, + 0x005558c6, + 0x00055505, + 0x00780048, + 0x0005e046, + 0x00097585, + 0x00780088, + 0x0009e086, + 0x005d78c6, + 0x000d7585, + 0x007800c8, + 0x000de0c6, + 0x00100007, + 0x00141047, + 0x007458c6, + 0x0019d105, + 0x00780188, + 0x0019e186, + 0x001c3007, + 0x00202047, + 0x002481c5, + 0x00780248, + 0x0025e246, + 0x007488c6, + 0x0029d1c5, + 0x00780288, + 0x0029e286, + 0x002c9247, + 0x0030a287, + 0x00346907, + 0x00385107, + 
0x003c5345, + 0x007803c8, + 0x003de3c6, + 0x0040f187, + 0x0044c607, + 0x00500380, + 0x00540400, + 0x005802c0, + 0x005c0440, + 0x00640500, + 0x00680540, + 0x006c0580, + 0x007005c0, + 0x010004c9, + 0x004e14c6, + 0xdf800809, + 0x0079b643, + 0x0079e482, + 0x00659783, + 0x006db783, + 0x0079c683, + 0x0079e482, + 0x0069a783, + 0x0071c783, + 0x00740640, + 0x00780680, + 0x0001e787, + 0x00040007, + 0x00041047, + 0x00081787, + 0x000c2007, + 0x001030c7, + 0x00144087, + 0x00700940, + 0x00185147, + 0x00721706, + 0x01000709, + 0x00186187, + 0xfe000809, + 0x001c5187, + 0x00700980, + 0x002071c7, + 0x00721706, + 0x01000709, + 0x00208207, + 0xfe000809, + 0x00247207, + 0x007009c0, + 0x00289247, + 0x00721706, + 0x01000709, + 0x0028a287, + 0xfe000809, + 0x002c9287, + 0x00700980, + 0x0030b2c7, + 0x00721706, + 0x01000709, + 0x0030c307, + 0xfe000809, + 0x00347307, + 0x00700a00, + 0x0038d347, + 0x00721706, + 0x01000709, + 0x0038e387, + 0xfe000809, + 0x003cd387, + 0x00700a40, + 0x0040f3c7, + 0x00721706, + 0x01000709, + 0x00410407, + 0xfe000809, + 0x0044f407, + 0x00700a00, + 0x00491447, + 0x00721706, + 0x01000709, + 0x00492487, + 0xfe000809, + 0x004cd487, + 0x00700940, + 0x005134c7, + 0x00721706, + 0x01000709, + 0x00514507, + 0xfe000809, + 0x00543507, + 0x007d5747, + 0x0000000a, + 0x0000000a, + 0x0000000a, + 0x0000000a, +}; +static const uint32_t program_len = 141; +#else +static const uint32_t program[16] = { + 0x00640a40, + 0x00680840, + 0x0000000a, + 0x0000000a +}; +static const uint32_t program_len = 3; +#endif + + +/* + * Attach all the sub-devices we can find + */ +void +sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) +{ + struct sbus_attach_args *sa = aux; + struct sbusfpga_curve25519engine_softc *sc = device_private(self); + struct sbus_softc *sbsc = device_private(parent); + int node; + int sbusburst; + + sc->sc_bustag = sa->sa_bustag; + sc->sc_dmatag = sa->sa_dmatag; + sc->sc_dev = self; + + aprint_normal("\n"); + + if (sa->sa_nreg < 3) { + 
aprint_error(": Not enough registers spaces\n"); + return; + } + + /* map registers */ + if (sbus_bus_map(sc->sc_bustag, + sa->sa_reg[0].oa_space /* sa_slot */, + sa->sa_reg[0].oa_base /* sa_offset */, + sa->sa_reg[0].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_curve25519engine) != 0) { + aprint_error(": cannot map Curve25519Engine registers\n"); + return; + } else { + aprint_normal_dev(self, "Curve25519Engine registers @ %p\n", (void*)sc->sc_bhregs_curve25519engine); + } + /* map microcode */ + if (sbus_bus_map(sc->sc_bustag, + sa->sa_reg[1].oa_space /* sa_slot */, + sa->sa_reg[1].oa_base /* sa_offset */, + sa->sa_reg[1].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_microcode) != 0) { + aprint_error(": cannot map Curve25519Engine microcode\n"); + return; + } else { + aprint_normal_dev(self, "Curve25519Engine microcode @ %p\n", (void*)sc->sc_bhregs_microcode); + } + /* map register file */ + if (sbus_bus_map(sc->sc_bustag, + sa->sa_reg[2].oa_space /* sa_slot */, + sa->sa_reg[2].oa_base /* sa_offset */, + sa->sa_reg[2].oa_size /* sa_size */, + BUS_SPACE_MAP_LINEAR, + &sc->sc_bhregs_regfile) != 0) { + aprint_error(": cannot map Curve25519Engine regfile\n"); + return; + } else { + aprint_normal_dev(self, "Curve25519Engine regfile @ %p\n", (void*)sc->sc_bhregs_regfile); + } + sc->sc_bufsiz_curve25519engine = sa->sa_reg[0].oa_size; + sc->sc_bufsiz_microcode = sa->sa_reg[1].oa_size; + sc->sc_bufsiz_regfile = sa->sa_reg[2].oa_size; + + node = sc->sc_node = sa->sa_node; + + /* + * Get transfer burst size from PROM + */ + sbusburst = sbsc->sc_burst; + if (sbusburst == 0) + sbusburst = SBUS_BURST_32 - 1; /* 1->16 */ + + sc->sc_burst = prom_getpropint(node, "burst-sizes", -1); + if (sc->sc_burst == -1) + /* take SBus burst sizes */ + sc->sc_burst = sbusburst; + + /* Clamp at parent's burst sizes */ + sc->sc_burst &= sbusburst; + + aprint_normal("\n"); + aprint_normal_dev(self, "nid 0x%x, bustag %p, burst 0x%x (parent 0x%0x)\n", + 
sc->sc_node, + sc->sc_bustag, + sc->sc_burst, + sbsc->sc_burst); + + if (init_program(sc)) { + if (init_program(sc)) { + aprint_normal_dev(sc->sc_dev, "INIT - FAILED\n"); + sc->initialized = 0; + } else { + sc->initialized = 1; + } + } else { + sc->initialized = 1; + } +} + +#define CONFIG_CSR_DATA_WIDTH 32 +// define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle +#define CSR_LEDS_BASE +//#define CSR_CURVE25519ENGINE_BASE +#define CSR_DDRPHY_BASE +#define CSR_EXCHANGE_WITH_MEM_BASE +#define CSR_SDRAM_BASE +#define CSR_SDBLOCK2MEM_BASE +#define CSR_SDCORE_BASE +#define CSR_SDIRQ_BASE +#define CSR_SDMEM2BLOCK_BASE +#define CSR_SDPHY_BASE +#define CSR_TRNG_BASE +#include "dev/sbus/litex_csr.h" +#undef CSR_LEDS_BASE +//#undef CSR_CURVE25519ENGINE_BASE +#undef CSR_DDRPHY_BASE +#undef CSR_EXCHANGE_WITH_MEM_BASE +#undef CSR_SDRAM_BASE +#undef CSR_SDBLOCK2MEM_BASE +#undef CSR_SDCORE_BASE +#undef CSR_SDIRQ_BASE +#undef CSR_SDMEM2BLOCK_BASE +#undef CSR_SDPHY_BASE +#undef CSR_TRNG_BASE + + +static int init_program(struct sbusfpga_curve25519engine_softc *sc) { + /* the microcode is at the beginning */ + int err = 0; + uint32_t i; + + /* first we need to turn the engine power on ... 
*/ + if ((curve25519engine_power_read(sc) & 1) == 0) { + curve25519engine_power_write(sc, 1); + delay(2); + } + + for (i = 0 ; i < program_len + 1 ; i++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_microcode, (i*4), program[i]); + if ((i%8)==7) + delay(1); + } + + curve25519engine_window_write(sc, 0); /* could use window_window to access fields, but it creates a RMW cycle for nothing */ + curve25519engine_mpstart_write(sc, 0); + curve25519engine_mplen_write(sc, program_len); + + aprint_normal_dev(sc->sc_dev, "INIT - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc)); + +#if 1 + /* double check */ + u_int32_t x; + int count = 0; + for (i = 0 ; i < program_len + 1 && count < 10; i++) { + x = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_microcode, (i*4)); + if (x != program[i]) { + aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine program failure: [%d] 0x%08x <> 0x%08x\n", i, x, program[i]); + err = 1; + count ++; + } + if ((i%8)==7) + delay(1); + } + if ((x = curve25519engine_window_read(sc)) != 0) { + aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register failure: window = 0x%08x\n", x); + err = 1; + } + if ((x = curve25519engine_mpstart_read(sc)) != 0) { + aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register failure: mpstart = 0x%08x\n", x); + err = 1; + } + if ((x = curve25519engine_mplen_read(sc)) != program_len) { + aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register failure: mplen = 0x%08x\n", x); + err = 1; + } +#endif + + curve25519engine_power_write(sc, 0); + + return err; +} + +static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window) { /* returns 0 on success or a positive errno */ + const uint32_t base = window * 0x400; + int i; + uint32_t status = curve25519engine_status_read(sc); + int err = 0; + if (status & 1) { + aprint_error_dev(sc->sc_dev, "WRITE - Curve25519Engine status: 0x%08x, still running?\n", status); + return ENXIO; /* positive errno, as NetBSD expects: status bit 0 is still set */ + } + + /* first we need to 
turn the engine power on ... */ + if ((curve25519engine_power_read(sc) & 1) == 0) { + curve25519engine_power_write(sc, 1); + delay(2); + } + + +#define REG_BASE(reg) (base + (reg * 32)) +#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i), job->affine_u[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i), job->x0_u[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i), job->x0_w[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i), job->x1_u[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i), job->x1_w[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i), job->scalar[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,i), ((i == 0) ? 254 : 0)); + delay(1); + } +#undef SUBREG_ADDR +#undef REG_BASE + +#if 1 +#define REG_BASE(reg) (base + (reg * 32)) +#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) + for (i = 0 ; i < 8 && !err; i ++) { + if (job->affine_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i))) err = ENXIO; + if (job->x0_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i))) err = ENXIO; + if (job->x0_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i))) err = ENXIO; + if (job->x1_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i))) err = ENXIO; + if (job->x1_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i))) err = ENXIO; + if (job->scalar[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i))) err = ENXIO; + delay(1); + } + if (err) aprint_error_dev(sc->sc_dev, "WRITE - data did not read-write properly\n"); +#undef SUBREG_ADDR +#undef REG_BASE +#endif + + return err; +} + +static int start_job(struct 
sbusfpga_curve25519engine_softc *sc) { + uint32_t status = curve25519engine_status_read(sc); + if (status & 1) { + aprint_error_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x, still running?\n", status); + return ENXIO; /* positive errno, as NetBSD expects: do not start while status bit 0 is set */ + } + curve25519engine_control_write(sc, 1); + aprint_normal_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc)); + + return 0; +} + +static int wait_job(struct sbusfpga_curve25519engine_softc *sc) { /* polls status bit 0 at most 50 times, delay(20) between reads; returns 0 or a positive errno */ + uint32_t status = curve25519engine_status_read(sc); + int count = 0; + while ((status & 1) && (count < 50)) { + aprint_normal_dev(sc->sc_dev, "WAIT - ongoing, Curve25519Engine status: 0x%08x [%d]\n", status, count); + count ++; + delay(20); + status = curve25519engine_status_read(sc); + } + //curve25519engine_control_write(sc, 0); + if (status & 1) { + aprint_error_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x, did not finish in time? [0x%08x]\n", status, curve25519engine_instruction_read(sc)); + return ENXIO; /* positive errno: status bit 0 still set after the last poll */ + } else { + aprint_normal_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x [%d, 0x%08x]\n", status, count, curve25519engine_instruction_read(sc)); + } + + return 0; +} + +static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window) { /* returns 0 on success or a positive errno */ + const uint32_t base = window * 0x400; + int i; + uint32_t status = curve25519engine_status_read(sc); + if (status & 1) { + aprint_error_dev(sc->sc_dev, "READ - Curve25519Engine status: 0x%08x, still running?\n", status); + return ENXIO; /* positive errno, as NetBSD expects: status bit 0 is still set */ + } + +#define REG_BASE(reg) (base + (reg * 32)) +#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) + for (i = 0 ; i < 8 ; i ++) { + job->affine_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i)); + job->x0_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i)); + job->x0_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i)); + job->x1_u[i] = 
bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i)); + job->x1_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i)); + job->scalar[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i)); + delay(1); + } + aprint_normal_dev(sc->sc_dev, "READ - Curve25519Engine 19 low 32 bits: 0x%08x\n", bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,0))); +#undef SUBREG_ADDR +#undef REG_BASE + + curve25519engine_power_write(sc, 0); + + return 0; +} diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h new file mode 100644 index 0000000..1bda49a --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h @@ -0,0 +1,49 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SBUSFPGA_CURVE25519ENGINE_H_ +#define _SBUSFPGA_CURVE25519ENGINE_H_ + +struct sbusfpga_curve25519engine_softc { + device_t sc_dev; /* us as a device */ + u_int sc_rev; /* revision */ + int sc_node; /* PROM node ID */ + int sc_burst; /* DVMA burst size in effect */ + bus_space_tag_t sc_bustag; /* bus tag */ + bus_space_handle_t sc_bhregs_curve25519engine; /* bus handle */ + bus_space_handle_t sc_bhregs_microcode; /* bus handle */ + bus_space_handle_t sc_bhregs_regfile; /* bus handle */ + //void * sc_buffer; /* VA of the registers */ + int sc_bufsiz_curve25519engine; /* Size of buffer */ + int sc_bufsiz_microcode; /* Size of buffer */ + int sc_bufsiz_regfile; /* Size of buffer */ + bus_dma_tag_t sc_dmatag; + int initialized; +}; + +#endif /* _SBUSFPGA_CURVE25519ENGINE_H_ */ diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py new file mode 100644 index 0000000..9938a75 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -0,0 +1,1996 @@ +from migen import * +from migen.genlib.cdc import MultiReg + +from litex.soc.interconnect.csr import * +from litex.soc.integration.doc import AutoDoc, ModuleDoc +from litex.soc.interconnect import wishbone +from litex.soc.interconnect.csr_eventmanager import * + +prime_string = "$2^{{255}}-19$" # 2\ :sup:`255`-19 +field_latex = "$\mathbf{{F}}_{{{{2^{{255}}}}-19}}$" + +opcode_bits = 6 # number of bits used to encode the opcode field +opcodes = { 
# mnemonic : [bit coding, docstring] + "UDF" : [-1, "Placeholder for undefined opcodes"], + "PSA" : [0, "Wd $\gets$ Ra // pass A"], + "PSB" : [1, "Wd $\gets$ Rb // pass B"], + "MSK" : [2, "Wd $\gets$ Replicate(Ra[0], 256) & Rb // for doing cswap()"], + "XOR" : [3, "Wd $\gets$ Ra ^ Rb // bitwise XOR"], + "NOT" : [4, "Wd $\gets$ ~Ra // binary invert"], + "ADD" : [5, "Wd $\gets$ Ra + Rb // 256-bit binary add, must be followed by TRD,SUB"], + "SUB" : [6, "Wd $\gets$ Ra - Rb // 256-bit binary subtraction, this is not the same as a subtraction in the finite field"], + "MUL" : [7, f"Wd $\gets$ Ra * Rb // multiplication in {field_latex} - result is reduced"], + "TRD" : [8, "If Ra $\geqq 2^{{255}}-19$ then Wd $\gets$ $2^{{255}}-19$, else Wd $\gets$ 0 // Test reduce"], + "BRZ" : [9, "If Ra == 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if zero"], + "FIN" : [10, "halt execution and assert interrupt to host CPU that microcode execution is done"], + "SHL" : [11, "Wd $\gets$ Ra << 1 // shift Ra left by one and store in Wd"], + "XBT" : [12, "Wd[0] $\gets$ Ra[254] // extract the 255th bit of Ra and put it into the 0th bit of Wd"], + "MAX" : [13, "Maximum opcode number (for bounds checking)"], +} + +num_registers = 32 +instruction_layout = [ + ("opcode", opcode_bits, "opcode to be executed"), + ("ra", log2_int(num_registers), "operand A read register"), + ("ca", 1, "set to substitute constant table value for A"), + ("rb", log2_int(num_registers), "operand B read register"), + ("cb", 1, "set to substitute constant table value for B"), + ("wd", log2_int(num_registers), "write register"), + ("immediate", 9, "Used by jumps to load the next PC value") +] + +class RegisterFile(Module, AutoDoc): + def __init__(self, depth=512, width=256, bypass=False): + reset_cycles = 4 + self.intro = ModuleDoc(title="Register File", body=""" +This implements the register file for the Curve25519 engine. 
It's implemented using +7-series specific block RAMs in order to take advantage of architecture-specific features +to ensure a compact and performant implementation. + +The core primitive is the RAMB36E1. This can be configured as a 64/72-bit wide memory +but only if used in "SDP" (simple dual port) mode. In SDP, you have one read, one write port. +However, the register file needs to produce two operands per cycle, while accepting up to +one operand per cycle. + +In order to do this, we stipulate that the RF runs at `rf_clk` (200MHz), but uses four phases +to produce/consume data. "Engine clock" `eng_clk` (50MHz) runs at a lower rate to accommodate +large-width arithmetic in a single cycle. + +The phasing is defined as follows: + +Phase 0: + - read from port A +Phase 1: + - read from port B +Phase 2: + - write data +Phase 3: + - quiet cycle, used to create extra setup time for next stage (requires multicycle-path constraints) + +Because the write happens in phase 2, after both reads, a write to the same address +as one being read does not affect that read: you get the old value. For pipelined operation, it could be desirable to shift +the write to happen before the reads, but as of now the implementation is not pipelined. + +The register file is unavailable for {} `eng_clk` cycles after reset. + +When configured as a 64 bit memory, the depth of the block is 512 words, corresponding to +an address width of 9 bits. + + """.format(reset_cycles)) + + instruction = Record(instruction_layout) + phase = Signal(2) # internal phase + self.phase = Signal() # external phase + self.comb += self.phase.eq(phase[1]) # divide down internal phase so slower modules can capture it + + # these are the signals in and out of the register file + self.ra_dat = Signal(width) # this is passed in from outside the module because we want to mux with e.g. 
memory bus + self.ra_adr = Signal(log2_int(depth)) + self.rb_dat = Signal(width) + self.rb_adr = Signal(log2_int(depth)) + + # register file pipelines the write target address, going to the exec units; also needs the window to be complete + # window is assumed to be static and does not change throughout a given program run, so it's not pipelined + self.instruction_pipe_in = Signal(len(instruction)) + self.instruction_pipe_out = Signal(len(instruction)) + self.window = Signal(log2_int(depth) - log2_int(num_registers)) + + # this is the immediate data to write in, coming from the exec units + self.wd_dat = Signal(width) + self.wd_adr = Signal(log2_int(depth)) + self.wd_bwe = Signal(width//8) # byte masks for writing + self.we = Signal() + self.clear = Signal() + + self.running = Signal() # used for activity gating to RAM + + eng_sync = Signal(reset=1) + + rf_adr = Signal(log2_int(depth)) + self.comb += [ + If(phase == 0, + rf_adr.eq(self.ra_adr), + ).Elif(phase == 1, + rf_adr.eq(self.rb_adr), + ) + ] + rf_dat = Signal(width) + self.sync.eng_clk += [ + # TODO: check that this is in sync with expected values + self.instruction_pipe_out.eq(self.instruction_pipe_in), + ] + # unfortunately, -1L speed grade is too slow to support pipeline bypassing of the register file: + # bypass path closes at about 5.4ns, which fails to meet the 5ns cycle time target for the four-phase RF + if bypass: + self.sync.rf_clk += [ + If(phase == 1, + If((self.wd_adr != self.ra_adr) | ~self.we, + self.ra_dat.eq(rf_dat), + ).Else( + self.ra_dat.eq(self.wd_dat), + ), + self.rb_dat.eq(self.rb_dat), + ).Elif(phase == 2, + self.ra_dat.eq(self.ra_dat), + If((self.wd_adr != self.rb_adr) | ~self.we, + self.rb_dat.eq(rf_dat), + ).Else( + self.rb_dat.eq(self.wd_dat), + ) + ).Else( + self.ra_dat.eq(self.ra_dat), + self.rb_dat.eq(self.rb_dat), + ), + ] + else: + self.sync.rf_clk += [ + If(phase == 1, + self.ra_dat.eq(rf_dat), + self.rb_dat.eq(self.rb_dat), + ).Elif(phase == 2, + self.ra_dat.eq(self.ra_dat), 
+ self.rb_dat.eq(rf_dat), + ).Else( + self.ra_dat.eq(self.ra_dat), + self.rb_dat.eq(self.rb_dat), + ), + ] + wren_pipe = Signal() # do not change this variable name, it is constrained in the XDC + self.sync.rf_clk += [ + If(eng_sync, + phase.eq(0), + ).Else( + phase.eq(phase + 1), + ), + wren_pipe.eq((phase == 1) & self.we), # we want wren to hit on phase==2, but we pipeline it to relax timing. so capture the input to the pipe on phase == 1 + ] + wd_bwe_pipe = Signal(width//8) + self.sync.rf_clk += [ + # add a register to relax timing on wd_bwe. This offsets the signal by one rf_clk (clk200) period, + # but because write happens on phase 2 and the signal is valid on eng_clk (clk50) edges, this will + # not affect the functionality + wd_bwe_pipe.eq(self.wd_bwe) + ] + + for word in range(int(256/64)): + self.specials += Instance("BRAM_SDP_MACRO", name="RF_RAMB" + str(word), + p_BRAM_SIZE = "36Kb", + p_DEVICE = "7SERIES", + p_WRITE_WIDTH = 64, + p_READ_WIDTH = 64, + p_DO_REG = 0, + p_INIT_FILE = "NONE", + p_SIM_COLLISION_CHECK = "ALL", # "WARNING_ONLY", "GENERATE_X_ONLY", "NONE" + p_SRVAL = 0, + p_WRITE_MODE = "READ_FIRST", + i_RDCLK = ClockSignal("rf_clk"), + i_WRCLK = ClockSignal("rf_clk"), + i_RDADDR = rf_adr, + i_WRADDR = self.wd_adr, + i_DI = self.wd_dat[word*64 : word*64 + 64], + o_DO = rf_dat[word*64 : word*64 + 64], + i_RDEN = self.running, # reduce power when not running + i_WREN = wren_pipe, # (phase == 2) & self.we, but pipelined one stage + i_RST = ResetSignal("rf_clk"), + i_WE = wd_bwe_pipe[word*8 : word*8 + 8], + + i_REGCE = 1, # should be ignored, but added to quiet down simulation warnings + ) + + # create an internal reset signal that synchronizes the "eng" to the "rf" domains + # it will also reset the register file on demand + reset_counter = Signal(log2_int(reset_cycles), reset=reset_cycles - 1) + self.sync.eng_clk += [ + If(self.clear, + reset_counter.eq(reset_cycles - 1), + eng_sync.eq(1), + ).Else( + If(reset_counter != 0, + 
reset_counter.eq(reset_counter - 1), + eng_sync.eq(1), + ).Else( + eng_sync.eq(0) + ), + ) + ] + +class Curve25519Const(Module, AutoDoc): + def __init__(self, insert_docs=False): + global did_const_doc + constant_defs = { + 0: [0, "zero", "The number zero"], + 1: [1, "one", "The number one"], + 2: [121665, "am24", "The value $\\frac{{A-2}}{{4}}$"], + 3: [0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFED, "field", f"Binary coding of {prime_string}"], + 4: [121666, "ap24", "The value $\\frac{{A+2}}{{4}}$"], + 5: [5, "five", "The number 5 (for pow22501)"], + 6: [10, "ten", "The number 10 (for pow22501)"], + 7: [20, "twenty", "The number 20 (for pow22501)"], + 8: [50, "fifty", "The number 50 (for pow22501)"], + 9: [100, "one hundred", "The number 100 (for pow22501)"], + } + self.adr = Signal(5) + self.const = Signal(256) + constant_str = "This module encodes the constants that can be substituted for any register value. Therefore, up to 32 constants can be encoded.\n\n" + for code, const in constant_defs.items(): + self.comb += [ + If(self.adr == code, + self.const.eq(const[0]), + ) + ] + constant_str += """ +**{}** + + Substitute register {} with {}: {}\n""".format(const[1], code, const[2], const[0]) + if insert_docs: + self.constants = ModuleDoc(title="Curve25519 Constants", body=constant_str) + +# ------------------------------------------------------------------------ EXECUTION UNITS +class ExecUnit(Module, AutoDoc): + def __init__(self, width=256, opcode_list=["UDF"], insert_docs=False): + if insert_docs: + self.intro = ModuleDoc(title="ExecUnit class", body=""" + ExecUnit is the superclass template for execution units. + + Configuration Arguments: + - `opcode_list` is the list of opcodes that an ExecUnit can process + - `width` is the bit-width of the execution pathway + + Signal API for an exec unit: + - `a` and `b` are the inputs. 
+ - `instruction_in` is the instruction corresponding to the currently present `a` and `b` inputs + - `start` is a single-clock signal which indicates processing should start + - `q` is the output + - `instruction_out` is the instruction for the result present at the `q` output + - `q_valid` is a single cycle pulse that indicates that the `q` result and `wa_out` value is valid + + + """) + self.instruction = Record(instruction_layout) + + self.a = Signal(width) + self.b = Signal(width) + self.q = Signal(width) + self.start = Signal() + self.q_valid = Signal() + # pipeline the instruction + self.instruction_in = Signal(len(self.instruction)) + self.instruction_out = Signal(len(self.instruction)) + + self.opcode_list = opcode_list + self.comb += [ + self.instruction.raw_bits().eq(self.instruction_in) + ] + +class ExecMask(ExecUnit): + def __init__(self, width=256): + ExecUnit.__init__(self, width, ["MSK"], insert_docs=True) # we insert_docs to be true for exactly once module exactly once + self.intro = ModuleDoc(title="Masking ExecUnit Subclass", body=f""" +This execution unit implements the bit-mask and operation. It takes Ra[0] (the +zeroth bit of Ra) and replicates it to {str(width)} bits wide, and then ANDs it with +the full contents of Rb. This operation is introduced as one of the elements of +the `cswap()` routine, which is a constant-time swap of two variables based on a `swap` flag. 
+ +Here is an example of how to swap the contents of `ra` and `rb` based on the value of the 0th bit of `swap`:: + + XOR dummy, ra, rb // dummy $\gets$ ra ^ rb + MSK dummy, swap, dummy // If swap[0] then dummy $\gets$ dummy, else dummy $\gets$ 0 + XOR ra, dummy, ra // ra $\gets$ ra ^ dummy + XOR rb, dummy, rb // rb $\gets$ rb ^ dummy +""") + self.sync.eng_clk += [ + self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + self.comb += [ + self.q.eq(self.b & Replicate(self.a[0], width)), + ] + +class ExecLogic(ExecUnit): + def __init__(self, width=256): + ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "PSB", "XBT", "SHL"]) + self.intro = ModuleDoc(title="Logic ExecUnit Subclass", body=f""" +This execution unit implements bit-wise logic operations: XOR, NOT, and +passthrough. + +* XOR returns the result of A^B +* NOT returns the result of !A +* PSA returns the value of A +* PSB returns the value of B +* SHL returns A << 1 +* XBT returns the 255th bit of A, reported in the 0th bit of the result + +""") + + zeros = Signal(255, reset=0) + self.sync.eng_clk += [ + self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + self.comb += [ + If(self.instruction.opcode == opcodes["XOR"][0], + self.q.eq(self.a ^ self.b) + ).Elif(self.instruction.opcode == opcodes["NOT"][0], + self.q.eq(~self.a) + ).Elif(self.instruction.opcode == opcodes["PSA"][0], + self.q.eq(self.a), + ).Elif(self.instruction.opcode == opcodes["PSB"][0], + self.q.eq(self.b), + ).Elif(self.instruction.opcode == opcodes["XBT"][0], + self.q.eq(Cat(self.a[254], zeros)) + ).Elif(self.instruction.opcode == opcodes["SHL"][0], + self.q.eq(Cat(0, self.a[:255])), + ), + ] + +class ExecAddSub(ExecUnit, AutoDoc): + def __init__(self, width=256): + ExecUnit.__init__(self, width, ["ADD", "SUB"]) + self.notes = ModuleDoc(title="Add/Sub ExecUnit Subclass", body=f""" +This execution module implements 256-bit binary addition and subtraction. 
+ +Note that to implement operations in $\mathbf{{F}}_p$, where *p* is $2^{{255}}-19$, this must be compounded +with other operators as follows: + +Addition of Ra + Rb into Rc in {field_latex}: + +.. code-block:: c + + ADD Rc, Ra, Rb // Rc <- Ra + Rb + TRD Rd, Rc // Rd <- ReductionValue(Rc) + SUB Rc, Rc, Rd // Rc <- Rc - Rd + +Negation of Ra into Rc in {field_latex}: + +.. code-block:: c + + SUB Rc, #FIELDPRIME, Ra // Rc <- 2^255-19 - Ra + +Note that **#FIELDPRIME** is one of the 32 available hard-coded constants +that can be substituted for any register in any arithmetic operation, please +see the section on "Constants" for more details. + +Subtraction of Ra - Rb into Rc in {field_latex}: + +.. code-block:: c + + SUB Rb, #FIELDPRIME, Rb // Rb <- 2^255-19 - Rb + ADD Rc, Ra, Rb // Rc <- Ra + Rb + TRD Rd, Rc // Rd <- ReductionValue(Rc) + SUB Rc, Rc, Rd // Rc <- Rc - Rd + +In all the examples above, Ra and Rb must be members of {field_latex}. + """) + + self.sync.eng_clk += [ + self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + self.comb += [ + If(self.instruction.opcode == opcodes["ADD"][0], + self.q.eq(self.a + self.b), + ).Elif(self.instruction.opcode == opcodes["SUB"][0], + self.q.eq(self.a - self.b), + ), + ] + +class ExecTestReduce(ExecUnit, AutoDoc): + def __init__(self, width=256): + ExecUnit.__init__(self, width, ["TRD"]) + + self.notes = ModuleDoc(title="Modular Reduction Test ExecUnit Subclass", body=f""" +First, observe that $2^n-19$ is 0x07FF....FFED. +Next, observe that arithmetic in the field of {prime_string} will never set +the 256th bit. + +Modular reduction must happen when an arithmetic operation +overflows the bounds of the modulus. When this happens, one must +subtract the modulus (in this case {prime_string}). + +The reduce operation is done in two halves. The first half is +to check if a reduction must happen. The second is to do the subtraction. 
+In order to allow for constant-time operation, we always do the subtraction, +even if it is not strictly necessary. + +We use this to our advantage, and compute a reduction using +a test operator that produces a residue, and a subtraction operation. + +It's up to the programmer to ensure that the two instruction sequence +is never broken up. + +Thus the reduction algorithm is as follows: + +1. TestReduce + - If the 256th bit is set (e.g, ra[255]), then return {prime_string} + - If bits ra[255:5] are all 1, and bits ra[4:0] are greater than or equal to 0x1D, then return {prime_string} + - Otherwise return 0 +2. Subtract + - Subtract the return value of TestReduce from the tested value + + """) + self.sync.eng_clk += [ + self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + self.comb += [ + If( (self.a >= 0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFED), + self.q.eq(0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFED) + ).Else( + self.q.eq(0x0) + ), + ] + +class ExecMul(ExecUnit, AutoDoc): + def __init__(self, width=256, sim=False): + ExecUnit.__init__(self, width, ["MUL"]) + + self.sync.eng_clk += [ # pipeline the instruction + self.instruction_out.eq(self.instruction_in), + ] + self.notes = ModuleDoc(title=f"Multiplication in {field_latex} ExecUnit Subclass", body=f""" +Unlike the ADD/SUB module, this operator explicitly works in {field_latex}. It takes in two inputs, +Ra and Rb, and both must be members of {field_latex}. The result is also reduced to a member of {field_latex}. + +The multiplier is designed with a separate clock, `mul_clk` so that it can be remapped to a faster +domain than `engine_clk` for better performance. The nominal target for `mul_clk` is 100MHz. 
+ +The base algorithm for this implementation is lifted from the paper "Compact and Flexible FPGA Implementation +of Ed25519 and X25519" by Furkan Turan and Ingrid Verbauwhede (https://doi.org/10.1145/3312742). The algorithm +specified in this paper is optimized for the DSP48E blocks found inside a 7-Series Xilinx FPGA. In particular, +we can compute 17-bit multiplies using this hardware block, and 17 divides evenly into 255 to produce +a requirement of 15x DSP48E blocks. + +At a high level, the steps to compute the multiplication are: + +1. Schoolbook multiplication +2. Collapse partial sums +3. Propagate carries +4. Is the sum $\geq$ $2^{{255}}-19$? +5. If yes, add 19; else add 0 +6. Propagate carries again, in case the addition by 19 causes overflows + +The multiplier would run about 30% faster if step (6) were skipped. This step is only needed +in a fairly small minority of cases, maybe a fraction of 1%, and the worst-case +carry propagate through every limb (mathspeak for "digits") is diminishingly rare. The test for +whether or not to propagate carries is fairly straightforward. However, short-circuiting +the carry propagate step based upon the properties of the data creates +a timing side-channel. Therefore, we prefer a slower but safer implementation, even if +we are spending a bunch of cycles propagating zeros most of the time. + +A constant-time optimization would be for the multiplier to simply produce a 256-bit +result, and then use a subsequent TRD/SUB instruction pair. However, the non-pipelined +version of the engine25519 executes at a rate of 60ns per instruction, or 120ns total to +compute the TRD/SUB combination, whereas iterating through the carry propagates +would take 140ns total (as the mul core runs at 2x the clock speed of the rest of the engine). +This is basically a wash. 
+ +However, if pipelining (and bypassing) were implemented, this might become a viable +optimization, but bypassing such a wide core would also have resource and speed +implications of its own. + +The above steps are coordinated by the `mseq` state machine. Control lines for +the DSP48E blocks are grouped into two sets, one controls the global state of +things such as the operation mode and input modes, and the other controls the +routing of individual 17-bit limbs (e.g. "digits" of our 17-bit representation of +numbers) to various sources and destinations. + +The following sections walk through the algorithm in detail. + +Schoolbook Multiplication +------------------------- + +The first step in the algorithm is called "schoolbook multiplication". It's +almost that, but with a twist. Below is what actual schoolbook multiplication +would be like, if you had a pair of numbers that were broken into three "limbs" (digits) +A[2:0] and B[2:0]. + +:: + + | A2 A1 A0 + x | B2 B1 B0 + ------------------------------------------ + | A2*B0 A1*B0 A0*B0 + A2*B1 | A1*B1 A0*B1 + A2*B2 A1*B2 | A0*B2 + (overflow) (not overflowing) + +The result of schoolbook multiplication is a result that potentially has +2x the number of limbs as either multiplicand. + +Mapping the overflow back into the prime field (e.g. wrapping the overflow around) +is a process called reduction. It turns out that for +a prime field like {field_latex}, reduction works out to taking the limbs that +extend beyond the base number of limbs in the field, shifting them right by the +number of limbs, multiplying them by 19, and adding them back in; and if the result +isn't a member of the field, add 19 one last time, and take the result as just +the bottom 255 bits (ignore any carry overflow). + +This trick works because the form of the field is $2^{{n}}-p$: it is a power +of 2, reduced by some small amount $p$. 
By starting from a power of 2, +most of the binary numbers representable in an n-bit word are valid members of +the field. The only ones that are not valid field members are the numbers that are greater than or equal +to $2^{{n}}-p$ and no larger than $2^{{n}}-1$ (the biggest number that fits in n bits). +To turn these invalid binary numbers into members of the field, you just need +to add $p$ (discarding the carry out of the n-bit word), and the reduction is complete. + +.. image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/reduction_diagram.png + :alt: A diagram illustrating modular reduction + +The diagram above draws out the number lines for both a simple binary number line, +and for some field $\mathbf{{F}}_{{{{2^{{n}}}}-p}}$. Both lines start at 0 on the left, +and increment until they roll over. The point at which $\mathbf{{F}}_{{{{2^{{n}}}}-p}}$ +rolls over is a distance $p$ from the end of the binary number line: thus, we can +observe that $2^{{n}}-1$ reduces to $p-1$. Adding 1 results in $2^{{n}}$, which reduces +to $p$: that is, the top bit, wrapped around and multiplied +by $p$. + +As we continue toward the right, the numbers continue to go up and wrap around, and +for each wrap the distance between the binary wrap point and the $\mathbf{{F}}_{{{{2^{{n}}}}-p}}$ +wrap point grows by another $p$, such that $2^{{n+1}}$ reduces to $2*p$. Thus modular +reduction of natural binary numbers that are larger than our field $2^{{n}}-p$ +consists of taking the bits that overflow an $n$-bit representation, shifting them to +the right by $n$, multiplying by $p$, and adding the product back into the low $n$ bits. + +A more tractable example to compute than {field_latex} is the field $\mathbf{{F}}_{{{{2^{{6}}}}-5}}$, i.e. arithmetic modulo 59. +The members of the field are from 0-58, and reduction is done by taking any number modulo 59. Thus, +the number 59 reduces to 0; 60 reduces to 1; 61 reduces to 2, and so forth, until we get to 64, which +reduces to 5 -- the value of the overflowed bits (1) times $p$. + +Let's look at some more examples. 
First, recall that the biggest member of the +field, 58, in binary is 0b00_11_1010. + +Let's consider a simple case where we are presented a partial sum that overflows +the field by one bit, say, the number 0b01_11_0000, which is decimal 112. In this case, we take +the overflowed bit, shift it to the right, multiply by 5: + + 0b01_11_0000 + ^ move this bit to the right multiply by 0b101 (5) + 0b00_11_0000 + 0b101 = 0b00_11_0101 = 53 + +And we can confirm using a calculator that 112 % 59 = 53. Now let's overflow +by yet another bit, say, the number 0b11_11_0000. Let's try the math again: + + 0b11_11_0000 + ^ move to the right and multiply by 0b101: 0b101 * 0b11 = 0b1111 + 0b00_11_0000 + 0b1111 = 0b00_11_1111 + +This result is still not a member of the field, as the maximum value is 0b0011_1010. +In this case, we need to add the number 5 once again to resolve this "special-case" +overflow where we have a binary number that fits in $n$ bits but is in that sliver +between $2^{{n}}-p$ and $2^{{n}}-1$: + + 0b00_11_1111 + 0b101 = 0b01_00_0100 + +At this step, we can discard the MSB overflow, and the result is 0b0100 = 4; +and we can check with a calculator that 240 % 59 = 4. + +Therefore, when doing schoolbook multiplication, the partial products that start to +overflow to the left can be brought back around to the right hand side, after +multiplying by $p$, in this case, the number 19. This magical property is one +of the reasons why {field_latex} is quite amenable to math on binary machines. 
+ +Let's use this finding to rewrite the straight schoolbook +multiplication form from above, but now with the modular reduction applied to +the partial sums, so it all wraps around into this compact form: +:: + + | A2 A1 A0 + x | B2 B1 B0 + ------------------------------------------ + | A2*B0 A1*B0 A0*B0 + | A1*B1 A0*B1 19*A2*B1 + + | A0*B2 19*A2*B2 19*A1*B2 + ---------------------------- + S2 S1 S0 + +As discussed above, each overflowed limb is wrapped around and multiplied by 19, +creating a set of partial sums S[2:0] that now has as many terms as +there are limbs, but with each partial sum still potentially +overflowing the native width of the limb. Thus, the inputs to a limb are 17 bits wide, +but we retain precision up to 48 bits during the partial sum stage, and then do a +subsequent condensation of partial sums to reduce things back down to 17 bits again. +The condensation is done in the next three steps, "collapse partial sums", "propagate carries", +and finally "normalize". + +However, before moving on to those sections, there is an additional trick we need +to apply for an efficient implementation of this multiplication step in hardware. + +In order to minimize the amount of data movement, we observe that for each row, +the "B" values are shared between all the multipliers, and the "A" values are +constant along the diagonals. Thus we can avoid re-loading the "A" values every +cycle by shifting the partial sums diagonally through the computation, allowing +the "A" values to be loaded as "A" and "A*19" into holding registers once before +the computation starts, and selecting between the two options based on the step +number during the computation. + +.. image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/mapping.png + :alt: Mapping schoolbook multiply onto the hardware array to minimize data movement + +The diagram above illustrates how the schoolbook multiply is mapped onto the hardware +array. 
The top diagram is an exact redrawing of the previous text box, where the +partial sums that would extend to the left have been multiplied by 19 and wrapped around. +Each colored block corresponds to a given DSP48E1 block. The red arrow +illustrates the path of a partial sum in both the schoolbook form and the unwrapped +form for hardware implementation. In the bottom diagram, one can clearly see that +the Ax coefficients are constant for each column, and that for each row, the Bx +values are identical across all blocks in each step. Thus each column corresponds to +a single DSP48E1 block. We take advantage of the ability of the DSP48E1 block to +hold two selectable A values to pre-load Ax and Ax*19 before the computation starts, and +we bus together the Bx values and change them in sequence with each round. The +partial sums are then routed to the "down and right" to complete the mapping. The final +result is one cycle shifted from the canonical mapping. + +We have a one-cycle structural pipeline delay going from this step to the next one, so +we use this pipeline delay to do a shift with no add by setting the `opmode` from `C+M` to +`C+0` (in other words, instead of adding to the current multiplication output for the last +step, we squash that input and set it to 0). + +The fact that we pipeline the data also gives us an opportunity to pick up the upper limb +of the partial sum collapse "for free" by copying it into the "D" register of the DSP48E1 +during the shift step. + +In C, the code basically looks like this: + +.. 
code-block:: c + + // initialize the a_bar set of data + for( int i = 0; i < DSP17_ARRAY_LEN; i++ ) {{ + a_bar_dsp[i] = a_dsp[i] * 19; + }} + operand p; + for( int i = 0; i < DSP17_ARRAY_LEN; i++ ) {{ + p[i] = 0; + }} + + // core multiply + for( int col = 0; col < 15; col++ ) {{ + for( int row = 0; row < 15; row++ ) {{ + if( row >= col ) {{ + p[row] += a_dsp[row-col] * b_dsp[col]; + }} else {{ + p[row] += a_bar_dsp[15+row-col] * b_dsp[col]; + }} + }} + }} + +This completes in 15 cycles. + +Collapse Partial Sums +--------------------- + +The potential width of the partial sum is up to 43 bits wide (according to +the paper cited above; the native partial sum precision of the DSP48E1 is 48 bits). +This step divides the partial sums up into 17-bit words, and then shifts the higher +to the next limbs over, allowing them to collapse into a smaller sum that +overflows less. + +:: + + ... P2[16:0] P1[16:0] P0[16:0] + ... P1[33:17] P0[33:17] P14[33:17]*19 + ... P0[50:34] P14[50:34]*19 P13[50:34]*19 + +Again, the magic number 19 shows up to allow sums which "wrapped around" +to add back in. Note that in the timing diagram below, we refer to the +mid- and upper- words of the shifted partial sums as "Q" and "R" respectively, +because the timing diagram lacks the width within a data bubble to +write out the full notation: so `Q0,1` is P14[33:17] and `R0,2` is P13[50:34] for P0[16:0]. + +This is what the C code equivalent looks like for this operation. + +.. 
code-block:: c + + // the lowest limb has to handle two upper limbs wrapping around (Q/R) + prop[0] = (p[0] & 0x1ffff) + + (((p[14] * 1) >> 17) & 0x1ffff) * 19 + + (((p[13] * 1) >> 34) & 0x1ffff) * 19; + // the second lowest limb has to handle just one limb wrapping around (Q) + prop[1] = (p[1] & 0x1ffff) + + ((p[0] >> 17) & 0x1ffff) + + (((p[14] * 1) >> 34) & 0x1ffff) * 19; + // the rest are just shift-and-add without the modular wrap-around + for(int bitslice = 2; bitslice < 15; bitslice += 1) {{ + prop[bitslice] = (p[bitslice] & 0x1ffff) + ((p[bitslice - 1] >> 17) & 0x1ffff) + ((p[bitslice - 2] >> 34)); + }} + +This completes in 2 cycles after a one-cycle pipeline stall delay penalty to retrieve +the partial sum result from the previous step. + +Propagate Carries +----------------- + +The partial sums will generate carries, which need to be propagated down the +chain. The C-code equivalent of this looks as follows: + +.. code-block:: c + + for(int i = 0; i < 15; i++) {{ + if ( i+1 < 15 ) {{ + prop[i+1] = (prop[i] >> 17) + prop[i+1]; + prop[i] = prop[i] & 0x1ffff; + }} + }} + +This completes in 14 cycles. + +Normalize +--------- + +We're almost here, except that $0 \leq result \leq 2^{{256}}-1$, which is slightly +larger than the range of {field_latex}. + +Thus we need to check if number is somewhere in between 0x7ff....ffed and +0x7ff....ffff, or if the 256th bit will be set. In these cases, we need to add 19 to +the result, so that the result is a member of the field $2^{{255}}-19$ (the 256th bit +is dropped automatically when concatenating the fifteen 17-bit limbs together). + +We use the DSP48E1 block to help accelerate the test for this case, so that it +can complete in a single cycle without slowing down the machine. We use the "pattern +detect" (PD) feature of the DSP48E1 to check for all "1's" in bit positions 255-5, and a +single LUT to compare the final 5 bits to check for numbers between {prime_string} and +$2^{{255}}-1$. 
We then OR this result with the 256th bit. + +If the result falls within this special "overflow" case, we add the number 19, otherwise, +we add 0. Note that this add-by-19-or-0 step is implemented by pre-loading the number 19 into the A:B +pipeline registers of the DSP48E1 block during the "propagate" stage. Selection of +whether to add 19 or 0 relies on the fact that the DSP48E1 block has an input multiplexer +to its internal adder that can pick data from multiple sources, including the ability to +pick no source by loading the number 0. Thus the operation mode of the DSP48E1 is adjusted +to either pull an input from A:B (that is, the number 19) or the number 0, based on the +result of the overflow computation. Thus the PD feature is important in preventing this +step from being rate-limiting. With the PD feature we only have to check an effective 16 +intermediate results, instead of 256 raw bits, and then set the operation mode of +the ALU. + +Thus, this operation completes in a single cycle. + +After adding the number 19, we have to once again propagate carries. Even if we add the number +0, we also have to "propagate carries" for constant-time operation. This is done by +running the carry propagate operation described above a second time. + +Once the second carry propagate is finished, we have the final result. + +Potential corner case +--------------------- + +There is a potential corner case where the carry-propagated result going into +"normalize" is between + + 0xFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFDA and + 0xFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFEC + +In this case, the top bit would be wrapped around, multiplied by 19, and added to +the LSB, but the result would not be a member of $2^{{255}}-19$ (it would be one +of the 19 numbers just short of $2^{{255}}-1$), and the multiplier would pass it +on as if it were a valid result. 
+ +In some cases, this isn't even a problem, because if the subsequent result goes through +any operation that includes a "TRD" instruction, it should reduce the number +correctly. + +However, I do not think this corner case is possible, because the overflow path to set the +high bit is from the top limb going from 0x1_FFFF -> 0x2_0000 (that is, 0x7FFFC -> 0x80000 +when written MSB-aligned) due to a carry coming in from the lower limb, and +it would require the carry to be very large, not just +1 as shown in the simple +rollover case, but a value from 0x1_FFED-0x1_FFDB. + +I don't have a formal mathematical proof of this, but I strongly suspect that +carry values going into the top limb cannot approach these large numbers, and therefore +it is not possible to hit this corner case. + +In the case that it _could_ be hit, the fix would be to add an additional +detection stage to handle the case that the result is not normalized, and +to add 19 to the final sum. This can be accelerated to a single cycle by also +adding 1 into the partial products, short-circuiting the carry propagate because +this should be the only special case we're trying to check for (we should definitely +not be able to re-overflow because we are only adding at most 19 to the final result +in the previous step). + +It'd be great to have a real mathematician comment if this is a real corner case. + +Maybe this is a more solid reasoning why this corner case can't happen: + +The biggest value of a partial sum is 0x53_FFAC_0015 (0x1_FFFF * 0x1_FFFF * 15). +This means the biggest value of the third overflowed 17-bit limb is 0x14. Therefore +the biggest value resulting from the "collapse partial sums" stage is +0x1_FFFF + 0x1_FFFF + 0x14 = 0x4_0012. Thus the largest carry term that has +to propagate is 0x4_0012 >> 17 = 2. 2 is much smaller than the amount required +to trigger this condition, that is, a value in the range of 0x1_FFED-0x1_FFDB. +Thus, perhaps this condition simply can't happen? 
+ +""") + # array of 15, 17-bit wide signals = 255 bits + a_17 = [Signal(17),Signal(17),Signal(17),Signal(17),Signal(17), + Signal(17),Signal(17),Signal(17),Signal(17),Signal(17), + Signal(17),Signal(17),Signal(17),Signal(17),Signal(17),] + b_17 = [Signal(17),Signal(17),Signal(17),Signal(17),Signal(17), + Signal(17),Signal(17),Signal(17),Signal(17),Signal(17), + Signal(17),Signal(17),Signal(17),Signal(17),Signal(17),] + # split incoming data into 17-bit wide chunks + for i in range(15): + self.comb += [ + a_17[i].eq(self.a[i*17:i*17+17]), + b_17[i].eq(self.b[i*17:i*17+17]), + ] + + # signals common to all DSP blocks + dsp_alumode = Signal(4) + dsp_opmode = Signal(7) + dsp_reset = Signal() + dsp_a1_ce = Signal() + dsp_a2_ce = Signal() + dsp_b1_ce = Signal() + dsp_b2_ce = Signal() + dsp_d_ce = Signal() + dsp_p_ce = Signal() + self.comb += [ + dsp_reset.eq(ResetSignal()), + dsp_b1_ce.eq(0), # not used + ] + zeros = Signal(48, reset=0) # dummy zeros signals to tie off unused bits of the DSP48E + self.comb += zeros.eq(0) + + step = Signal(max=15+1) # controls the multiplication step + prop = Signal() # count the propagations + + for i in range(15): + # create all the per-block DSP signals before we loop through and connect them + setattr(self, "dsp_a" + str(i), Signal(48, name="dsp_a" + str(i))) + setattr(self, "dsp_b" + str(i), Signal(17, name="dsp_b" + str(i))) + setattr(self, "dsp_c" + str(i), Signal(48, name="dsp_c" + str(i))) + setattr(self, "dsp_d" + str(i), Signal(17, name="dsp_d" + str(i))) + setattr(self, "dsp_match" + str(i), Signal(name="dsp_match"+str(i))) + setattr(self, "dsp_p" + str(i), Signal(48, name="dsp_p"+str(i))) + setattr(self, "dsp_p_ce" + str(i), Signal(48, name="dsp_p_ce"+str(i))) + setattr(self, "dsp_inmode" + str(i), Signal(5, name="dsp_inmode"+str(i))) + + self.timing = ModuleDoc(title="Detailed timing operation", body=""" + +Below is a detailed timing diagram that illustrates the expected sequence of events +by the implementation of this 
code. + +Signal descriptions: + +* `clk` is `mul_clk`, nominally 100MHz (2x engine clock) +* `go` is the signal from the microcode sequencer to latch inputs and start computation +* `self.a` is the `a` operand +* `self.b` is the `b` operand +* `state` is the current `mseq` state machine's state +* `step` is a counter used by `mseq` to control how many iterations to run in a given state +* `prop` is a counter used to count which iteration of the carry propagate we're on +* `dsp.a`-`dsp.d` is the `a-d` inputs to the DSP48E1 blocks +* `A1_CE` is the enable to the A1 pipe register. Note that we configure 2x pipeline registers on the A input. +* `A1` is a pipe register internal to the DSP48E1 block +* `A2_CE` is the enable to the A2 pipe register +* `A2` is a pipe register internal to the DSP48E1 block +* `B2_CE` is the enable to the B2 pipe register. Note that we configure 1x pipeline registers on the B input, and when 1x register is selected, the second pipe register (B2) is used. Thus there is no B1 register. +* `B2` is a pipe register internal to the DSP48E1 block +* `C` is the C input value. Note that this one input is *not* pipelined, and thus there is no register enable for it. Because it is not pipelined it's also likely to be critical-path. We use this mainly to loop P results back into the ALU with masking operations applied within a single cycle. +* `D_CE` is the enable to the D pipe register. There is only one possible D register in the DSP48E1 +* `D` is a pipe register internal to the DSP48E1 block that feeds the pre-adder +* `inmode` configures the input mode to the DSP48E1 ALU blocks. It is not pipelined and allows us to re-route data from A, B, C, and D to various ALU internals. +* `opmode` configures what computation to perform by the DSP48E1 ALU on the current cycle. It is not pipelined. +* `P_CE` is the enable for the output product register. +* `P` is the output product register presented by the DSP48E1 ALU. 
+* `overflow` is the overflow detection output from the DSP48E1 ALU. Its result timing is synchronous with the `P` register. +* `done` is the signal from the multiplier back to the microcode sequencer to latch the result and finish computation + +.. wavedrom:: + :caption: Detailed timing of the multiply operation + + { "config": {skin : "default"}, + "signal" : [ + { "name": "clk", "wave": "p......|.........|.......|....." }, + { "name": "go", "wave": "010..........................10" }, + { "name": "self.a", "wave": "x2...........................2.", "data": ["A0[255:0]","A1[255:0]"] }, + { "name": "self.b", "wave": "x2...........................2.", "data": ["B0[255:0]","B1[255:0]"] }, + { "name": "state", "wave": "2.34......5555...|..86...|..923", "data":["IDLE","SETA","MPY","DLY","PLSB","PMSB","PROP","NORM","PROP","DONE","IDLE","SETA"]}, + { "name": "step", "wave": "x..2===|==5...55|5556.666|66xxx", "data":["0","1", "2", "3","13","14","0","1","2","11","12","13","0","1","2","11","12","13"]}, + { "name": "prop", "wave": "x.........5.....|...6....|..xxx", "data":["0","1"]}, + { "name": "dsp.a", "wave": "x2x2x.....8x.................2x", "data": ["A0xx","A19","0", "A1xx"] }, + { "name": "dsp.b", "wave": "x2====|==x55xxxxxxx8xx.......2=", "data": ["19","B00","B01","B02","B03","B13","B14","1or19","1or19","19","19","B1_00"] }, + { "name": "dsp.c", "wave": "x...2===|=x5x5...|..x6...|..xxx", "data":["Q0","Q1","Q2","Q3","Q13","P0,0","C* >> 17 ","C* >> 17 "]}, + { "name": "dsp.d", "wave": "x.........55x.xxxxxx...xxxxxxx.", "data":["*Q0,1","R0,2"]}, + {}, + { "name": "A1_CE", "wave": "1.010.....10..................." }, + { "name": "A1", "wave": "x.2.2......8.........x.........", "data": ["A0xx","A0xx*19","0"] }, + { "name": "A2_CE", "wave": "0..10......10.................." }, + { "name": "A2", "wave": "x...2.......8........x.........", "data":["A0xx","0"] }, + { "name": "B2_CE", "wave": "01.......01.0......10.........." 
}, + { "name": "B2", "wave": "x.22===|==x55xxxx.xx8x.........", "data": ["19","B00","B01","B02","B03","B13","B14","1or19","1or19","19"] }, + { "name": "C", "wave": "x...2===|==555...|..86...|..x..", "data": ["Q0","Q1","Q2","Q3","Q13","Q14","P0,0","*P","C* >> 17 ","C&","C* >> 17 "] }, + { "name": "D_CE", "wave": "0.........1.0.................." }, + { "name": "D", "wave": "x..........55xx................", "data": ["Q0,1","R0,2","QS14,1","RS14,2","QS14,1","RS14,2"] }, + { "name": "inmode", "wave": "x.2.2.....x5.x.xx.xx8x.........", "data":["A1B2","AnB2","DB2","0B2"]}, + { "name": "opmode", "wave": "x.2.=.....2555...|..86...|..xxx", "data":["M","C+M","C+0","C+M","P+M","C+P","AB/0+C","C+P"]}, + {}, + { "name": "P_CE", "wave": "0.1.....|....5555|5516666|660.1", "data": ["P1", "P2", "P3","P4","P13","P14","P1", "P2", "P3","P4","P13","P14"] }, + { "name": "P", "wave": "x..2====|===55555|5552666|666x.", "data": ["A19","P0","P1","P2","P3","P13","P14","P0","PLSB","PMSB","C1","C2","C3","C12", "C13","C14","S+","C1","C2","C3","C12", "C13","C14","final"] }, + { "name": "overflow", "wave": "x...................2x.........", "data":["Y/N"]}, + { "name": "done", "wave": "0...........................10." }, + ]} + +Notes: + +1. the final product sum on the first DLY cycle is just a shift to get the + product results into the right unit. Thus, for the load of `dsp.d` `*Q0,1`, it needs + to pick the result off of the neighboring DSP unit, because it needs to acquire the value + before the final shift. +2. The `S+` on the P line is the non-normalized sum. This is basically the final result, but + sometimes with the 19 added to the least significant limb, in the case that the result is greater than + or equal to $2^{{255}}-19$. This addition must be propagated through the whole result. +3. The "done" state is slightly more complicated than illustrated here. 
Because the multiplier runs at + twice the speed of the sequencing engine (two `mul_clk` per `eng_clk`), "done" actually spans between + 2 and 3 states. In the case that the computation finishes in-phase with the slower engine clock, we assert + "done" for two cycles. In the case that we finish out of phase, have to wait a half `eng_clk` cycle + (one state in `mul_clk`) before asserting the done pulse for two `mul_clk` cycles (thus 3 total cycles). + The computation is fixed-time, so the determination of how many wait states is done at the design stage and + hard-coded. However, anytime the algorithm is adjusted, the designer needs to re-check the number of + cycles it took and pick the correct "done" sequencing. + + """) + + self.diagrams = ModuleDoc(title="Dataflow Diagrams", body=""" + +Here's a collection of data flow diagrams that help illustrate how to configure the DSP48E1 block. +The DSP48E1 block has a lot of configuration options, so instead of overlaying on the messy overall +diagram of the DSP48E1, we simplify its construction and draw only the pieces relevant to each phase +of the algorithm. + +There's no substitute for consulting Xilinx UG479 (https://www.xilinx.com/support/documentation/user_guides/ug479_7Series_DSP48E1.pdf), +but if you're just getting started here's a few breadcrumbs to help you steer around the block. + +1. The block contains a pre-adder, multiplier, and "ALU". +2. It has four major inputs, A, B, C, and D. A/B are typically multiplier inputs, C is mostly intended for carry propagation and shuttling partial sums, and D is a pre-adder input. Thus a common form of computation is P = (A+D)*B + C. +3. Almost any input can be zero'd out, and so if you wanted to compute just A*B, what is actually computed is (A+D)*B + C but with the C and D values zero'd out. This is controlled by combinations of `inmode` and `opmode`. +4. 
Inputs A-D and output P can all be registered, and for this implementation we put two registers on A, one register on B, zero registers on C, one register on D, and one register on P. +5. Inputs A and B can have two pipeline registers. While the datasheet makes it look like you might be able to selectively write from the DSP48E1 input to either A1/A2 or B1/B2, in fact, you can't. + A2 can only get a value from A1 (thus setting A2 necessitates overwriting the value in A1). However, you can gate the A2's enable, so it can hold a value indefinitely, and the multiplier can route an input from either A1 or A2. We use this to our advantage and load `dsp.a` into the A2 register, and `dsp.a*19` into the A1 register, and then use the `inmode` configuration to switch between these two inputs based on which partial sum we're computing at the moment. + I think normally this feature is used to implement pipelining and pipeline bypassing in other applications, and we are slightly abusing it here to our advantage. +6. Because we configured C to have no input register, it can be used for cycle-to-cycle feedback of partial sums. + Introducing an input register here (per the DRC recommendation emitted by Vivado) could speed up the clock rate but it also introduces a single-cycle stall every time we have to do a partial sum feedback, which is a greater performance impact for our implementation. +7. The "ALU" part of the DSP48E1 is used as the partial sum adder in our implementation (but it can also do logic operations and other fun things that we don't need). It actually adds four numbers: P <- X + Y + Z + Carry bit. + We don't use the carry "bit" as it is only one-bit wide and we are propagating several bits of carry at once, so it is hard-wired to 0. X/Y/Z are up to 48 bits wide, which allows us to add combinations of the multiplier output, a concatenation of A:B (A as MSB, B as LSB), C, P, the number 0, and a couple other source options we don't use in this implementation.
This is controlled by `opmode`. +8. In parallel to the "ALU" is a pattern detector. The pattern being detected is hard-coded into the bitstream, and in this case we are looking for a run of `1`'s to help accelerate the overflow detection problem. The output of the pattern detector is always being computed, and is dataflow-synchronous to the P output. +9. Unused bits of verilog instances in Migen need to be tied to 0; Migen does not automatically extend/pad shorter `Signal` values to match verilog input widths. This is important because the DSP48E1 input widths don't always exactly match the Migen widths. We create a "zeros" signal and `Cat()` it onto the MSBs as necessary to ensure all inputs to the DSP48E1 are properly specified. + +.. image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/mpy_pipe3.png + :alt: data flow block diagram of the multiplier core + +Above are the relevant elements of the DSP48E1 block as configured for the systolic dataflow for the "schoolbook" +multiply operation. Items shaded in gray are external to the DSP48E1 block. + +.. image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/psum3.png + :alt: data flow block diagram of the partial sum step + +Above is the configuration of the DSP48E1 block for the partial sum steps. Partial sum takes two cycles to +sum together the three 17-bit segments of the partial sums. + +.. image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/carry_prop3.png + :alt: data flow block diagram of the carry propagate + +Above is the configuration of the DSP48E1 block for the carry propagate step. This step must be repeated +14 times to handle the worst-case carry propagate path. During the carry propagate step, the pattern +detector is active, and on the final step we check it to see if the result overflows $2^{{255}}-19$. + +.. 
image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/normalize4.png + :alt: data flow block diagram of the normalization step + +Above is the configuration of the DSP48E1 block for the normalization step. If the result overflows $2^{{255}}-19$, +we must add 19 to make it a member of the prime field once again. We can do this in a single cycle by +short-circuiting the carry propagate: we already know we will have to propagate a carry to handle the overflow +case (there are only 19 possible numbers that will overflow this, and all of them have 1's set up the entire +chain), so we pre-add the carry simultaneous with adding the number 19 to the least significant limb. We also +use this step to mask out the upper level bits on the partial sums, because the top bits are now the old +carries that have already been propagated. If we fail to do this, then we re-propagate the carries from the last step. + + """) + + start_pipe = Signal() + self.sync.mul_clk += start_pipe.eq(self.start) # break critical path of instruction decode -> SETUP_A state muxes + self.submodules.mseq = mseq = ClockDomainsRenamer("mul_clk")(FSM(reset_state="IDLE")) + mseq.act("IDLE", + NextValue(step, 0), + NextValue(prop, 0), + If(start_pipe, + NextState("SETUP_A") + ) + ) + mseq.act("SETUP_A", # SETA, load the a, a19 values values + NextState("MULTIPLY"), + ) + mseq.act("MULTIPLY", # MPY + If(step < 14, + NextValue(step, step + 1) + ).Else( + NextState("P_DELAY"), + NextValue(step, 0), + ) + ) + mseq.act("P_DELAY", # DLY - due to pipelining of P register, we have a structural hazard that delays feedback by one cycle + # we take advantage of this time to (1) shift the results into canonical position and (2) nab a copy of the data for the PSUM_MSB state + NextState("PSUM_LSB") + ) + mseq.act("PSUM_LSB", # PLSB + NextState("PSUM_MSB") + ) + mseq.act("PSUM_MSB", # PMSB + NextState("CARRYPROP") + ) + mseq.act("CARRYPROP", # PROP + If( step == 13, + If( prop == 0, + 
NextState("NORMALIZE"), + NextValue(step, 0), + ).Else( + NextState("DONE"), # if modifying to the "DONE" state, change q-latch statement at the end + ) + ).Else( + NextValue(step, step + 1), + ) + ) + mseq.act("NORMALIZE", # NORM + NextState("CARRYPROP"), + NextValue(prop, 1), + NextValue(step, 0), + ) + ### note that the post-amble "manually" aligns the mul_clk to eng_clk phases + ### this can have one of two outcomes if the previous number of states is even or odd + ### in this case, we end up phase mis-aligned, so we have to burn a dummy cycle to sync clocks + ### see q_valid logic at end of this module + mseq.act("DONE", # DONE -- we are actually finished on an odd phase of the eng_clk, can't assert RF here + NextState("DONE2"), + ) + mseq.act("DONE2", # assert valid to the RF here + NextState("DONE3"), + ) + mseq.act("DONE3", # second done state, because we are latching into a half-rate clock domain, so valid is good for one full eng_clk + NextState("IDLE"), + # Note: we could, in theory, pipeline the next multiply by detecting if go goes high here, + # and bypassing IDLE and going straight to SETA, but... 
+ ) + + # DSP48E opcode encodings + # general DSP48E computation is P <- X + Y + Z + C + OP_PASS_M = 0b000_01_01 # X:Y <- M; Z <-0; P <- 0 + M + 0 + OP_M_PLUS_PCIN = 0b001_01_01 # X:Y <- M; Z <-PCIN; P <- PCIN + M + 0 + OP_M_PLUS_C = 0b011_01_01 # X:Y <- M; Z <-C; P <- C + M + 0 + OP_M_PLUS_P = 0b010_01_01 # X:Y <- M; Z <-P ; P <- P + M + 0 + OP_P_PLUS_PCIN17 = 0b101_10_00 # X <- P; Y <- 0; Z <- PCIN >> 17; P <- PCIN>>17 + P + 0 + OP_C_PLUS_P = 0b010_11_00 # X <- 0; Y <- C; Z <- P; P <- 0 + C + P + OP_AB_PLUS_P = 0b010_00_11 # X <- A:B; Y <- 0; Z <- P; P <- A:B + 0 + P + 0 + OP_AB_PLUS_C = 0b011_00_11 # X <- A:B; Y <- 0; Z <- C; P <- A:B + 0 + C + 0 + OP_0_PLUS_P = 0b010_00_00 # X <- 0; Y <- 0; Z <- P; P <- 0 + 0 + P + 0 + OP_C_PLUS_0 = 0b011_00_00 # X <- 0; Y <- 0; Z <- C; P <- C + 0 + 0 + 0 + INMODE_A1 = 0b0001 + INMODE_A2 = 0b0000 + INMODE_D = 0b0110 + INMODE_0 = 0b0010 + INMODE_B2 = 0b0 + # INMODE_B1 = 0b1 # should not be used in this configuration, only 1 BREG configured + + overflow_25519 = Signal() # set during normalize if we're overflowing 2^255-19 + + # see the self.timing documentation (above, best viewed after post-processing with sphinx) for how this all works. 
+ self.comb += [ + dsp_alumode.eq(0), + If(mseq.before_entering("SETUP_A"), + dsp_b2_ce.eq(1), + dsp_a1_ce.eq(1), + ).Elif(mseq.ongoing("SETUP_A"), + # at this point, these are already loaded: A1 <- Axx, B2 <- 19 + # P <- A1 * B2 + dsp_opmode.eq(OP_PASS_M), + # pipeline in the b1 value for the first round of the multiply + dsp_b2_ce.eq(1), + dsp_p_ce.eq(1), + ).Elif(mseq.ongoing("MULTIPLY"), + dsp_p_ce.eq(1), + If(step == 0, + dsp_a1_ce.eq(1), + dsp_a2_ce.eq(1), # latch the pipelined Axx * 19 signal on the first round of multiply + dsp_opmode.eq(OP_PASS_M), # don't add PCIN on the first partial product, as it's bogus on step 0 + ).Else( + dsp_a1_ce.eq(0), + dsp_a2_ce.eq(0), + dsp_opmode.eq(OP_M_PLUS_C), + ), + If(step != 14, + dsp_b2_ce.eq(1), + ).Else( + dsp_b2_ce.eq(0), + ) + ).Elif(mseq.ongoing("P_DELAY"), + dsp_opmode.eq(OP_C_PLUS_0), + dsp_p_ce.eq(1), + dsp_b2_ce.eq(1), + dsp_d_ce.eq(1), + dsp_a1_ce.eq(1), + ).Elif(mseq.ongoing("PSUM_LSB"), + dsp_p_ce.eq(1), + dsp_b2_ce.eq(1), + dsp_d_ce.eq(1), + dsp_opmode.eq(OP_M_PLUS_C), + dsp_a2_ce.eq(1), + ).Elif(mseq.ongoing("PSUM_MSB"), + dsp_p_ce.eq(1), + dsp_opmode.eq(OP_M_PLUS_P), + ).Elif(mseq.ongoing("CARRYPROP"), + dsp_p_ce.eq(0), # move to individual unit P_CEs for this stage + dsp_opmode.eq(OP_C_PLUS_P), + If(step==13, + dsp_b2_ce.eq(1), + ) + ).Elif(mseq.ongoing("NORMALIZE"), + dsp_p_ce.eq(1), + If(overflow_25519 | (self.dsp_p14[17] == 1), + dsp_opmode.eq(OP_AB_PLUS_C), + ).Else( + dsp_opmode.eq(OP_C_PLUS_0), + ) + ) + ] + b_step = Signal(17) + self.comb += [ + # the code below doesn't synthesize well, so let's write out the barrel shifter explicitly + # getattr(self, "dsp_b" + str(i)).eq((self.b >> (17 * (step + 1))) & 0x1_ffff), # b_17[step+1] + # written out explicitly because the fancy for-loop format also leads to a weird synthesis result... 
+ If(step == 0, b_step.eq(b_17[1]) + ).Elif(step == 1, b_step.eq(b_17[2]) + ).Elif(step == 2, b_step.eq(b_17[3]) + ).Elif(step == 3, b_step.eq(b_17[4]) + ).Elif(step == 4, b_step.eq(b_17[5]) + ).Elif(step == 5, b_step.eq(b_17[6]) + ).Elif(step == 6, b_step.eq(b_17[7]) + ).Elif(step == 7, b_step.eq(b_17[8]) + ).Elif(step == 8, b_step.eq(b_17[9]) + ).Elif(step == 9, b_step.eq(b_17[10]) + ).Elif(step == 10, b_step.eq(b_17[11]) + ).Elif(step == 11, b_step.eq(b_17[12]) + ).Elif(step == 12, b_step.eq(b_17[13]) + ).Elif(step == 13, b_step.eq(b_17[14]) + ) + ] + + # reduce width of DSP's INMODE combinational path using a sub machine that reduces + # the complexity of the `mseq` machine and allows for a pipeline stage to be inserted... + INMODE_IDLE = 0 + INMODE_MPY = 1 + INMODE_PROP1 = 2 + INMODE_PROP2 = 3 + inmode_sel = Signal(2) + self.sync.mul_clk += [ + If(mseq.ongoing("IDLE") | mseq.ongoing("SETUP_A"), + inmode_sel.eq(INMODE_IDLE) + ).Elif(mseq.ongoing("MULTIPLY"), + inmode_sel.eq(INMODE_MPY), + ).Elif(mseq.ongoing("P_DELAY") | mseq.ongoing("PSUM_LSB"), + inmode_sel.eq(INMODE_PROP1) + ).Else( + inmode_sel.eq(INMODE_PROP2) + ) + ] + + for i in range(15): + # INMODE is a critical path, so rewrite code not in computation order but in signal use order to better + # understand how to optimize it. 
+ self.comb += [ + If(inmode_sel == INMODE_IDLE, + getattr(self, "dsp_inmode" + str(i)).eq(Cat(INMODE_A1, INMODE_B2)), + ), + If(inmode_sel == INMODE_MPY, + If(step == 0, + getattr(self, "dsp_inmode" + str(i)).eq(Cat(INMODE_A1, INMODE_B2)), + # A1 has Axx on the first step only + ).Elif(i > (14 - step), # lay out the diagonal wrap-around of partial sums + getattr(self, "dsp_inmode" + str(i)).eq(Cat(INMODE_A1, INMODE_B2)), # A1 has Axx*19 + ).Else( + getattr(self, "dsp_inmode" + str(i)).eq(Cat(INMODE_A2, INMODE_B2)), + # A2 has Axx for rest of steps + ) + ), + If(inmode_sel == INMODE_PROP1, + getattr(self, "dsp_inmode" + str(i)).eq(Cat(INMODE_D, INMODE_B2)), + ), + If(inmode_sel == INMODE_PROP2, + getattr(self, "dsp_inmode" + str(i)).eq(Cat(INMODE_0, INMODE_B2)), + ) + ] + + # rest of signals are in computation order below + self.comb += [ + If(mseq.before_entering("SETUP_A"), + getattr( self, "dsp_a" + str(i) ).eq(Cat(a_17[i], zeros[:(30-17)])), + getattr( self, "dsp_b" + str(i) ).eq(19), + ).Elif(mseq.ongoing("SETUP_A"), + getattr(self, "dsp_b" + str(i)).eq(b_17[0]), # preload B00 + ).Elif(mseq.ongoing("MULTIPLY"), + getattr(self, "dsp_c" + str(i)).eq(getattr(self, "dsp_p" + str( (i+1) % 15 ))), + If(step == 0, + getattr(self, "dsp_a" + str(i)).eq(getattr(self, "dsp_p" + str(i))), + ), + If(step < 14, + getattr(self, "dsp_b" + str(i)).eq(Cat(b_step, zeros[:1])), # b_17[step+1]; note that b input is 18 bits wide, so pad with one 0 to prevent a dangling X on the high bit + ), + ) + ] + + if i > 0: # sum is different from bottom limb, as the top MSB wraps around + self.comb += [ + If(mseq.ongoing("P_DELAY"), + getattr(self, "dsp_c" + str(i)).eq(getattr(self, "dsp_p" + str((i + 1) % 15))), + getattr(self, "dsp_d" + str(i)).eq((getattr(self, "dsp_p" + str(i)) >> 17) & 0x1_ffff), # (i-1)+1, the +1 is because the result has not been shifted yet + getattr(self, "dsp_b" + str(i)).eq(1), + )] + else: + self.comb += [ + If(mseq.ongoing("P_DELAY"), + getattr(self, "dsp_a" + 
str(i)).eq(zeros), + getattr(self, "dsp_c" + str(i)).eq(getattr(self, "dsp_p" + str((i + 1) % 15))), + getattr(self, "dsp_d" + str(i)).eq((getattr(self, "dsp_p" + str(0)) >> 17) & 0x1_ffff), + getattr(self, "dsp_b" + str(i)).eq(19), + )] + + self.comb += [ + If(mseq.ongoing("PSUM_LSB"), + getattr(self, "dsp_c" + str(i)).eq(getattr(self, "dsp_p" + str(i)) & 0x1_ffff), + )] + if i > 1: # sum-ordering is different for the bottom two limbs, as the top wraps around into two limbs + self.comb += [ + If(mseq.ongoing("PSUM_LSB"), + getattr(self, "dsp_d" + str(i)).eq((getattr(self, "dsp_p" + str(i - 2)) >> 34) & 0x1_ffff), + getattr(self, "dsp_b" + str(i)).eq(1), + )] + elif i == 1: + self.comb += [ + If(mseq.ongoing("PSUM_LSB"), + getattr(self, "dsp_d" + str(i)).eq((getattr(self, "dsp_p" + str(14)) >> 34) & 0x1_ffff), + getattr(self, "dsp_b" + str(i)).eq(19), + )] + else: + self.comb += [ + If(mseq.ongoing("PSUM_LSB"), + getattr(self, "dsp_d" + str(i)).eq((getattr(self, "dsp_p" + str(13)) >> 34) & 0x1_ffff), + getattr(self, "dsp_b" + str(i)).eq(19), + )] + + self.comb += [ + If(mseq.ongoing("PSUM_MSB"), + getattr(self, "dsp_c0").eq(zeros), # dsp_c is actually don't care due to the opmode + ).Elif(mseq.ongoing("NORMALIZE"), + getattr(self, "dsp_c" + str(i)).eq(getattr(self, "dsp_p" + str(i)) & 0x1_ffff), + ) + ] + + if i == 0: + self.comb += [ + If(mseq.ongoing("CARRYPROP"), + getattr(self, "dsp_c" + str(i)).eq( zeros ), + ), + If(mseq.ongoing("CARRYPROP") & (step == 13), + getattr(self, "dsp_b" + str(i)).eq( 19 ), # special-case constant to handle normalization in overflow of prime field; a is loded with 0 on previous cycle + ), + ] + else: + self.comb += [ + If(mseq.ongoing("CARRYPROP"), + getattr(self, "dsp_c" + str(i)).eq( Cat(getattr(self, "dsp_p" + str(i - 1)) >> 17, zeros[:17]) ), + getattr(self, "dsp_p_ce" + str(i)).eq(step == (i-1)), + ), + If(mseq.ongoing("CARRYPROP") & (step == 13), + getattr(self, "dsp_b" + str(i)).eq(0), + ) + ] + if sim: + instance = 
"DSP48E1_sim" + else: + instance = "DSP48E1" + self.specials += [ + Instance(instance, name="DSP_ENG25519_" + str(i), + # configure number of input registers + p_ACASCREG=1, + p_AREG=2, + p_ADREG=0, + p_ALUMODEREG=0, + p_BCASCREG=1, + p_BREG=1, + + # only pipeline at the output + p_CARRYINREG=0, + p_CARRYINSELREG=0, + p_CREG=0, + p_DREG=1, # i think we can use this to save some fabric registers + p_INMODEREG=0, + p_MREG=0, + p_OPMODEREG=0, + p_PREG=1, + + p_A_INPUT="DIRECT", + p_B_INPUT="DIRECT", + p_USE_DPORT="TRUE", + p_USE_MULT="DYNAMIC", + p_USE_SIMD="ONE48", + + # setup pattern detector to catch the case of mostly 1's + p_AUTORESET_PATDET="NO_RESET", + p_MASK =0xffff_fffe_0000, #'1'*(48-17)+'0'*17, # 1 bits are ignored, 0 compared + p_PATTERN=0x1_ffff, # '0'*(48-17)+'1'*17, # compare against 0x1_FFFF + p_SEL_MASK="MASK", + p_SEL_PATTERN="PATTERN", + p_USE_PATTERN_DETECT="PATDET", + + # signals + i_A=getattr(self, "dsp_a" + str(i)), + i_ALUMODE=dsp_alumode, + i_B=Cat(getattr(self, "dsp_b" + str(i)), zeros[:(18-17)]), # extra bits must be set to zero + i_C=getattr(self, "dsp_c" + str(i)), + i_CARRYIN=0, + i_CARRYINSEL=zeros[:3], + i_CEA1=dsp_a1_ce, + i_CEA2=dsp_a2_ce, + i_CEAD=0, # no pipe + i_CEALUMODE=0, # no pipe + i_CEB1=dsp_b1_ce, + i_CEB2=dsp_b2_ce, + i_CEC=0, # no pipe + i_CECARRYIN=0, + i_CECTRL=0, # no pipe on opmode + i_CED=dsp_d_ce, + i_CEP=dsp_p_ce | getattr(self, "dsp_p_ce" + str(i)), + i_CLK=ClockSignal("mul_clk"), # run at 2x speed of engine clock + i_D=Cat(getattr(self, "dsp_d" + str(i)), zeros[:(25-17)]), + i_INMODE=getattr(self, "dsp_inmode" + str(i)), + i_OPMODE=dsp_opmode, + o_P=getattr(self, "dsp_p" + str(i)), + o_PATTERNDETECT=getattr(self, "dsp_match" + str(i)), + + # tie unused CE + i_CEM=0, + i_CEINMODE=1, + + # resets + i_RSTA=dsp_reset, + i_RSTALLCARRYIN=dsp_reset, + i_RSTALUMODE=dsp_reset, + i_RSTB=dsp_reset, + i_RSTC=dsp_reset, + i_RSTCTRL=dsp_reset, + i_RSTD=dsp_reset, + i_RSTINMODE=dsp_reset, + i_RSTM=dsp_reset, + i_RSTP=dsp_reset, 
+ ) + ] + self.sync.mul_clk += [ # this syncs into the eng_clk domain + If(mseq.ongoing("DONE"), ## mod this to sync with the phase that the state machine ends on + self.q[i * 17:i * 17 + 17].eq(getattr(self, "dsp_p" + str(i))[:17]), + ).Else( + self.q[i * 17:i * 17 + 17].eq(self.q[i * 17:i * 17 + 17]), + ), + ] + # whether we are asserting on DONE/DONE2 or DONE2/DONE3 depends on even/odd # of states previously spent to compute the mul + self.sync.mul_clk += [ + If(mseq.ongoing("DONE2") | mseq.ongoing("DONE3"), + self.q_valid.eq(1), + ).Else( + self.q_valid.eq(0), + ) + ] + # compute special-case detection if the partial sum output is >= 2^255-19 + self.comb += [ + overflow_25519.eq( + self.dsp_match14 & + self.dsp_match13 & + self.dsp_match12 & + self.dsp_match11 & + self.dsp_match10 & + self.dsp_match9 & + self.dsp_match8 & + self.dsp_match7 & + self.dsp_match6 & + self.dsp_match5 & + self.dsp_match4 & + self.dsp_match3 & + self.dsp_match2 & + self.dsp_match1 & + (self.dsp_p0 >= 0x1_ffed) + ) + ] + + +class Engine(Module, AutoCSR, AutoDoc): + def __init__(self, platform, prefix, sim=False, build_prefix=""): + opdoc = "\n" + for mnemonic, description in opcodes.items(): + opdoc += f" * **{mnemonic}** ({str(description[0])}) -- {description[1]} \n" + + self.intro = ModuleDoc(title="Curve25519 Engine", body=""" +The Curve25519 engine is a microcoded hardware accelerator for Curve25519 operations. +The Engine loosely resembles a Harvard architecture microcoded CPU, with a single +512-entry, 256-bit wide 2R1W windowed-register file, a handful of execution units, and a "mailbox" +unit (like a load/store, but transactional to wishbone). The Engine's microcode is +contained in a 1k-entry, 32-bit wide microcode block. Microcode procedures are written to +the block, and execution will start from the `mpstart` offset when the `go` bit is set. 
+Execution will stop after either one of two conditions is met: either a `FIN` instruction +is executed, or the microcode program counter (mpc) goes past the stop threshold, computed +as `mpstart` + `mplen`. + +The register file is "windowed". A single window consists of 32x256-bit wide registers, +and there are up to 16 windows. The concept behind windows is that core routines, such +as point doubling and point addition, are codable using no more than 32 intermediate +registers. The same microcode can be used, then, to serve point operations to up to +16 different clients, selectable by setting the appropriate window. Note that the register +file will stripe across four 4kiB pages, which means that memory protection can be +enforced at page-level boundaries by hardware (with the help of the OS) for up to four +separate clients, each getting four register windows. + +Every register read can be overridden from a constant ROM, by asserting `ca` or `cb` for +registers a and b respectively. When either of these bits is asserted, the respective +register address is fed into a "constants" lookup table, and the result of that table lookup is +substituted for the register value. This means up to 32 commonly used constants may be stored +in the hardware for quick retrieval. + +.. image:: https://raw.githubusercontent.com/betrusted-io/gateware/master/gateware/curve25519/block_diagram.png + :alt: High-level block diagram of the Curve25519 engine + +Above is a high-level block diagram of the Curve25519 engine. Four clocks are present +in this microarchitecture, and they are phase-aligned thanks to the 7-Series MMCM +and low-skew global clock network. `eng_clk` is 50MHz, `mul_clk` is 100MHz, and +`rf_clk` is 200MHz. The slowest 50MHz `eng_clk` clock controls the `seq` state machine, whose +state names are listed on the left. 
A 50MHz base clock is chosen because this allows a +single-cycle 256-bit add/sub using hardware carry chains in the Spartan7 -1L speed grade, +greatly simplifying most of the arithmetic blocks. Faster clocks are used to pump the microcode +RAM (100MHz) and register file (200MHz), so that we are wasting less time fetching instructions +and operands. In particular, the register file uses four phases because we are emulating +a three-port register file (2R1W) using a single-port memory primitive, and the microcode RAM +runs at 100MHz (sysclk) for convenience of reading/writing instructions from the Wishbone bus. +Not shown in the diagram are the global "window" register bits, or the multiplexers that +switch off the datapaths when the system is not running, allowing Wishbone full access to +the machine state. + +Execution units are subclasses of "ExecUnit", and their instantiation is controlled by +inclusion in the `exec_units` dictionary. Likewise, opcodes are defined in the `opcodes` +dictionary, and opcodes are bound to ExecUnits by passing them as the `opcode_list` argument +to the execution units. + +Note that execution units can take an arbitrary amount of time to complete. Most will complete +in one cycle, but for example, the multiplier takes 52 cycles @ 100MHz, or 26 `eng_clk` cycles. +The current implementation does not allow pipelined operation; registered stages are provided +to break combinational paths and bring up the base clock rate, but every instruction must go through +the entire FETCH-EXEC-WAIT_DONE cycle before the next one can issue. + +The design is partially outfitted with registers to facilitate pipelining in the future, but +the current simplified implementation is expected to provide adequate speedup. It's +probably not worth the additional resources to do e.g. pipeline bypassing and hazard checking, +as the target FPGA design is nearly at capacity. 
+ +A conservative implementation (no optimization of intermediate values, immediate reduction of +every add/sub operation) of Montgomery scalar multiplication using Engine25519 +completes one scalar multiply operation in 2.270ms, compared to 103ms in software. +This does not include the time required to do the final affine inversion (done in software, +with significant overhead -- about 100ms), or the time to load the microcode and operands (about 5us). +The affine inversion can also be microcoded, it just hasn't been done yet. + +The Engine address space is divided up as follows (expressed as offset from base):: + + 0x0_0000 - 0x0_0fff: microcode (one 4k byte page) + 0x1_0000 - 0x1_3fff: memory-mapped register file (4 x 4k pages = 16kbytes) + +Here are the currently implemented opcodes for The Engine: +{} + """.format(opdoc)) + + microcode_width = 32 + microcode_depth = 1024 + running = Signal() # asserted when microcode is running + + instruction = Record(instruction_layout) # current instruction to execute + illegal_opcode = Signal() + + ### register file + rf_depth_raw = 512 + rf_width_raw = 256 + self.submodules.rf = rf = RegisterFile(depth=rf_depth_raw, width=rf_width_raw) + self.window = CSRStorage(fields=[ + CSRField("window", size=log2_int(rf_depth_raw) - log2_int(num_registers), description="Selects the current register window to use"), + ]) + + self.mpstart = CSRStorage(fields=[ + CSRField("mpstart", size=log2_int(microcode_depth), description="Where to start execution") + ]) + self.mplen = CSRStorage(fields=[ + CSRField("mplen", size=log2_int(microcode_depth), description="Length of the current microcode program. 
Thus valid code must be in the range of [mpstart, mpstart + mplen]"), + ]) + self.control = CSRStorage(fields=[ + CSRField("go", size=1, pulse=True, description="Writing to this puts the engine in `run` mode, and it will execute mplen microcode instructions starting at mpstart"), + ]) + self.mpresume = CSRStatus(fields=[ + CSRField("mpresume", size=log2_int(microcode_depth), description="Where to resume execution after a pause") + ]) + + self.power = CSRStorage(fields=[ + CSRField("on", size=1, reset=0, + description="Writing `1` turns on the clocks to this block, `0` stops the clocks (for power savings). The handling of the clock gate is in a different module, this is just a flag to that block."), + CSRField("pause_req", size=1, description="Writing a `1` to this block will pause execution at the next micro-op, and allow for read-out of data from RF/microcode. Must check pause_gnt to confirm the pause has happened. Used to interrupt flow for suspend/resume."), + ]) + # bring pause into the eng_clk domain + pause_req = Signal() + self.sync.eng_clk += pause_req.eq(self.power.fields.pause_req) + # re-sync the eng_clk phase to the RF phase whenever clocks are re-applied. We don't guarantee that the clocks start exactly + # at the same time, so you can get phase shift... + power_on_delay = Signal(max=16, reset=15) + eng_powered_on = Signal() + self.sync += [ # stretch out any power on pulse so we can process a reset in the clk50 domain after its enable has been switched on + If(~self.power.fields.on, + power_on_delay.eq(15) + ).Elif(power_on_delay > 0, + power_on_delay.eq(power_on_delay - 1) + ).Else( + power_on_delay.eq(0) + ), + eng_powered_on.eq(power_on_delay == 0), # make a signal that specifies that the engine is powered on that happens 16 cycles after the clocks are turned on + # note that this signal drops only *after* the power has been toggled, because when the clock is cut, + # the downstream "eng_clk" domain signals won't capture the latest state. 
So, once the power comes on, + # eng_powered_on must drop for a few cycles, then come back up again, which properly triggers a synchronization of the RF. + ] + eng_on_50 = Signal() + eng_on_50_r = Signal() + self.specials += MultiReg(eng_powered_on, eng_on_50, "eng_clk") + self.sync.eng_clk += eng_on_50_r.eq(eng_on_50) + rf_reset_clear = Signal() + self.specials += MultiReg(ResetSignal("eng_clk"), rf_reset_clear, "eng_clk") # sync up the register file's fast clock to our slow clock + self.comb += rf.clear.eq(rf_reset_clear | (eng_on_50 & ~eng_on_50_r)) + + self.status = CSRStatus(fields=[ + CSRField("running", size=1, description="When set, the microcode engine is running. All wishbone access to RF and microcode memory areas will stall until this bit is clear"), + CSRField("mpc", size=log2_int(microcode_depth), description="Current location of the microcode program counter. Mostly for debug."), + CSRField("pause_gnt", size=1, description="When set, the engine execution has been paused, and the RF & microcode ROM can be read out for suspend/resume"), + CSRField("sigill", size=1, description="Illegal Instruction"), + CSRField("finished", size=1, description="Finished"), + ]) + pause_gnt = Signal() + mpc = Signal(log2_int(microcode_depth)) # the microcode program counter + running_r = Signal() + self.sync += [ + self.status.fields.running.eq(running), + self.status.fields.pause_gnt.eq(pause_gnt), + self.status.fields.mpc.eq(mpc), + self.status.fields.sigill.eq(illegal_opcode), + self.status.fields.finished.eq(((~running & running_r) | self.status.fields.finished) & (~(running & ~running_r))), + ] + + self.submodules.ev = EventManager() + self.ev.finished = EventSourcePulse(description="Microcode run finished execution") + self.ev.illegal_opcode = EventSourcePulse(description="Illegal opcode encountered") + self.ev.finalize() + ill_op_r = Signal() + self.sync += [ + running_r.eq(running), + ill_op_r.eq(illegal_opcode), + ] + self.comb += [ + 
self.ev.finished.trigger.eq(~running & running_r), # falling edge pulse on running + self.ev.illegal_opcode.trigger.eq(~ill_op_r & illegal_opcode), + ] + + ### microcode memory - 1rd/1wr dedicated to wishbone, 1rd for execution + microcode = Memory(microcode_width, microcode_depth) + self.specials += microcode + micro_wrport = microcode.get_port(write_capable=True, mode=READ_FIRST) # READ_FIRST allows BRAM inference + self.specials += micro_wrport + micro_rdport = microcode.get_port(mode=READ_FIRST) + self.specials += micro_rdport + micro_runport = microcode.get_port(mode=READ_FIRST) # , clock_domain="eng_clk" + self.specials += micro_runport + + self.comb += [ + micro_runport.adr.eq(mpc), + instruction.raw_bits().eq(micro_runport.dat_r), # mapping should follow the record definition *exactly* + instruction.eq(micro_runport.dat_r), + ] + instruction_fields = [] + for opcode, bits, description in instruction_layout: + instruction_fields.append(CSRField(opcode, size=bits, description=description)) + self.instruction = CSRStatus(description="Current instruction being executed by the engine. 
The format of this register exactly reflects the binary layout of an Engine instruction.", fields=instruction_fields) + self.comb += [ + self.instruction.status.eq(micro_runport.dat_r) + ] + + ### wishbone bus interface: decode the two address spaces and dispatch accordingly + self.bus = bus = wishbone.Interface() + wdata = Signal(32) + wadr = Signal(log2_int(rf_depth_raw) + 3) # wishbone bus is 32-bits wide, so 3 extra bits to select the sub-words out of the 256-bit registers + wmask = Signal(4) + wdata_we = Signal() + rdata_re = Signal() + rdata_ack = Signal() + rdata_req = Signal() + radr = Signal(log2_int(rf_depth_raw) + 3) + + micro_rd_waitstates = 2 + micro_rdack = Signal(max=(micro_rd_waitstates+1)) + self.sync += [ + If( ((bus.adr & ((0xFFFF_C000) >> 2)) >= ((prefix | 0x1_0000) >> 2)) & (((bus.adr & ((0xFFFF_C000) >> 2)) < ((prefix | 0x1_4000) >> 2))), + # fully decode register file address to avoid aliasing + If(bus.cyc & bus.stb & bus.we & ~bus.ack, + If(~running | pause_gnt, + wdata.eq(bus.dat_w), + wadr.eq(bus.adr[:wadr.nbits]), + wmask.eq(bus.sel), + wdata_we.eq(1), + If(rf.phase, + bus.ack.eq(1), + ).Else( + bus.ack.eq(0), + ), + ).Else( + wdata_we.eq(0), + bus.ack.eq(0), + ) + ).Elif(bus.cyc & bus.stb & ~bus.we & ~bus.ack, + If(~running | pause_gnt, + radr.eq(bus.adr[:radr.nbits]), + rdata_re.eq(1), + bus.dat_r.eq( rf.ra_dat >> ((radr & 0x7) * 32) ), + bus.ack.eq(rdata_ack), + rdata_req.eq(1), + ).Else( + rdata_re.eq(0), + bus.ack.eq(0), + rdata_req.eq(0), + ) + ).Else( + wdata_we.eq(0), + bus.ack.eq(0), + rdata_req.eq(0), + rdata_re.eq(0), + ) + ).Elif( (bus.adr & ((0xFFFF_F000) >> 2)) == ((0x0 | prefix) >> 2), + # fully decode microcode address to avoid aliasing + If(bus.cyc & bus.stb & bus.we & ~bus.ack, + micro_wrport.adr.eq(bus.adr), + micro_wrport.dat_w.eq(bus.dat_w), + micro_wrport.we.eq(1), + bus.ack.eq(1), + ).Elif(bus.cyc & bus.stb & ~bus.we & ~bus.ack, + micro_wrport.we.eq(0), + micro_rdport.adr.eq(bus.adr), + 
bus.dat_r.eq(micro_rdport.dat_r), + + If(micro_rdack == 0, # 1 cycle delay for read to occur + bus.ack.eq(1), + ).Else( + bus.ack.eq(0), + micro_rdack.eq(micro_rdack - 1), + ) + ).Else( + micro_wrport.we.eq(0), + micro_rdack.eq(micro_rd_waitstates), + bus.ack.eq(0), + ) + ).Else( + # handle all mis-target reads not explicitly decoded + If(bus.cyc & bus.stb & ~bus.we & ~bus.ack, + bus.dat_r.eq(0xC0DE_BADD), + bus.ack.eq(1), + ).Elif(bus.cyc & bus.stb & bus.we & ~bus.ack, + bus.ack.eq(1), # ignore writes -- but don't hang the bus + ).Else( + bus.ack.eq(0), + ) + + ) + ] + + ### execution path signals to register file + ra_dat = Signal(rf_width_raw) + ra_adr = Signal(log2_int(num_registers)) + ra_const = Signal() + rb_dat = Signal(rf_width_raw) + rb_adr = Signal(log2_int(num_registers)) + rb_const = Signal() + wd_dat = Signal(rf_width_raw) + wd_adr = Signal(log2_int(num_registers)) + rf_write = Signal() + + self.submodules.ra_const_rom = Curve25519Const(insert_docs=True) + self.submodules.rb_const_rom = Curve25519Const() + + ### merge execution path signals with host access paths + self.comb += [ + ra_const.eq(instruction.ca), + rb_const.eq(instruction.cb), + ra_adr.eq(instruction.ra), + rb_adr.eq(instruction.rb), + self.ra_const_rom.adr.eq(ra_adr), + self.rb_const_rom.adr.eq(rb_adr), + rf.window.eq(self.window.fields.window), + + If(running & ~pause_gnt, + rf.ra_adr.eq(Cat(ra_adr, self.window.fields.window)), + rf.rb_adr.eq(Cat(rb_adr, self.window.fields.window)), + rf.instruction_pipe_in.eq(instruction.raw_bits()), + rf.wd_adr.eq(Cat(wd_adr, self.window.fields.window)), + rf.wd_dat.eq(wd_dat), + rf.wd_bwe.eq(0xFFFF_FFFF), # enable all bytes + rf.we.eq(rf_write), + ).Else( + rf.ra_adr.eq(radr >> 3), + rf.wd_adr.eq(wadr >> 3), + rf.wd_dat.eq(Cat(wdata,wdata,wdata,wdata,wdata,wdata,wdata,wdata)), # replicate; use byte-enable to multiplex + rf.wd_bwe.eq(0xF << ((wadr & 0x7) * 4)), # select the byte + rf.we.eq(wdata_we), + ), + If(~ra_const, + ra_dat.eq(rf.ra_dat), + 
).Else( + ra_dat.eq(self.ra_const_rom.const) + ), + If(~rb_const, + rb_dat.eq(rf.rb_dat), + ).Else( + rb_dat.eq(self.rb_const_rom.const) + ) + ] + # simple machine to wait 2 RF clock cycles for data to propagate out of the register file and back to the host + rd_wait_states=4 + bus_rd_wait = Signal(max=(rd_wait_states+1)) + self.sync.rf_clk += [ + If(rdata_req, + If(~running | pause_gnt, + If(bus_rd_wait != 0, + bus_rd_wait.eq(bus_rd_wait-1), + ).Else( + rdata_ack.eq(1), + ) + ) + ).Else( + rdata_ack.eq(0), + bus_rd_wait.eq(rd_wait_states), + ) + ] + + sext_immediate = Signal(log2_int(microcode_depth)) + self.comb += sext_immediate.eq(Cat(instruction.immediate, instruction.immediate[8])) # migen signed math failed us. so manually sign extend. this breaks the configurability of the code. + + ### Microcode sequencer. Very simple: it can only run linear sections of microcode. Feature not bug; + ### constant time operation is a defense against timing attacks. + + # pulse-stretch the go from sys->eng_clk. Don't use Migen CDC primitives, as they add latency; a BlindTransfer + # primitive on its own will take about as much time as a couple instructions on The Engine. 
+ engine_go = Signal() + go_stretch = Signal(2) + self.sync += [ # note that we will miss this if the system throttles our clocks when this pulse arrives + If(self.control.fields.go, + go_stretch.eq(2) + ).Else( + If(go_stretch != 0, + go_stretch.eq(go_stretch - 1), + ) + ) + ] + self.comb += engine_go.eq(self.control.fields.go | (go_stretch != 0)) + + self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) + mpc_stop = Signal(log2_int(microcode_depth)) + window_latch = Signal(self.window.fields.window.size) + exec = Signal() # indicates to execution units to start running + done = Signal() # indicates when the given execution units are done (as-muxed from subunits) + self.comb += rf.running.eq(~seq.ongoing("IDLE") | rdata_re), # let the RF know when we're not executing, so it can idle to save power + seq.act("IDLE", + NextValue(pause_gnt, 0), + If(engine_go, + If(pause_req, + NextValue(mpc, self.mpresume.fields.mpresume) + ).Else( + NextValue(mpc, self.mpstart.fields.mpstart) + ), + NextValue(mpc_stop, self.mpstart.fields.mpstart + self.mplen.fields.mplen - 1), + NextValue(window_latch, self.window.fields.window), + NextValue(running, 1), + NextState("FETCH"), + ).Else( + NextValue(running, 0), + ) + ) + seq.act("FETCH", + If(pause_req, + NextState("PAUSED"), + NextValue(pause_gnt, 1), + ).Else( + # one cycle latency for instruction fetch + NextState("EXEC"), + NextValue(pause_gnt, 0), + ) + ) + seq.act("EXEC", # not a great name. This is actually where the register file fetches its contents. + If(instruction.opcode == opcodes["BRZ"][0], + NextState("DO_BRZ"), + ).Elif(instruction.opcode == opcodes["FIN"][0], + NextState("IDLE"), + NextValue(running, 0), + ).Elif(instruction.opcode < opcodes["MAX"][0], # check if the opcode is legal before running it + exec.eq(1), + NextState("WAIT_DONE"), + ).Else( + NextState("ILLEGAL_OPCODE"), + ) + ) + seq.act("WAIT_DONE", # this is where the actual instruction execution happens. 
+ If(done, # TODO: for now, we just wait for each instruction to finish; but the foundations are around for pipelining... + If(mpc < mpc_stop, + NextState("FETCH"), + NextValue(mpc, mpc + 1), + ).Else( + NextState("IDLE"), + NextValue(running, 0), + ) + ) + ) + seq.act("ILLEGAL_OPCODE", + NextState("IDLE"), + NextValue(running, 0), + illegal_opcode.eq(1), + ) + seq.act("DO_BRZ", + If(ra_dat == 0, + If( (sext_immediate + mpc + 1 < mpc_stop) & (sext_immediate + mpc + 1 >= self.mpstart.fields.mpstart), # validate new PC is in range + NextState("FETCH"), + NextValue(mpc, sext_immediate + mpc + 1), + ).Else( + NextState("IDLE"), + NextValue(running, 0), + ) + ).Else( + If(mpc < mpc_stop, + NextState("FETCH"), + NextValue(mpc, mpc + 1), + ).Else( + NextState("IDLE"), + NextValue(running, 0), + ) + ), + ) + seq.act("PAUSED", + If(~pause_req, + NextValue(pause_gnt, 0), + NextState("FETCH"), # could probably go directly to "EXEC", but, this is a minor detail recovering from pause + ) + ) + + exec_units = { + "exec_mask" : ExecMask(width=rf_width_raw), + "exec_logic" : ExecLogic(width=rf_width_raw), + "exec_addsub" : ExecAddSub(width=rf_width_raw), + "exec_testreduce": ExecTestReduce(width=rf_width_raw), + "exec_mul" : ExecMul(width=rf_width_raw, sim=sim), + } + index = 0 + for name, unit in exec_units.items(): + setattr(self.submodules, name, unit); + setattr(self, "done" + str(index), Signal(name="done"+str(index))) + setattr(self, "unit_q" + str(index), Signal(wd_dat.nbits, name="unit_q"+str(index))) + setattr(self, "unit_sel" + str(index), Signal(name="unit_sel"+str(index))) + setattr(self, "unit_wd" + str(index), Signal(log2_int(num_registers), name="unit_wd"+str(index))) + subdecode = Signal() + for op in unit.opcode_list: + self.comb += [ + If(instruction.opcode == opcodes[op][0], + subdecode.eq(1) + ) + ] + instruction_out = Record(instruction_layout) + self.comb += [ + instruction_out.raw_bits().eq(unit.instruction_out) + ] + self.comb += [ + unit.start.eq(exec & 
subdecode), + getattr(self, "done" + str(index)).eq(unit.q_valid), + unit.a.eq(ra_dat), + unit.b.eq(rb_dat), + unit.instruction_in.eq(instruction.raw_bits()), + getattr(self, "unit_q" + str(index)).eq(unit.q), + getattr(self, "unit_sel" + str(index)).eq(subdecode), + getattr(self, "unit_wd" + str(index)).eq(instruction_out.wd), + ] + index += 1 + + for i in range(index): + self.comb += [ + If(getattr(self, "done" + str(i)), + done.eq(1), # TODO: for proper pipelining, handle case of two units done simultaneously! + wd_dat.eq(getattr(self, "unit_q" + str(i))), + wd_adr.eq(getattr(self, "unit_wd" + str(i))), + ).Elif(seq.ongoing("IDLE"), + done.eq(0), + ) + ] + + self.comb += [ + rf_write.eq(done), + ] + + ##### TIMING CONSTRAINTS -- you want these. Trust me. + + clk50 = "clk50" + clk100 = "clk100" + clk200 = "clk200" + # registered exec units need this set of rules + ### clk200->clk50 multi-cycle paths: + # we architecturally guarantee extra setup time from the register file to the point of consumption: + # read data is stable by the 3rd phase of the RF fetch cycle, and so it is in fact ready even before + # the other signals that trigger the execute mode, hence 4+1 cycles total setup time + platform.add_platform_command("set_multicycle_path 5 -setup -start -from [get_clocks " + clk200 + "] -to [get_clocks " + clk50 + "] -through [get_cells *rf_r*_dat_reg*]") + platform.add_platform_command("set_multicycle_path 4 -hold -end -from [get_clocks " + clk200 + "] -to [get_clocks " + clk50 + "] -through [get_cells *rf_r*_dat_reg*]") + ### clk200->clk100 multi-cycle paths: + # same as above, but for the multiplier path. 
+ platform.add_platform_command("set_multicycle_path 3 -setup -start -from [get_clocks " + clk200 + "] -to [get_clocks " + clk100 + "] -through [get_cells *rf_r*_dat_reg*]") + platform.add_platform_command("set_multicycle_path 2 -hold -end -from [get_clocks " + clk200 + "] -to [get_clocks " + clk100 + "] -through [get_cells *rf_r*_dat_reg*]") + + # unregistered exec units need this set of rules + ### clk200->clk200 multi-cycle paths: + # this is for the case when we don't register the data, and just go straight from RF output to RF input. In the worst case + # we have three (? maybe five?) clk200 cycles to compute as we phase through the reads and writes + platform.add_platform_command("set_multicycle_path 3 -setup -from [get_clocks " + clk200 + "] -to [get_clocks " + clk200 + "] -through [get_cells *rf_r*_dat_reg*]") + platform.add_platform_command("set_multicycle_path 2 -hold -end -from [get_clocks " + clk200 + "] -to [get_clocks " + clk200 + "] -through [get_cells *rf_r*_dat_reg*]") + + # other paths + ### sys->clk200 multi-cycle paths: + # microcode fetch is stable 10ns before use by the register file, by design + platform.add_platform_command("set_multicycle_path 2 -setup -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=ra_const) + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=ra_const) + platform.add_platform_command("set_multicycle_path 2 -setup -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=rb_const) + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=rb_const) + platform.add_platform_command("set_multicycle_path 2 -setup -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]",
net=self.ra_const_rom.adr) + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=self.ra_const_rom.adr) + platform.add_platform_command("set_multicycle_path 2 -setup -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=self.rb_const_rom.adr) + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk100 + "] -through [get_nets {net}*]", net=self.rb_const_rom.adr) + # ignore the clk200 reset path for timing purposes -- there is >1 cycle guaranteed after reset for everything to settle before anything moves on these paths + platform.add_platform_command("set_false_path -through [get_nets " + clk200 + "_rst]") + # ignore the clk50 reset path for timing purposes -- there is > 1 cycle guaranteed after reset for everything to settle before anything moves on these paths (applies for other crypto engines, (SHA/AES) as well) + platform.add_platform_command("set_false_path -through [get_nets " + clk50 + "_rst]") + ### sys->clk50 multi-cycle paths: + # microcode fetch is guaranteed not to transition in the middle of an exec computation + platform.add_platform_command("set_multicycle_path 2 -setup -start -from [get_clocks " + clk100 + "] -to [get_clocks " + clk50 + "] -through [get_cells microcode_reg*]") + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk50 + "] -through [get_cells microcode_reg*]") + ### clk50->clk200 multi-cycle paths: + # engine running will set up a full eng_clk cycle before any RF accesses need to be valid + platform.add_platform_command("set_multicycle_path 4 -setup -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_nets {{ {net1} {net2} {net3} }}]", net1=running, net2=running_r, net3=rf.running) + 
platform.add_platform_command("set_multicycle_path 3 -hold -end -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_nets {{ {net1} {net2} {net3} }}]", net1=running, net2=running_r, net3=rf.running) + # this signal is a combo from clk50+sys + platform.add_platform_command("set_multicycle_path 4 -setup -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_pins *rf_wren_pipe_reg/D]") + platform.add_platform_command("set_multicycle_path 3 -hold -end -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_pins *rf_wren_pipe_reg/D]") + # data writeback happens on phase==2, and thus is stable for at least two clk200 clocks extra + platform.add_platform_command("set_multicycle_path 2 -setup -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/DI*DI*]") + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/DI*DI*]") + platform.add_platform_command("set_multicycle_path 2 -setup -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/ADDR*ADDR*]") + platform.add_platform_command("set_multicycle_path 1 -hold -end -from [get_clocks " + clk50 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/ADDR*ADDR*]") + ### sys->clk200 multi-cycle paths: + # data writeback happens on phase==2, and thus is stable for at least two clk200 clocks extra + one full eng_clk (total 25ns) + platform.add_platform_command("set_multicycle_path 4 -setup -from [get_clocks " + clk100 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/DI*DI*]") + platform.add_platform_command("set_multicycle_path 3 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/DI*DI*]") + platform.add_platform_command("set_multicycle_path 4 -setup -from [get_clocks " + clk100 + "] -to 
[get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/ADDR*ADDR*]") + platform.add_platform_command("set_multicycle_path 3 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk200 + "] -through [get_pins RF_RAMB*/*/ADDR*ADDR*]") + # this signal is a combo from clk50+sys + platform.add_platform_command("set_multicycle_path 4 -setup -from [get_clocks " + clk100 + "] -to [get_clocks " + clk200 + "] -through [get_pins *rf_wren_pipe_reg/D]") + platform.add_platform_command("set_multicycle_path 3 -hold -end -from [get_clocks " + clk100 + "] -to [get_clocks " + clk200 + "] -through [get_pins *rf_wren_pipe_reg/D]") diff --git a/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml b/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml new file mode 100644 index 0000000..38dd892 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "engine_code" +version = "0.1.0" +authors = ["Romain Dolbeau "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[dependencies.engine25519-as] +git="https://github.com/betrusted-io/engine25519-as.git" +rev="6681e73c1fdc4a460b5ef9f9c7c91aef546d00f3" + +[dev-dependencies.engine25519-as] +git="https://github.com/betrusted-io/engine25519-as.git" +rev="6681e73c1fdc4a460b5ef9f9c7c91aef546d00f3" + +[[bin]] +name = "engine_code" +path = "engine_code.rs" diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs new file mode 100644 index 0000000..d906a40 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -0,0 +1,296 @@ +#![recursion_limit="512"] + +extern crate engine25519_as; +use engine25519_as::*; + +fn main() -> std::io::Result<()> { + let mcode = assemble_engine25519!( + start: + // P.U in %20 + // P.W in %21 + // Q.U in %22 + // Q.W in %23 + // affine_PmQ in %24 // I + // %30 is the TRD scratch register 
and cswap dummy + // %29 is the subtraction temporary value register and k_t + // x0.U in %25 // I + // x0.W in %26 // I + // x1.U in %27 // I + // x1.W in %28 /// I + // %19 is the loop counter, starts with 254 (if 0, loop runs exactly once) // I + // %31 is the scalar // I + // %18 is the swap variable + psa %18, #0 + + // for i in (0..255).rev() + mainloop: + // let choice: u8 = (bits[i + 1] ^ bits[i]) as u8; + // ProjectivePoint::conditional_swap(&mut x0, &mut x1, choice.into()); + xbt %29, %31 // originally [k_t = (k>>t) & 1], now [k_t = k[254]] + shl %31, %31 // k = k<<1 + xor %18, %18, %29 // swap ^= k_t + + // cswap x0.U (%25), x1.U (%27) + xor %30, %25, %27 + msk %30, %18, %30 + xor %25, %30, %25 + xor %27, %30, %27 + // cswap x0.W (%26), x1.W (%28) + xor %30, %26, %28 + msk %30, %18, %30 + xor %26, %30, %26 + xor %28, %30, %28 + + psa %18, %29 // swap = k_t + + // differential_add_and_double(&mut x0, &mut x1, &affine_u); + psa %20, %25 + psa %21, %26 + psa %22, %27 + psa %23, %28 + // affine_u is already in %24 + + // let t0 = &P.U + &P.W; + add %0, %20, %21 + trd %30, %0 + sub %0, %0, %30 + // let t1 = &P.U - &P.W; + sub %21, #3, %21 // negate &P.W using #FIELDPRIME (#3) + add %1, %20, %21 + trd %30, %1 + sub %1, %1, %30 + // let t2 = &Q.U + &Q.W; + add %2, %22, %23 + trd %30, %2 + sub %2, %2, %30 + // let t3 = &Q.U - &Q.W; + sub %23, #3, %23 + add %3, %22, %23 + trd %30, %3 + sub %3, %3, %30 + // let t4 = t0.square(); // (U_P + W_P)^2 = U_P^2 + 2 U_P W_P + W_P^2 + mul %4, %0, %0 + // let t5 = t1.square(); // (U_P - W_P)^2 = U_P^2 - 2 U_P W_P + W_P^2 + mul %5, %1, %1 + // let t6 = &t4 - &t5; // 4 U_P W_P + sub %29, #3, %5 + add %6, %4, %29 + trd %30, %6 + sub %6, %6, %30 + // let t7 = &t0 * &t3; // (U_P + W_P) (U_Q - W_Q) = U_P U_Q + W_P U_Q - U_P W_Q - W_P W_Q + mul %7, %0, %3 + // let t8 = &t1 * &t2; // (U_P - W_P) (U_Q + W_Q) = U_P U_Q - W_P U_Q + U_P W_Q - W_P W_Q + mul %8, %1, %2 + // let t9 = &t7 + &t8; // 2 (U_P U_Q - W_P W_Q) + add %9, %7, %8 + trd
%30, %9 + sub %9, %9, %30 + // let t10 = &t7 - &t8; // 2 (W_P U_Q - U_P W_Q) + sub %29, #3, %8 + add %10, %7, %29 + trd %30, %10 + sub %10, %10, %30 + // let t11 = t9.square(); // 4 (U_P U_Q - W_P W_Q)^2 + mul %11, %9, %9 + // let t12 = t10.square(); // 4 (W_P U_Q - U_P W_Q)^2 + mul %12, %10, %10 + // let t13 = &APLUS2_OVER_FOUR * &t6; // (A + 2) U_P W_P + mul %13, #4, %6 // #4 is (A+2)/4 + // let t14 = &t4 * &t5; // ((U_P + W_P)(U_P - W_P))^2 = (U_P^2 - W_P^2)^2 + mul %14, %4, %5 + // let t15 = &t13 + &t5; // (U_P - W_P)^2 + (A + 2) U_P W_P + add %15, %13, %5 + trd %30, %15 + sub %15, %15, %30 + // let t16 = &t6 * &t15; // 4 (U_P W_P) ((U_P - W_P)^2 + (A + 2) U_P W_P) + mul %16, %6, %15 + // let t17 = affine_PmQ * &t12; // U_D * 4 (W_P U_Q - U_P W_Q)^2 + mul %17, %24, %12 // affine_PmQ loaded into %24 + + ///// these can be eliminated down the road, but included for 1:1 algorithm correspondence to the reference in early testing + // P.U = t14; // U_{P'} = (U_P + W_P)^2 (U_P - W_P)^2 + psa %20, %14 + // P.W = t16; // W_{P'} = (4 U_P W_P) ((U_P - W_P)^2 + ((A + 2)/4) 4 U_P W_P) + psa %21, %16 + // let t18 = t11; // W_D * 4 (U_P U_Q - W_P W_Q)^2 + // Q.U = t18; // U_{Q'} = W_D * 4 (U_P U_Q - W_P W_Q)^2 + psa %22, %11 // collapsed two to save a register + // Q.W = t17; // W_{Q'} = U_D * 4 (W_P U_Q - U_P W_Q)^2 + psa %23, %17 + + ///// 'return' arguments for next iteration, can be optimized out later + psa %25, %20 + psa %26, %21 + psa %27, %22 + psa %28, %23 + + brz end, %19 // if loop counter is 0, quit + sub %19, %19, #1 // subtract one from the loop counter and run again + brz mainloop, #0 // go back to the top + end: + // ProjectivePoint::conditional_swap(&mut x0, &mut x1, Choice::from(bits[0] as u8)); + // cswap x0.U (%25), x1.U (%27) + xor %30, %25, %27 + msk %30, %18, %30 + xor %25, %30, %25 + xor %27, %30, %27 + // cswap x0.W (%26), x1.W (%28) + xor %30, %26, %28 + msk %30, %18, %30 + xor %26, %30, %26 + xor %28, %30, %28 + + // AFFINE SPLICE -- pass arguments to the
affine block + psa %29, %25 + psa %30, %26 + // W.invert() in %21 + // U in %29 + // W in %30 + // result in %31 + // loop counter in %28 + + // from FieldElement.invert() + // let (t19, t3) = self.pow22501(); // t19: 249..0 ; t3: 3,1,0 + // let t0 = self.square(); // 1 e_0 = 2^1 + mul %0, %30, %30 // self is W, e.g. %30 + // let t1 = t0.square().square(); // 3 e_1 = 2^3 + mul %1, %0, %0 + mul %1, %1, %1 + // let t2 = self * &t1; // 3,0 e_2 = 2^3 + 2^0 + mul %2, %30, %1 + // let t3 = &t0 * &t2; // 3,1,0 + mul %3, %0, %2 + // let t4 = t3.square(); // 4,2,1 + mul %4, %3, %3 + // let t5 = &t2 * &t4; // 4,3,2,1,0 + mul %5, %2, %4 + + // let t6 = t5.pow2k(5); // 9,8,7,6,5 + psa %28, #5 // coincidentally, constant #5 is the number 5 + mul %6, %5, %5 + pow2k_5: + sub %28, %28, #1 // %28 = %28 - 1 + brz pow2k_5_exit, %28 + mul %6, %6, %6 + brz pow2k_5, #0 + pow2k_5_exit: + // let t7 = &t6 * &t5; // 9,8,7,6,5,4,3,2,1,0 + mul %7, %6, %5 + + // let t8 = t7.pow2k(10); // 19..10 + psa %28, #6 // constant #6 is the number 10 + mul %8, %7, %7 + pow2k_10: + sub %28, %28, #1 + brz pow2k_10_exit, %28 + mul %8, %8, %8 + brz pow2k_10, #0 + pow2k_10_exit: + // let t9 = &t8 * &t7; // 19..0 + mul %9, %8, %7 + + // let t10 = t9.pow2k(20); // 39..20 + psa %28, #7 // constant #7 is the number 20 + mul %10, %9, %9 + pow2k_20: + sub %28, %28, #1 + brz pow2k_20_exit, %28 + mul %10, %10, %10 + brz pow2k_20, #0 + pow2k_20_exit: + // let t11 = &t10 * &t9; // 39..0 + mul %11, %10, %9 + + // let t12 = t11.pow2k(10); // 49..10 + psa %28, #6 // constant #6 is the number 10 + mul %12, %11, %11 + pow2k_10b: + sub %28, %28, #1 + brz pow2k_10b_exit, %28 + mul %12, %12, %12 + brz pow2k_10b, #0 + pow2k_10b_exit: + // let t13 = &t12 * &t7; // 49..0 + mul %13, %12, %7 + + // let t14 = t13.pow2k(50); // 99..50 + psa %28, #8 // constant #8 is the number 50 + mul %14, %13, %13 + pow2k_50a: + sub %28, %28, #1 + brz pow2k_50a_exit, %28 + mul %14, %14, %14 + brz pow2k_50a, #0 + pow2k_50a_exit: + // let t15 = &t14 
* &t13; // 99..0 + mul %15, %14, %13 + + // let t16 = t15.pow2k(100); // 199..100 + psa %28, #9 // constant #9 is the number 100 + mul %16, %15, %15 + pow2k_100: + sub %28, %28, #1 + brz pow2k_100_exit, %28 + mul %16, %16, %16 + brz pow2k_100, #0 + pow2k_100_exit: + // let t17 = &t16 * &t15; // 199..0 + mul %17, %16, %15 + + // let t18 = t17.pow2k(50); // 249..50 + psa %28, #8 // constant #8 is the number 50 + mul %18, %17, %17 + pow2k_50b: + sub %28, %28, #1 + brz pow2k_50b_exit, %28 + mul %18, %18, %18 + brz pow2k_50b, #0 + pow2k_50b_exit: + // let t19 = &t18 * &t13; // 249..0 + mul %19, %18, %13 + //(t19, t3) // just a return value, values are already there, do nothing + + //let t20 = t19.pow2k(5); // 254..5 + psa %28, #5 + mul %20, %19, %19 + pow2k_5_last: + sub %28, %28, #1 + brz pow2k_5_last_exit, %28 + mul %20, %20, %20 + brz pow2k_5_last, #0 + pow2k_5_last_exit: + + //let t21 = &t20 * &t3; // 254..5,3,1,0 + mul %21, %20, %3 + + // u = &self.U * &self.W.invert() + mul %31, %29, %21 + fin // finish execution + ); + let mcode2 = assemble_engine25519!( + start: + // P.U in %20 + // P.W in %21 + // Q.U in %22 + // Q.W in %23 + // affine_PmQ in %24 // I + // %30 is the TRD scratch register and cswap dummy + // %29 is the subtraction temporary value register and k_t + // x0.U in %25 // I + // x0.W in %26 // I + // x1.U in %27 // I + // x1.W in %28 /// I + // %19 is the loop counter, starts with 254 (if 0, loop runs exactly once) // I + // %31 is the scalar // I + // %18 is the swap variable + psa %25, #9 + psa %26, #1 + fin + ); + let mut pos = 0; + while pos < mcode2.len() { + println!("0x{:08x},", mcode2[pos]); + pos = pos + 1; + } + Ok(()) +} diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index cf7ec66..87637be 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// 
Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-20 07:32:43 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-25 05:25:02 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -20,9 +20,430 @@ static inline void leds_out_write(struct sbusfpga_leds_softc *sc, uint32_t v) { } #endif // CSR_LEDS_BASE +/* curve25519engine */ +#ifndef CSR_CURVE25519ENGINE_BASE +#define CSR_CURVE25519ENGINE_BASE (CSR_BASE + 0x1000L) +#define CSR_CURVE25519ENGINE_WINDOW_ADDR (CSR_CURVE25519ENGINE_BASE + 0x0L) +#define CSR_CURVE25519ENGINE_WINDOW_SIZE 1 +static inline uint32_t curve25519engine_window_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x0L); +} +static inline void curve25519engine_window_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x0L, v); +} +#define CSR_CURVE25519ENGINE_WINDOW_WINDOW_OFFSET 0 +#define CSR_CURVE25519ENGINE_WINDOW_WINDOW_SIZE 4 +static inline uint32_t curve25519engine_window_window_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 4)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_window_window_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_window_read(sc); + return curve25519engine_window_window_extract(sc, word); +} +static inline uint32_t curve25519engine_window_window_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 4)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_window_window_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_window_read(sc); + uint32_t newword = 
curve25519engine_window_window_replace(sc, oldword, plain_value); + curve25519engine_window_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_MPSTART_ADDR (CSR_CURVE25519ENGINE_BASE + 0x4L) +#define CSR_CURVE25519ENGINE_MPSTART_SIZE 1 +static inline uint32_t curve25519engine_mpstart_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x4L); +} +static inline void curve25519engine_mpstart_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x4L, v); +} +#define CSR_CURVE25519ENGINE_MPSTART_MPSTART_OFFSET 0 +#define CSR_CURVE25519ENGINE_MPSTART_MPSTART_SIZE 10 +static inline uint32_t curve25519engine_mpstart_mpstart_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 10)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_mpstart_mpstart_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_mpstart_read(sc); + return curve25519engine_mpstart_mpstart_extract(sc, word); +} +static inline uint32_t curve25519engine_mpstart_mpstart_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 10)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_mpstart_mpstart_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_mpstart_read(sc); + uint32_t newword = curve25519engine_mpstart_mpstart_replace(sc, oldword, plain_value); + curve25519engine_mpstart_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_MPLEN_ADDR (CSR_CURVE25519ENGINE_BASE + 0x8L) +#define CSR_CURVE25519ENGINE_MPLEN_SIZE 1 +static inline uint32_t curve25519engine_mplen_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, 
sc->sc_bhregs_curve25519engine, 0x8L); +} +static inline void curve25519engine_mplen_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x8L, v); +} +#define CSR_CURVE25519ENGINE_MPLEN_MPLEN_OFFSET 0 +#define CSR_CURVE25519ENGINE_MPLEN_MPLEN_SIZE 10 +static inline uint32_t curve25519engine_mplen_mplen_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 10)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_mplen_mplen_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_mplen_read(sc); + return curve25519engine_mplen_mplen_extract(sc, word); +} +static inline uint32_t curve25519engine_mplen_mplen_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 10)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_mplen_mplen_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_mplen_read(sc); + uint32_t newword = curve25519engine_mplen_mplen_replace(sc, oldword, plain_value); + curve25519engine_mplen_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_CONTROL_ADDR (CSR_CURVE25519ENGINE_BASE + 0xcL) +#define CSR_CURVE25519ENGINE_CONTROL_SIZE 1 +static inline uint32_t curve25519engine_control_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0xcL); +} +static inline void curve25519engine_control_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0xcL, v); +} +#define CSR_CURVE25519ENGINE_CONTROL_GO_OFFSET 0 +#define CSR_CURVE25519ENGINE_CONTROL_GO_SIZE 1 +static inline uint32_t curve25519engine_control_go_extract(struct sbusfpga_curve25519engine_softc *sc, 
uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_control_go_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_control_read(sc); + return curve25519engine_control_go_extract(sc, word); +} +static inline uint32_t curve25519engine_control_go_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_control_go_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_control_read(sc); + uint32_t newword = curve25519engine_control_go_replace(sc, oldword, plain_value); + curve25519engine_control_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_MPRESUME_ADDR (CSR_CURVE25519ENGINE_BASE + 0x10L) +#define CSR_CURVE25519ENGINE_MPRESUME_SIZE 1 +static inline uint32_t curve25519engine_mpresume_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x10L); +} +#define CSR_CURVE25519ENGINE_MPRESUME_MPRESUME_OFFSET 0 +#define CSR_CURVE25519ENGINE_MPRESUME_MPRESUME_SIZE 10 +static inline uint32_t curve25519engine_mpresume_mpresume_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 10)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_mpresume_mpresume_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_mpresume_read(sc); + return curve25519engine_mpresume_mpresume_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_POWER_ADDR (CSR_CURVE25519ENGINE_BASE + 0x14L) +#define CSR_CURVE25519ENGINE_POWER_SIZE 1 +static inline uint32_t curve25519engine_power_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, 
sc->sc_bhregs_curve25519engine, 0x14L); +} +static inline void curve25519engine_power_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x14L, v); +} +#define CSR_CURVE25519ENGINE_POWER_ON_OFFSET 0 +#define CSR_CURVE25519ENGINE_POWER_ON_SIZE 1 +static inline uint32_t curve25519engine_power_on_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_power_on_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_power_read(sc); + return curve25519engine_power_on_extract(sc, word); +} +static inline uint32_t curve25519engine_power_on_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_power_on_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_power_read(sc); + uint32_t newword = curve25519engine_power_on_replace(sc, oldword, plain_value); + curve25519engine_power_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_POWER_PAUSE_REQ_OFFSET 1 +#define CSR_CURVE25519ENGINE_POWER_PAUSE_REQ_SIZE 1 +static inline uint32_t curve25519engine_power_pause_req_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t curve25519engine_power_pause_req_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_power_read(sc); + return curve25519engine_power_pause_req_extract(sc, word); +} +static inline uint32_t curve25519engine_power_pause_req_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return 
(oldword & (~(mask << 1))) | (mask & plain_value)<< 1 ; +} +static inline void curve25519engine_power_pause_req_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_power_read(sc); + uint32_t newword = curve25519engine_power_pause_req_replace(sc, oldword, plain_value); + curve25519engine_power_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_STATUS_ADDR (CSR_CURVE25519ENGINE_BASE + 0x18L) +#define CSR_CURVE25519ENGINE_STATUS_SIZE 1 +static inline uint32_t curve25519engine_status_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x18L); +} +#define CSR_CURVE25519ENGINE_STATUS_RUNNING_OFFSET 0 +#define CSR_CURVE25519ENGINE_STATUS_RUNNING_SIZE 1 +static inline uint32_t curve25519engine_status_running_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_status_running_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_status_read(sc); + return curve25519engine_status_running_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_STATUS_MPC_OFFSET 1 +#define CSR_CURVE25519ENGINE_STATUS_MPC_SIZE 10 +static inline uint32_t curve25519engine_status_mpc_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 10)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t curve25519engine_status_mpc_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_status_read(sc); + return curve25519engine_status_mpc_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_STATUS_PAUSE_GNT_OFFSET 11 +#define CSR_CURVE25519ENGINE_STATUS_PAUSE_GNT_SIZE 1 +static inline uint32_t curve25519engine_status_pause_gnt_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return 
( (oldword >> 11) & mask ); +} +static inline uint32_t curve25519engine_status_pause_gnt_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_status_read(sc); + return curve25519engine_status_pause_gnt_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_STATUS_SIGILL_OFFSET 12 +#define CSR_CURVE25519ENGINE_STATUS_SIGILL_SIZE 1 +static inline uint32_t curve25519engine_status_sigill_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 12) & mask ); +} +static inline uint32_t curve25519engine_status_sigill_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_status_read(sc); + return curve25519engine_status_sigill_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_STATUS_FINISHED_OFFSET 13 +#define CSR_CURVE25519ENGINE_STATUS_FINISHED_SIZE 1 +static inline uint32_t curve25519engine_status_finished_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 13) & mask ); +} +static inline uint32_t curve25519engine_status_finished_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_status_read(sc); + return curve25519engine_status_finished_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_EV_STATUS_ADDR (CSR_CURVE25519ENGINE_BASE + 0x1cL) +#define CSR_CURVE25519ENGINE_EV_STATUS_SIZE 1 +static inline uint32_t curve25519engine_ev_status_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x1cL); +} +#define CSR_CURVE25519ENGINE_EV_STATUS_FINISHED_OFFSET 0 +#define CSR_CURVE25519ENGINE_EV_STATUS_FINISHED_SIZE 1 +static inline uint32_t curve25519engine_ev_status_finished_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t 
curve25519engine_ev_status_finished_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_ev_status_read(sc); + return curve25519engine_ev_status_finished_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_EV_STATUS_ILLEGAL_OPCODE_OFFSET 1 +#define CSR_CURVE25519ENGINE_EV_STATUS_ILLEGAL_OPCODE_SIZE 1 +static inline uint32_t curve25519engine_ev_status_illegal_opcode_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t curve25519engine_ev_status_illegal_opcode_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_ev_status_read(sc); + return curve25519engine_ev_status_illegal_opcode_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_EV_PENDING_ADDR (CSR_CURVE25519ENGINE_BASE + 0x20L) +#define CSR_CURVE25519ENGINE_EV_PENDING_SIZE 1 +static inline uint32_t curve25519engine_ev_pending_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x20L); +} +static inline void curve25519engine_ev_pending_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x20L, v); +} +#define CSR_CURVE25519ENGINE_EV_PENDING_FINISHED_OFFSET 0 +#define CSR_CURVE25519ENGINE_EV_PENDING_FINISHED_SIZE 1 +static inline uint32_t curve25519engine_ev_pending_finished_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_ev_pending_finished_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_ev_pending_read(sc); + return curve25519engine_ev_pending_finished_extract(sc, word); +} +static inline uint32_t curve25519engine_ev_pending_finished_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t 
plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_ev_pending_finished_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_ev_pending_read(sc); + uint32_t newword = curve25519engine_ev_pending_finished_replace(sc, oldword, plain_value); + curve25519engine_ev_pending_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_EV_PENDING_ILLEGAL_OPCODE_OFFSET 1 +#define CSR_CURVE25519ENGINE_EV_PENDING_ILLEGAL_OPCODE_SIZE 1 +static inline uint32_t curve25519engine_ev_pending_illegal_opcode_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t curve25519engine_ev_pending_illegal_opcode_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_ev_pending_read(sc); + return curve25519engine_ev_pending_illegal_opcode_extract(sc, word); +} +static inline uint32_t curve25519engine_ev_pending_illegal_opcode_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 1))) | (mask & plain_value)<< 1 ; +} +static inline void curve25519engine_ev_pending_illegal_opcode_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_ev_pending_read(sc); + uint32_t newword = curve25519engine_ev_pending_illegal_opcode_replace(sc, oldword, plain_value); + curve25519engine_ev_pending_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_EV_ENABLE_ADDR (CSR_CURVE25519ENGINE_BASE + 0x24L) +#define CSR_CURVE25519ENGINE_EV_ENABLE_SIZE 1 +static inline uint32_t curve25519engine_ev_enable_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x24L); +} +static inline void 
curve25519engine_ev_enable_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t v) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x24L, v); +} +#define CSR_CURVE25519ENGINE_EV_ENABLE_FINISHED_OFFSET 0 +#define CSR_CURVE25519ENGINE_EV_ENABLE_FINISHED_SIZE 1 +static inline uint32_t curve25519engine_ev_enable_finished_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_ev_enable_finished_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_ev_enable_read(sc); + return curve25519engine_ev_enable_finished_extract(sc, word); +} +static inline uint32_t curve25519engine_ev_enable_finished_replace(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void curve25519engine_ev_enable_finished_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_ev_enable_read(sc); + uint32_t newword = curve25519engine_ev_enable_finished_replace(sc, oldword, plain_value); + curve25519engine_ev_enable_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_EV_ENABLE_ILLEGAL_OPCODE_OFFSET 1 +#define CSR_CURVE25519ENGINE_EV_ENABLE_ILLEGAL_OPCODE_SIZE 1 +static inline uint32_t curve25519engine_ev_enable_illegal_opcode_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t curve25519engine_ev_enable_illegal_opcode_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_ev_enable_read(sc); + return curve25519engine_ev_enable_illegal_opcode_extract(sc, word); +} +static inline uint32_t curve25519engine_ev_enable_illegal_opcode_replace(struct 
sbusfpga_curve25519engine_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 1))) | (mask & plain_value)<< 1 ; +} +static inline void curve25519engine_ev_enable_illegal_opcode_write(struct sbusfpga_curve25519engine_softc *sc, uint32_t plain_value) { + uint32_t oldword = curve25519engine_ev_enable_read(sc); + uint32_t newword = curve25519engine_ev_enable_illegal_opcode_replace(sc, oldword, plain_value); + curve25519engine_ev_enable_write(sc, newword); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_ADDR (CSR_CURVE25519ENGINE_BASE + 0x28L) +#define CSR_CURVE25519ENGINE_INSTRUCTION_SIZE 1 +static inline uint32_t curve25519engine_instruction_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x28L); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_OPCODE_OFFSET 0 +#define CSR_CURVE25519ENGINE_INSTRUCTION_OPCODE_SIZE 6 +static inline uint32_t curve25519engine_instruction_opcode_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 6)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t curve25519engine_instruction_opcode_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_instruction_read(sc); + return curve25519engine_instruction_opcode_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_RA_OFFSET 6 +#define CSR_CURVE25519ENGINE_INSTRUCTION_RA_SIZE 5 +static inline uint32_t curve25519engine_instruction_ra_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 5)-1); + return ( (oldword >> 6) & mask ); +} +static inline uint32_t curve25519engine_instruction_ra_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_instruction_read(sc); + return curve25519engine_instruction_ra_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_CA_OFFSET 11 +#define 
CSR_CURVE25519ENGINE_INSTRUCTION_CA_SIZE 1 +static inline uint32_t curve25519engine_instruction_ca_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 11) & mask ); +} +static inline uint32_t curve25519engine_instruction_ca_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_instruction_read(sc); + return curve25519engine_instruction_ca_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_RB_OFFSET 12 +#define CSR_CURVE25519ENGINE_INSTRUCTION_RB_SIZE 5 +static inline uint32_t curve25519engine_instruction_rb_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 5)-1); + return ( (oldword >> 12) & mask ); +} +static inline uint32_t curve25519engine_instruction_rb_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_instruction_read(sc); + return curve25519engine_instruction_rb_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_CB_OFFSET 17 +#define CSR_CURVE25519ENGINE_INSTRUCTION_CB_SIZE 1 +static inline uint32_t curve25519engine_instruction_cb_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 17) & mask ); +} +static inline uint32_t curve25519engine_instruction_cb_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_instruction_read(sc); + return curve25519engine_instruction_cb_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_WD_OFFSET 18 +#define CSR_CURVE25519ENGINE_INSTRUCTION_WD_SIZE 5 +static inline uint32_t curve25519engine_instruction_wd_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 5)-1); + return ( (oldword >> 18) & mask ); +} +static inline uint32_t curve25519engine_instruction_wd_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = 
curve25519engine_instruction_read(sc); + return curve25519engine_instruction_wd_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_INSTRUCTION_IMMEDIATE_OFFSET 23 +#define CSR_CURVE25519ENGINE_INSTRUCTION_IMMEDIATE_SIZE 9 +static inline uint32_t curve25519engine_instruction_immediate_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 9)-1); + return ( (oldword >> 23) & mask ); +} +static inline uint32_t curve25519engine_instruction_immediate_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_instruction_read(sc); + return curve25519engine_instruction_immediate_extract(sc, word); +} +#endif // CSR_CURVE25519ENGINE_BASE + /* ddrphy */ #ifndef CSR_DDRPHY_BASE -#define CSR_DDRPHY_BASE (CSR_BASE + 0x1000L) +#define CSR_DDRPHY_BASE (CSR_BASE + 0x2000L) #define CSR_DDRPHY_RST_ADDR (CSR_DDRPHY_BASE + 0x0L) #define CSR_DDRPHY_RST_SIZE 1 static inline uint32_t ddrphy_rst_read(struct sbusfpga_ddrphy_softc *sc) { @@ -131,7 +552,7 @@ static inline void ddrphy_wrphase_write(struct sbusfpga_ddrphy_softc *sc, uint32 /* exchange_with_mem */ #ifndef CSR_EXCHANGE_WITH_MEM_BASE -#define CSR_EXCHANGE_WITH_MEM_BASE (CSR_BASE + 0x2000L) +#define CSR_EXCHANGE_WITH_MEM_BASE (CSR_BASE + 0x3000L) #define CSR_EXCHANGE_WITH_MEM_BLK_SIZE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x0L) #define CSR_EXCHANGE_WITH_MEM_BLK_SIZE_SIZE 1 static inline uint32_t exchange_with_mem_blk_size_read(struct sbusfpga_exchange_with_mem_softc *sc) { @@ -200,7 +621,7 @@ static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_exchang /* sdram */ #ifndef CSR_SDRAM_BASE -#define CSR_SDRAM_BASE (CSR_BASE + 0x3000L) +#define CSR_SDRAM_BASE (CSR_BASE + 0x4000L) #define CSR_SDRAM_DFII_CONTROL_ADDR (CSR_SDRAM_BASE + 0x0L) #define CSR_SDRAM_DFII_CONTROL_SIZE 1 static inline uint32_t sdram_dfii_control_read(struct sbusfpga_sdram_softc *sc) { @@ -469,7 +890,7 @@ static inline uint32_t sdram_dfii_pi3_rddata_read(struct 
sbusfpga_sdram_softc *s /* trng */ #ifndef CSR_TRNG_BASE -#define CSR_TRNG_BASE (CSR_BASE + 0x4000L) +#define CSR_TRNG_BASE (CSR_BASE + 0x5000L) #define CSR_TRNG_CTRL_ADDR (CSR_TRNG_BASE + 0x0L) #define CSR_TRNG_CTRL_SIZE 1 static inline uint32_t trng_ctrl_read(struct sbusfpga_trng_softc *sc) { diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth index 44c9655..56f4196 100644 --- a/sbus-to-ztex-gateware-migen/prom_csr.fth +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -1,11 +1,13 @@ \ auto-generated base regions for CSRs in the PROM h# 40000 constant sbusfpga_csraddr_leds -h# 41000 constant sbusfpga_csraddr_ddrphy -h# 42000 constant sbusfpga_csraddr_exchange_with_mem -h# 43000 constant sbusfpga_csraddr_sdram -h# 44000 constant sbusfpga_csraddr_trng +h# 41000 constant sbusfpga_csraddr_curve25519engine +h# 42000 constant sbusfpga_csraddr_ddrphy +h# 43000 constant sbusfpga_csraddr_exchange_with_mem +h# 44000 constant sbusfpga_csraddr_sdram +h# 45000 constant sbusfpga_csraddr_trng h# 80000 constant sbusfpga_regionaddr_usb_host_ctrl h# 0 constant sbusfpga_regionaddr_prom h# 80000000 constant sbusfpga_regionaddr_main_ram h# fc000000 constant sbusfpga_regionaddr_usb_fake_dma +h# a0000 constant sbusfpga_regionaddr_curve25519engine h# 40000 constant sbusfpga_regionaddr_csr diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index 1170c1a..e76ac67 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -24,8 +24,6 @@ my-space constant my-sbus-space : map-in-led ( -- ) my-sbus-address sbusfpga_csraddr_leds + my-sbus-space h# 4 map-in is led-virt ; : map-out-led ( -- ) led-virt h# 4 map-out ; -\ external - : setled! ( pattern -- ) map-in-led led-virt l! 
( pattern virt -- ) @@ -160,8 +158,6 @@ my-space constant my-sbus-space : map-in-trng ( -- ) my-sbus-address sbusfpga_csraddr_trng + my-sbus-space h# 8 map-in is trng-virt ; : map-out-trng ( -- ) trng-virt h# 8 map-out ; -\ external - : disabletrng! ( -- ) map-in-trng 1 trng-virt l! ( pattern virt -- ) @@ -170,4 +166,55 @@ my-space constant my-sbus-space disabletrng! + +\ OpenBIOS tokenizer won't accept finish-device without new-device +\ Cheat by using the tokenizer so we can do OpenBoot 2.x siblings +\ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer +\ The OpenFirmware tokenizer does accept the 'clean' syntax +finish-device +\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ CURVE25519 +new-device + +\ Absolute minimal stuff; name & registers def. +" betrustedc25519e" device-name + +\ one page of CSR registers, plus the memory +\ we might want to replace the slave memory access +\ by another instance of exchange_with_mem ? 
+\ we split the memory space in two +\ 0x1000 @ 0x0 for the microcode +\ 0x10000 @ 0x10000 for the register file +my-address sbusfpga_csraddr_curve25519engine + my-space xdrphys \ Offset#1 +h# 1000 xdrint xdr+ \ Merge size#1 +my-address sbusfpga_regionaddr_curve25519engine + my-space xdrphys xdr+ \ Merge offset#2 +h# 1000 xdrint xdr+ \ Merge size#2 +my-address sbusfpga_regionaddr_curve25519engine h# 10000 + + my-space xdrphys xdr+ \ Merge offset#3 +h# 10000 xdrint xdr+ \ Merge size#3 +" reg" attribute + +\ we don't support ET or HWORD +h# 7d xdrint " slave-burst-sizes" attribute +h# 7d xdrint " burst-sizes" attribute + +headers +-1 instance value curve25519engine-virt +-1 instance value curve25519engine-microcode-virt +-1 instance value curve25519engine-regfile-virt +my-address constant my-sbus-address +my-space constant my-sbus-space + +: map-in ( adr space size -- virt ) " map-in" $call-parent ; +: map-out ( virt size -- ) " map-out" $call-parent ; + +: map-in-curve25519engine ( -- ) + my-sbus-address sbusfpga_csraddr_curve25519engine + my-sbus-space h# 1000 map-in is curve25519engine-virt + my-sbus-address sbusfpga_regionaddr_curve25519engine + my-sbus-space h# 1000 map-in is curve25519engine-microcode-virt + my-sbus-address sbusfpga_regionaddr_curve25519engine h# 10000 + + my-sbus-space h# 10000 map-in is curve25519engine-regfile-virt +; +: map-out-curve25519engine ( -- ) + curve25519engine-virt h# 1000 map-out + curve25519engine-microcode-virt h# 1000 map-out + curve25519engine-regfile-virt h# 10000 map-out +; + end0 diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 40333e8..f311d52 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -25,11 +25,13 @@ ADDR_PHYS_LOW = 0 ADDR_PFX_HIGH = ADDR_PHYS_HIGH ADDR_PFX_LOW = 16 ## 64 KiB per prefix ADDR_PFX_LENGTH = 12 #(1 + ADDR_PFX_HIGH - ADDR_PFX_LOW) -ROM_ADDR_PFX = Signal(12, reset = 0) 
-WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 4) -USBOHCI_ADDR_PFX = Signal(12, reset = 8) -SRAM_ADDR_PFX = Signal(12, reset = 9) -SDRAM_ADDR_PFX = Signal(12, reset = 2048) +ROM_ADDR_PFX = Signal(12, reset = 0x000) +WISHBONE_CSR_ADDR_PFX = Signal(12, reset = 0x004) +USBOHCI_ADDR_PFX = Signal(12, reset = 0x008) +SRAM_ADDR_PFX = Signal(12, reset = 0x009) +ENGINE_ADDR_PFXA = Signal(12, reset = 0x00a) +ENGINE_ADDR_PFXB = Signal(12, reset = 0x00b) +#SDRAM_ADDR_PFX = Signal(12, reset = 2048) wishbone_default_timeout = 120 ## must be > sbus_default_timeout sbus_default_timeout = 100 ## must be below 127 as we can wait twice on it inside the 255 cycles @@ -430,7 +432,8 @@ class SBusFPGABus(Module): (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ENGINE_ADDR_PFXA) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ENGINE_ADDR_PFXB)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -462,8 +465,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)| - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, 
(SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -500,8 +502,7 @@ class SBusFPGABus(Module): #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)| - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), @@ -548,7 +549,8 @@ class SBusFPGABus(Module): ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ENGINE_ADDR_PFXA) | + (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ENGINE_ADDR_PFXB)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_WORD), @@ -574,8 +576,7 @@ class SBusFPGABus(Module): (SBUS_3V3_PPRD_i == 0)), NextValue(sbus_oe_master_in, 1), NextValue(sbus_last_pa, SBUS_3V3_PA_i), - If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), + If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, 
NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), @@ -606,8 +607,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), NextState("Slave_Error") - ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX) | - (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SDRAM_ADDR_PFX)), + ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index e8faeb7..81de884 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -22,6 +22,9 @@ from sbus_to_fpga_trng import * from litedram.frontend.dma import * +from engine import Engine; +from migen.genlib.resetsync import AsyncResetSynchronizer; + import sbus_to_fpga_export; # CRG ---------------------------------------------------------------------------------------------- @@ -37,10 +40,24 @@ class _CRG(Module): self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain # self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller + self.clock_domains.cd_clk50 = ClockDomain() # nominal 50 MHz (actually curve25519_clk_freq/2) for curve25519engine -> eng_clk + self.clock_domains.cd_clk100 = ClockDomain() # nominal 100 MHz (actually curve25519_clk_freq) for curve25519engine -> mul_clk + self.clock_domains.cd_clk200 = ClockDomain() # nominal 200 MHz (actually curve25519_clk_freq*2) for curve25519engine -> rf_clk # # # clk48 = platform.request("clk48") - self.cd_native.clk = clk48 + ###### explanations from betrusted-io/betrusted-soc/betrusted_soc.py + # Note: below feature cannot be used because Litex appends this *after*
platform commands! This causes the generated + # clock derived constraints immediately below to fail, because .xdc file is parsed in-order, and the main clock needs + # to be created before the derived clocks. Instead, we use the line afterwards. + platform.add_platform_command("create_clock -name clk48 -period 20.8333 [get_nets clk48]") + # The above constraint must strictly proceed the below create_generated_clock constraints in the .XDC file + # This allows PLLs/MMCMEs to be placed anywhere and reference the input clock + self.clk48_bufg = Signal() + self.specials += Instance("BUFG", i_I=clk48, o_O=self.clk48_bufg) + self.comb += self.cd_native.clk.eq(self.clk48_bufg) + #self.cd_native.clk = clk48 + clk_sbus = platform.request("SBUS_3V3_CLK") self.cd_sbus.clk = clk_sbus rst_sbus = platform.request("SBUS_3V3_RSTs") @@ -49,16 +66,39 @@ class _CRG(Module): ##self.comb += self.cd_sys.rst.eq(~rst_sbus) self.submodules.pll = pll = S7MMCM(speedgrade=-1) - pll.register_clkin(clk48, 48e6) + #pll.register_clkin(clk48, 48e6) + pll.register_clkin(self.clk48_bufg, 48e6) pll.create_clkout(self.cd_sys, sys_clk_freq) + platform.add_platform_command("create_generated_clock -name sysclk [get_pins {{MMCME2_ADV/CLKOUT0}}]") pll.create_clkout(self.cd_sys4x, 4*sys_clk_freq) + platform.add_platform_command("create_generated_clock -name sys4xclk [get_pins {{MMCME2_ADV/CLKOUT1}}]") pll.create_clkout(self.cd_sys4x_dqs, 4*sys_clk_freq, phase=90) + platform.add_platform_command("create_generated_clock -name sys4x90clk [get_pins {{MMCME2_ADV/CLKOUT2}}]") self.comb += pll.reset.eq(~rst_sbus) # | ~por_done platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) + + + 
self.submodules.curve25519_pll = curve25519_pll = S7MMCM(speedgrade=-1) + curve25519_clk_freq = 80e6 + #curve25519_pll.register_clkin(clk48, 48e6) + curve25519_pll.register_clkin(self.clk48_bufg, 48e6) + curve25519_pll.create_clkout(self.cd_clk50, curve25519_clk_freq/2, margin=0) + platform.add_platform_command("create_generated_clock -name clk50 [get_pins {{MMCME2_ADV_1/CLKOUT0}}]") + curve25519_pll.create_clkout(self.cd_clk100, curve25519_clk_freq, margin=0) + platform.add_platform_command("create_generated_clock -name clk100 [get_pins {{MMCME2_ADV_1/CLKOUT1}}]") + curve25519_pll.create_clkout(self.cd_clk200, curve25519_clk_freq*2, margin=0) + platform.add_platform_command("create_generated_clock -name clk200 [get_pins {{MMCME2_ADV_1/CLKOUT2}}]") + #self.comb += curve25519_pll.reset.eq(~rst_sbus) # | ~por_done + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk50.clk) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk100.clk) + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk200.clk) + platform.add_false_path_constraints(self.cd_clk50.clk, self.cd_sys.clk) + platform.add_false_path_constraints(self.cd_clk100.clk, self.cd_sys.clk) + platform.add_false_path_constraints(self.cd_clk200.clk, self.cd_sys.clk) # Power on reset, reset propagate from SBus to SYS # por_count = Signal(16, reset=2**16-1) @@ -71,15 +111,19 @@ class _CRG(Module): # USB self.submodules.usb_pll = usb_pll = S7MMCM(speedgrade=-1) - usb_pll.register_clkin(clk48, 48e6) + #usb_pll.register_clkin(clk48, 48e6) + usb_pll.register_clkin(self.clk48_bufg, 48e6) usb_pll.create_clkout(self.cd_usb, 48e6, margin = 0) + platform.add_platform_command("create_generated_clock -name usbclk [get_pins {{MMCME2_ADV_2/CLKOUT0}}]") self.comb += usb_pll.reset.eq(~rst_sbus) # | ~por_done platform.add_false_path_constraints(self.cd_sys.clk, self.cd_usb.clk) - self.submodules.pll_idelay = pll_idelay = S7PLL(speedgrade=-1) - pll_idelay.register_clkin(clk48, 48e6) + 
self.submodules.pll_idelay = pll_idelay = S7MMCM(speedgrade=-1) + #pll_idelay.register_clkin(clk48, 48e6) + pll_idelay.register_clkin(self.clk48_bufg, 48e6) pll_idelay.create_clkout(self.cd_idelay, 200e6, margin = 0) - self.comb += pll_idelay.reset.eq(~rst_sbus) # | ~por_done + platform.add_platform_command("create_generated_clock -name idelayclk [get_pins {{MMCME2_ADV_3/CLKOUT0}}]") + self.comb += pll_idelay.reset.eq(~rst_sbus) # | ~por_done self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay) @@ -112,12 +156,13 @@ class SBusFPGA(SoCCore): # The position of the 'usb_fake_dma' is so it overlaps # the virtual address space used by NetBSD DMA allocators self.wb_mem_map = wb_mem_map = { - "prom": 0x00000000, - "csr" : 0x00040000, - "usb_host": 0x00080000, - "usb_shared_mem": 0x00090000, - "main_ram": 0x80000000, - "usb_fake_dma": 0xfc000000, + "prom": 0x00000000, + "csr" : 0x00040000, + "usb_host": 0x00080000, + "usb_shared_mem": 0x00090000, + "curve25519engine": 0x000a0000, + "main_ram": 0x80000000, + "usb_fake_dma": 0xfc000000, } self.mem_map.update(wb_mem_map) self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) @@ -229,6 +274,14 @@ class SBusFPGA(SoCCore): self.submodules.trng = NeoRV32TrngWrapper(platform=platform) + # beware the naming, as 'clk50' 'sysclk' 'clk200' are used in the original platform constraints + # the local engine.py was slightly modified to have configurable names, so we can have 'clk50', 'clk100', 'clk200' + # Beware that Engine implicitely runs in 'sys' by default, need to rename that one as well + self.submodules.curve25519engine = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100", "sys":"clk100"})(Engine(platform=platform,prefix=self.mem_map.get("curve25519engine", None))) + self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100") + 
self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) + #self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) + def main(): parser = argparse.ArgumentParser(description="SbusFPGA") parser.add_argument("--build", action="store_true", help="Build bitstream") diff --git a/sbus-to-ztex-gateware-migen/ztex213_sbus.py b/sbus-to-ztex-gateware-migen/ztex213_sbus.py index 77dc0fb..1d1f554 100644 --- a/sbus-to-ztex-gateware-migen/ztex213_sbus.py +++ b/sbus-to-ztex-gateware-migen/ztex213_sbus.py @@ -158,4 +158,4 @@ class Platform(XilinxPlatform): def do_finalize(self, fragment): XilinxPlatform.do_finalize(self, fragment) - self.add_period_constraint(self.lookup_request("clk48", loose=True), 1e9/48e6) + #self.add_period_constraint(self.lookup_request("clk48", loose=True), 1e9/48e6) From bae310e0a3efd4ae183e7960609ec5d804e95848 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 29 Jul 2021 03:59:27 -0400 Subject: [PATCH 54/78] more tests --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 8bbe0fe..da03577 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -494,6 +494,14 @@ static int init_program(struct sbusfpga_curve25519engine_softc *sc) { aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register failure: mplen = 0x%08x\n", x); err = 1; } + const int test_reg_num = 73; + const uint32_t test_reg_value = 0x0C0FFEE0; + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile, test_reg_num, test_reg_value); + delay(1); + if ((x = bus_space_read_4(sc->sc_bustag, 
sc->sc_bhregs_regfile, test_reg_num)) != test_reg_value) { + aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register file failure: 0x%08x != 0x%08x\n", x, test_reg_value); + err = 1; + } #endif curve25519engine_power_write(sc, 0); From ae7ba8115f5af5e2a3752c2f9900b5e83efce841 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 21 Aug 2021 02:12:34 -0400 Subject: [PATCH 55/78] add ioctls for testing --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 150 +++++++++++------- 1 file changed, 90 insertions(+), 60 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 0431fc7..a44c77d 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -48,7 +48,6 @@ __KERNEL_RCSID(0, "$NetBSD$"); #include #include -#include #include #include @@ -104,7 +103,7 @@ static int sbusfpga_sdram_diskstart(device_t self, struct buf *bp); struct dkdriver sbusfpga_sdram_dkdriver = { .d_strategy = sbusfpga_sdram_strategy, .d_minphys = sbusfpga_sdram_minphys, - .d_diskstart = sbusfpga_sdram_diskstart + .d_diskstart = sbusfpga_sdram_diskstart }; extern struct cfdriver sbusfpga_sdram_cd; @@ -112,65 +111,28 @@ extern struct cfdriver sbusfpga_sdram_cd; static int sbusfpga_sdram_read_block(struct sbusfpga_sdram_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data); static int sbusfpga_sdram_write_block(struct sbusfpga_sdram_softc *sc, const u_int32_t block, const u_int32_t blkcnt, void *data); -int -sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) -{ - struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); - int err = 0, err2 = 0; +struct sbusfpga_sdram_rwpg { + u_int32_t pgdata[512]; + u_int32_t checksum[8]; + u_int32_t checksumbis[8]; + u_int32_t pgnum; + u_int32_t last_blk; + u_int32_t last_dma; + u_int32_t dma_wrdone; + u_int32_t vdma_err; +}; +#define 
SBUSFPGA_READ_PG _IOWR('X', 0, struct sbusfpga_sdram_rwpg) +#define SBUSFPGA_WRITE_PG _IOWR('X', 1, struct sbusfpga_sdram_rwpg) - if (sc == NULL) { - aprint_error("%s:%d: sc == NULL! giving up\n", __PRETTY_FUNCTION__, __LINE__); - return (ENXIO); - } - -#if 0 - switch (cmd) { - /* case VNDIOCCLR: */ - /* case VNDIOCCLR50: */ - case DIOCGDINFO: - case DIOCSDINFO: - case DIOCWDINFO: - case DIOCGPARTINFO: - case DIOCKLABEL: - case DIOCWLABEL: - case DIOCCACHESYNC: -#ifdef __HAVE_OLD_DISKLABEL - case ODIOCGDINFO: - case ODIOCSDINFO: - case ODIOCWDINFO: - case ODIOCGDEFLABEL: -#endif - case DIOCDWEDGE: - case DIOCAWEDGE: - case DIOCLWEDGES: - case DIOCRMWEDGES: - case DIOCMWEDGES: - case DIOCGWEDGEINFO: - case DIOCGDEFLABEL: - err2 = dk_ioctl(&sc->dk, dev, cmd, data, flag, l); - if (err2 != EPASSTHROUGH) - err = err2; - break; - default: - err = EINVAL; - break; - } -#else - err2 = dk_ioctl(&sc->dk, dev, cmd, data, flag, l); - if (err2 != EPASSTHROUGH) - err = err2; - else - err = ENOTTY; -#endif - return(err); -} +static inline void exchange_with_mem_checksum_read(struct sbusfpga_sdram_softc *sc, uint32_t* data); +static inline void exchange_with_mem_checksum_write(struct sbusfpga_sdram_softc *sc, uint32_t* data); int sbusfpga_sdram_open(dev_t dev, int flag, int fmt, struct lwp *l) { struct sbusfpga_sdram_softc *sd = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); struct dk_softc *dksc; - int error; + int error = 0; if (sd == NULL) { aprint_error("%s:%d: sd == NULL! giving up\n", __PRETTY_FUNCTION__, __LINE__); @@ -194,15 +156,18 @@ sbusfpga_sdram_close(dev_t dev, int flag, int fmt, struct lwp *l) { struct sbusfpga_sdram_softc *sd = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); struct dk_softc *dksc; + int error = 0; if (sd == NULL) { aprint_error("%s:%d: sd == NULL! 
giving up\n", __PRETTY_FUNCTION__, __LINE__); return (ENXIO); } - + dksc = &sd->dk; - return dk_close(dksc, dev, flag, fmt, l); + error = dk_close(dksc, dev, flag, fmt, l); + + return error; } int @@ -359,8 +324,6 @@ sbusfpga_sdram_attach(device_t parent, device_t self, void *aux) } /* we seem OK hardware-wise */ - - dk_init(&sc->dk, self, DKTYPE_FLASH); disk_init(&sc->dk.sc_dkdev, device_xname(sc->dk.sc_dev), &sbusfpga_sdram_dkdriver); dk_attach(&sc->dk); @@ -436,7 +399,6 @@ static void sbusfpga_sdram_set_geometry(struct sbusfpga_sdram_softc *sc) { disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, "sbusfpga_sdram"); } - int sbusfpga_sdram_size(dev_t dev) { struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); @@ -609,6 +571,74 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) #undef CSR_SDPHY_BASE #undef CSR_TRNG_BASE +/* not yet generated */ +static inline void exchange_with_mem_checksum_read(struct sbusfpga_sdram_softc *sc, uint32_t* data) { + int i; + for (i = 0 ; i < 8 ; i++) { // FIXME + data[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 4*i+(CSR_EXCHANGE_WITH_MEM_CHECKSUM_ADDR - CSR_EXCHANGE_WITH_MEM_BASE)); + } +} +static inline void exchange_with_mem_checksum_write(struct sbusfpga_sdram_softc *sc, uint32_t* data) { + int i; + for (i = 0 ; i < 8 ; i++) { // FIXME + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 4*i+(CSR_EXCHANGE_WITH_MEM_CHECKSUM_ADDR - CSR_EXCHANGE_WITH_MEM_BASE), data[i]); + } +} + +int +sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) +{ + struct sbusfpga_sdram_softc *sc = device_lookup_private(&sbusfpga_sdram_cd, DISKUNIT(dev)); + int err = 0;//, err2 = 0; + + if (sc == NULL) { + aprint_error("%s:%d: sc == NULL! 
giving up\n", __PRETTY_FUNCTION__, __LINE__); + return (ENXIO); + } + + switch (cmd) { + case SBUSFPGA_READ_PG: { + struct sbusfpga_sdram_rwpg* pg = (struct sbusfpga_sdram_rwpg*)data; + exchange_with_mem_checksum_write(sc, pg->checksum); + err = sbusfpga_sdram_read_block(sc, pg->pgnum * 4, 4, pg->pgdata); + exchange_with_mem_checksum_read(sc, pg->checksum); + delay(1); + exchange_with_mem_checksum_read(sc, pg->checksumbis); + pg->last_blk = exchange_with_mem_last_blk_read(sc); + pg->last_dma = exchange_with_mem_last_dma_read(sc); + pg->dma_wrdone = exchange_with_mem_dma_wrdone_read(sc); + pg->vdma_err = exchange_with_mem_sbus_master_error_virtual_read(sc); + if (err != 0) + err = EIO; + goto done; + } + case SBUSFPGA_WRITE_PG: { + struct sbusfpga_sdram_rwpg* pg = (struct sbusfpga_sdram_rwpg*)data; + exchange_with_mem_checksum_write(sc, pg->checksum); + err = sbusfpga_sdram_write_block(sc, pg->pgnum * 4, 4, pg->pgdata); + exchange_with_mem_checksum_read(sc, pg->checksum); + delay(1); + exchange_with_mem_checksum_read(sc, pg->checksumbis); + pg->last_blk = exchange_with_mem_last_blk_read(sc); + pg->last_dma = exchange_with_mem_last_dma_read(sc); + pg->dma_wrdone = exchange_with_mem_dma_wrdone_read(sc); + pg->vdma_err = exchange_with_mem_sbus_master_error_virtual_read(sc); + if (err != 0) + err = EIO; + goto done; + } + } + + err = dk_ioctl(&sc->dk, dev, cmd, data, flag, l); + /*if (err2 != EPASSTHROUGH) + err = err2; + else + err = ENOTTY;*/ + + done: + return err; +} + #define DMA_STATUS_CHECK_BITS (0x01F) int @@ -652,7 +682,7 @@ dma_init(struct sbusfpga_sdram_softc *sc) { return 0; } - aprint_normal_dev(sc->dk.sc_dev, "DMA: SW -> kernal address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); + aprint_normal_dev(sc->dk.sc_dev, "DMA: SW -> kernel address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); return 1; } From 15559b38b9ad6918c89932aeb536006961c0ca09 Mon Sep 17 00:00:00 2001 From: 
Romain Dolbeau Date: Sat, 21 Aug 2021 02:13:08 -0400 Subject: [PATCH 56/78] try to track down the issue... --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 219 ++++++++++++++++-- 1 file changed, 194 insertions(+), 25 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index da03577..5875bae 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -93,6 +93,9 @@ static int start_job(struct sbusfpga_curve25519engine_softc *sc); static int wait_job(struct sbusfpga_curve25519engine_softc *sc); static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); +static int power_on(struct sbusfpga_curve25519engine_softc *sc); +static int power_off(struct sbusfpga_curve25519engine_softc *sc); + #define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) int @@ -102,6 +105,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; int err = 0; + /* first we need to turn the engine power on ... 
*/ + power_on(sc); + if (!sc->initialized) { if (init_program(sc)) { return ENXIO; @@ -130,6 +136,8 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st err = EINVAL; break; } + + power_off(sc); return(err); } @@ -161,6 +169,7 @@ sbusfpga_curve25519engine_match(device_t parent, cfdata_t cf, void *aux) return (strcmp("betrustedc25519e", sa->sa_name) == 0); } +#if 1 #if 0 static const uint32_t program[192] = {0x00480800, 0x007407cc, @@ -309,6 +318,165 @@ static const uint32_t program[192] = {0x00480800, }; static const uint32_t program_len = 141; #else +static const uint32_t program[192] = {0x00640840, + 0x00680800, + 0x006c0600, + 0x00700840, + 0x00500a40, + 0x00554505, + 0x00500a00, + 0x00554545, + 0x00500940, + 0x00554545, + 0x00500840, + 0x004d4546, + 0x00480800, + 0x007407cc, + 0x007c07cb, + 0x0049d483, + 0x0079b643, + 0x0079e482, + 0x00659783, + 0x006db783, + 0x0079c683, + 0x0079e482, + 0x0069a783, + 0x0071c783, + 0x00480740, + 0x00500640, + 0x00540680, + 0x005806c0, + 0x005c0700, + 0x00015505, + 0x00780008, + 0x0001e006, + 0x005558c6, + 0x00055505, + 0x00780048, + 0x0005e046, + 0x00097585, + 0x00780088, + 0x0009e086, + 0x005d78c6, + 0x000d7585, + 0x007800c8, + 0x000de0c6, + 0x00100007, + 0x00141047, + 0x007458c6, + 0x0019d105, + 0x00780188, + 0x0019e186, + 0x001c3007, + 0x00202047, + 0x002481c5, + 0x00780248, + 0x0025e246, + 0x007488c6, + 0x0029d1c5, + 0x00780288, + 0x0029e286, + 0x002c9247, + 0x0030a287, + 0x00346907, + 0x00385107, + 0x003c5345, + 0x007803c8, + 0x003de3c6, + 0x0040f187, + 0x0044c607, + 0x00500380, + 0x00540400, + 0x005802c0, + 0x005c0440, + 0x00640500, + 0x00680540, + 0x006c0580, + 0x007005c0, + 0x010004c9, + 0x004e14c6, + 0xdf800809, + 0x0079b643, + 0x0079e482, + 0x00659783, + 0x006db783, + 0x0079c683, + 0x0079e482, + 0x0069a783, + 0x0071c783, + 0x00740640, + 0x00780680, + 0x0001e787, + 0x00040007, + 0x00041047, + 0x00081787, + 0x000c2007, + 0x001030c7, + 0x00144087, + 0x00700940, + 0x00185147, + 0x00721706, + 
0x01000709, + 0x00186187, + 0xfe000809, + 0x001c5187, + 0x00700980, + 0x002071c7, + 0x00721706, + 0x01000709, + 0x00208207, + 0xfe000809, + 0x00247207, + 0x007009c0, + 0x00289247, + 0x00721706, + 0x01000709, + 0x0028a287, + 0xfe000809, + 0x002c9287, + 0x00700980, + 0x0030b2c7, + 0x00721706, + 0x01000709, + 0x0030c307, + 0xfe000809, + 0x00347307, + 0x00700a00, + 0x0038d347, + 0x00721706, + 0x01000709, + 0x0038e387, + 0xfe000809, + 0x003cd387, + 0x00700a40, + 0x0040f3c7, + 0x00721706, + 0x01000709, + 0x00410407, + 0xfe000809, + 0x0044f407, + 0x00700a00, + 0x00491447, + 0x00721706, + 0x01000709, + 0x00492487, + 0xfe000809, + 0x004cd487, + 0x00700940, + 0x005134c7, + 0x00721706, + 0x01000709, + 0x00514507, + 0xfe000809, + 0x00543507, + 0x007d5747, + 0x0000000a, + 0x0000000a, + 0x0000000a, +}; +static const uint32_t program_len = 153; +#endif +#else static const uint32_t program[16] = { 0x00640a40, 0x00680840, @@ -318,7 +486,6 @@ static const uint32_t program[16] = { static const uint32_t program_len = 3; #endif - /* * Attach all the sub-devices we can find */ @@ -406,6 +573,9 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) sc->sc_burst, sbsc->sc_burst); + /* first we need to turn the engine power on ... 
*/ + power_on(sc); + if (init_program(sc)) { if (init_program(sc)) { aprint_normal_dev(sc->sc_dev, "INIT - FAILED\n"); @@ -416,6 +586,8 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) } else { sc->initialized = 1; } + + power_off(sc); } #define CONFIG_CSR_DATA_WIDTH 32 @@ -444,17 +616,24 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) #undef CSR_SDPHY_BASE #undef CSR_TRNG_BASE +static int power_on(struct sbusfpga_curve25519engine_softc *sc) { + int err = 0; + if ((curve25519engine_power_read(sc) & 1) == 0) { + curve25519engine_power_write(sc, 1); + delay(2); + } + return err; +} +static int power_off(struct sbusfpga_curve25519engine_softc *sc) { + int err = 0; + curve25519engine_power_write(sc, 0); + return err; +} static int init_program(struct sbusfpga_curve25519engine_softc *sc) { /* the microcode is a the beginning */ int err = 0; uint32_t i; - - /* first we need to turn the engine power on ... */ - if ((curve25519engine_power_read(sc) & 1) == 0) { - curve25519engine_power_write(sc, 1); - delay(2); - } for (i = 0 ; i < program_len + 1 ; i++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_microcode, (i*4), program[i]); @@ -496,15 +675,13 @@ static int init_program(struct sbusfpga_curve25519engine_softc *sc) { } const int test_reg_num = 73; const uint32_t test_reg_value = 0x0C0FFEE0; - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile, test_reg_num, test_reg_value); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile, 4*test_reg_num, test_reg_value); delay(1); - if ((x = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile, test_reg_num)) != test_reg_value) { + if ((x = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile, 4*test_reg_num)) != test_reg_value) { aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register file failure: 0x%08x != 0x%08x\n", x, test_reg_value); err = 1; } #endif - - curve25519engine_power_write(sc, 0); return err; } @@ -518,12 +695,6 @@ static int 
write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf aprint_error_dev(sc->sc_dev, "WRITE - Curve25519Engine status: 0x%08x, still running?\n", status); return -ENXIO; } - - /* first we need to turn the engine power on ... */ - if ((curve25519engine_power_read(sc) & 1) == 0) { - curve25519engine_power_write(sc, 1); - delay(2); - } #define REG_BASE(reg) (base + (reg * 32)) @@ -545,12 +716,12 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf #define REG_BASE(reg) (base + (reg * 32)) #define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 && !err; i ++) { - if (job->affine_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i))) err = ENXIO; - if (job->x0_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i))) err = ENXIO; - if (job->x0_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i))) err = ENXIO; - if (job->x1_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i))) err = ENXIO; - if (job->x1_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i))) err = ENXIO; - if (job->scalar[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i))) err = ENXIO; + if (job->affine_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i))) err = EIO; + if (job->x0_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i))) err = EIO; + if (job->x0_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i))) err = EIO; + if (job->x1_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i))) err = EIO; + if (job->x1_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i))) err = EIO; + if (job->scalar[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i))) err = EIO; delay(1); } if (err) aprint_error_dev(sc->sc_dev, "WRITE - data 
did not read-write properly\n"); @@ -616,8 +787,6 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf aprint_normal_dev(sc->sc_dev, "READ - Curve25519Engine 19 low 32 bits: 0x%08x\n", bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,0))); #undef SUBREG_ADDR #undef REG_BASE - - curve25519engine_power_write(sc, 0); return 0; } From d8914159b1333e571af29c83af6e97c5f73c91fd Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 21 Aug 2021 02:14:04 -0400 Subject: [PATCH 57/78] try to track down the issue... --- .../engine_code/engine_code.rs | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index d906a40..6f9dff0 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -1,4 +1,4 @@ -#![recursion_limit="512"] +#![recursion_limit="768"] extern crate engine25519_as; use engine25519_as::*; @@ -20,6 +20,23 @@ fn main() -> std::io::Result<()> { // %19 is the loop counter, starts with 254 (if 0, loop runs exactly once) // I // %31 is the scalar // I // %18 is the swap variable + // START NEW + psa %25, #1 + psa %26, #0 + psa %27, %24 + psa %28, #1 + // #9 is 100 + psa %20, #9 + add %21, %20, %20 + // #8 is 50 + psa %20, #8 + add %21, %21, %20 + // #5 is 5 + psa %20, #5 + add %21, %21, %20 + psa %20, #1 + sub %19, %21, %20 + // END NEW psa %18, #0 // for i in (0..255).rev() @@ -288,8 +305,8 @@ fn main() -> std::io::Result<()> { fin ); let mut pos = 0; - while pos < mcode2.len() { - println!("0x{:08x},", mcode2[pos]); + while pos < mcode.len() { + println!("0x{:08x},", mcode[pos]); pos = pos + 1; } Ok(()) From 67ad40c65ed47ba4228da05f5d129458b8351774 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 21 Aug 2021 02:15:43 -0400 Subject: [PATCH 58/78] (optional) checksumming in SDRAM DMA, change 
clocking on Engine, ... --- sbus-to-ztex-gateware-migen/netbsd_csr.h | 139 +++++++++++++- .../sbus_to_fpga_blk_dma.py | 77 +++++--- .../sbus_to_fpga_fsm.py | 176 ++++++++++++------ .../sbus_to_fpga_soc.py | 39 ++-- 4 files changed, 323 insertions(+), 108 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 87637be..06a181f 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-07-25 05:25:02 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-20 12:28:27 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -592,6 +592,63 @@ static inline uint32_t exchange_with_mem_blk_cnt_read(struct sbusfpga_exchange_w static inline void exchange_with_mem_blk_cnt_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t v) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x14L, v); } +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_BLK_CNT_OFFSET 0 +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_BLK_CNT_SIZE 16 +static inline uint32_t exchange_with_mem_blk_cnt_blk_cnt_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 16)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t exchange_with_mem_blk_cnt_blk_cnt_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_blk_cnt_read(sc); + return exchange_with_mem_blk_cnt_blk_cnt_extract(sc, word); +} +static inline uint32_t exchange_with_mem_blk_cnt_blk_cnt_replace(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 16)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void 
exchange_with_mem_blk_cnt_blk_cnt_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t plain_value) { + uint32_t oldword = exchange_with_mem_blk_cnt_read(sc); + uint32_t newword = exchange_with_mem_blk_cnt_blk_cnt_replace(sc, oldword, plain_value); + exchange_with_mem_blk_cnt_write(sc, newword); +} +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_RSVD_OFFSET 16 +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_RSVD_SIZE 15 +static inline uint32_t exchange_with_mem_blk_cnt_rsvd_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 15)-1); + return ( (oldword >> 16) & mask ); +} +static inline uint32_t exchange_with_mem_blk_cnt_rsvd_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_blk_cnt_read(sc); + return exchange_with_mem_blk_cnt_rsvd_extract(sc, word); +} +static inline uint32_t exchange_with_mem_blk_cnt_rsvd_replace(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 15)-1); + return (oldword & (~(mask << 16))) | (mask & plain_value)<< 16 ; +} +static inline void exchange_with_mem_blk_cnt_rsvd_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t plain_value) { + uint32_t oldword = exchange_with_mem_blk_cnt_read(sc); + uint32_t newword = exchange_with_mem_blk_cnt_rsvd_replace(sc, oldword, plain_value); + exchange_with_mem_blk_cnt_write(sc, newword); +} +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_RD_WR_OFFSET 31 +#define CSR_EXCHANGE_WITH_MEM_BLK_CNT_RD_WR_SIZE 1 +static inline uint32_t exchange_with_mem_blk_cnt_rd_wr_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 31) & mask ); +} +static inline uint32_t exchange_with_mem_blk_cnt_rd_wr_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_blk_cnt_read(sc); + return exchange_with_mem_blk_cnt_rd_wr_extract(sc, word); +} +static inline uint32_t 
exchange_with_mem_blk_cnt_rd_wr_replace(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 31))) | (mask & plain_value)<< 31 ; +} +static inline void exchange_with_mem_blk_cnt_rd_wr_write(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t plain_value) { + uint32_t oldword = exchange_with_mem_blk_cnt_read(sc); + uint32_t newword = exchange_with_mem_blk_cnt_rd_wr_replace(sc, oldword, plain_value); + exchange_with_mem_blk_cnt_write(sc, newword); +} #define CSR_EXCHANGE_WITH_MEM_LAST_BLK_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x18L) #define CSR_EXCHANGE_WITH_MEM_LAST_BLK_SIZE 1 static inline uint32_t exchange_with_mem_last_blk_read(struct sbusfpga_exchange_with_mem_softc *sc) { @@ -602,21 +659,83 @@ static inline uint32_t exchange_with_mem_last_blk_read(struct sbusfpga_exchange_ static inline uint32_t exchange_with_mem_last_dma_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x1cL); } -#define CSR_EXCHANGE_WITH_MEM_BLK_REM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x20L) -#define CSR_EXCHANGE_WITH_MEM_BLK_REM_SIZE 1 -static inline uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_exchange_with_mem_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_DMA_WRDONE_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x20L) +#define CSR_EXCHANGE_WITH_MEM_DMA_WRDONE_SIZE 1 +static inline uint32_t exchange_with_mem_dma_wrdone_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x20L); } -#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x24L) -#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_SIZE 1 -static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_exchange_with_mem_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_BLK_REM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x24L) +#define CSR_EXCHANGE_WITH_MEM_BLK_REM_SIZE 1 +static inline 
uint32_t exchange_with_mem_blk_rem_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x24L); } -#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x28L) -#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_SIZE 1 -static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_exchange_with_mem_softc *sc) { +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x28L) +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x28L); } +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_RD_FSM_BUSY_OFFSET 0 +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_RD_FSM_BUSY_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_rd_fsm_busy_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t exchange_with_mem_dma_status_rd_fsm_busy_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_dma_status_read(sc); + return exchange_with_mem_dma_status_rd_fsm_busy_extract(sc, word); +} +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_WR_FSM_BUSY_OFFSET 1 +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_WR_FSM_BUSY_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_wr_fsm_busy_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 1) & mask ); +} +static inline uint32_t exchange_with_mem_dma_status_wr_fsm_busy_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_dma_status_read(sc); + return exchange_with_mem_dma_status_wr_fsm_busy_extract(sc, word); +} +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_HAS_WR_DATA_OFFSET 2 +#define 
CSR_EXCHANGE_WITH_MEM_DMA_STATUS_HAS_WR_DATA_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_has_wr_data_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 2) & mask ); +} +static inline uint32_t exchange_with_mem_dma_status_has_wr_data_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_dma_status_read(sc); + return exchange_with_mem_dma_status_has_wr_data_extract(sc, word); +} +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_HAS_REQUESTS_OFFSET 3 +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_HAS_REQUESTS_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_has_requests_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 3) & mask ); +} +static inline uint32_t exchange_with_mem_dma_status_has_requests_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_dma_status_read(sc); + return exchange_with_mem_dma_status_has_requests_extract(sc, word); +} +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_HAS_RD_DATA_OFFSET 4 +#define CSR_EXCHANGE_WITH_MEM_DMA_STATUS_HAS_RD_DATA_SIZE 1 +static inline uint32_t exchange_with_mem_dma_status_has_rd_data_extract(struct sbusfpga_exchange_with_mem_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 4) & mask ); +} +static inline uint32_t exchange_with_mem_dma_status_has_rd_data_read(struct sbusfpga_exchange_with_mem_softc *sc) { + uint32_t word = exchange_with_mem_dma_status_read(sc); + return exchange_with_mem_dma_status_has_rd_data_extract(sc, word); +} +#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x2cL) +#define CSR_EXCHANGE_WITH_MEM_WR_TOSDRAM_SIZE 1 +static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_exchange_with_mem_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x2cL); +} 
+#define CSR_EXCHANGE_WITH_MEM_SBUS_MASTER_ERROR_VIRTUAL_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x30L) +#define CSR_EXCHANGE_WITH_MEM_SBUS_MASTER_ERROR_VIRTUAL_SIZE 1 +static inline uint32_t exchange_with_mem_sbus_master_error_virtual_read(struct sbusfpga_exchange_with_mem_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x30L); +} +#define CSR_EXCHANGE_WITH_MEM_CHECKSUM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x34L) +#define CSR_EXCHANGE_WITH_MEM_CHECKSUM_SIZE 8 #endif // CSR_EXCHANGE_WITH_MEM_BASE /* sdram */ diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index 828f0f5..df1d9ff 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -10,7 +10,7 @@ from litex.soc.interconnect import wishbone # width of fromsbus_fifo is 'blk_addr_width' + 'burst_size * 32' (blk_addr + data) # the blk_addr does the round-trip to accompany the data class ExchangeWithMem(Module, AutoCSR): - def __init__(self, soc, tosbus_fifo, fromsbus_fifo, fromsbus_req_fifo, dram_dma_writer, dram_dma_reader, burst_size = 8): + def __init__(self, soc, tosbus_fifo, fromsbus_fifo, fromsbus_req_fifo, dram_dma_writer, dram_dma_reader, burst_size = 8, do_checksum = False): #self.wishbone_r_slave = wishbone.Interface(data_width=soc.bus.data_width) #self.wishbone_w_slave = wishbone.Interface(data_width=soc.bus.data_width) self.tosbus_fifo = tosbus_fifo @@ -61,34 +61,59 @@ class ExchangeWithMem(Module, AutoCSR): self.blk_addr = CSRStorage(32, description = "SDRAM Block address to read/write from Wishbone memory (block of size {})".format(data_width)) self.dma_addr = CSRStorage(32, description = "Host Base address where to write/read data (i.e. 
SPARC Virtual addr)") - self.blk_cnt = CSRStorage(32, write_from_dev=True, description = "How many blk to read/write (max 2^{}-1); bit 31 is RD".format(max_block_bits), reset = 0) + #self.blk_cnt = CSRStorage(32, write_from_dev=True, description = "How many blk to read/write (max 2^{}-1); bit 31 is RD".format(max_block_bits), reset = 0) + self.blk_cnt = CSRStorage(write_from_dev=True, fields = [CSRField("blk_cnt", max_block_bits, description = "How many blk to read/write (max 2^{}-1)".format(max_block_bits)), + CSRField("rsvd", 32 - (max_block_bits + 1), description = "Reserved"), + CSRField("rd_wr", 1, description = "Read/Write selector"), + ]) self.last_blk = CSRStatus(32, description = "Last Blk addr finished on WB side") self.last_dma = CSRStatus(32, description = "Last DMA addr finished on WB side") + self.dma_wrdone = CSRStatus(32, description = "DMA Block written to SDRAM", reset = 0) self.blk_rem = CSRStatus(32, description = "How many block remaining; bit 31 is RD", reset = 0) - self.dma_status = CSRStatus(32, description = "Status register") + self.dma_status = CSRStatus(fields = [CSRField("rd_fsm_busy", 1, description = "Read FSM is doing some work"), + CSRField("wr_fsm_busy", 1, description = "Write FSM is doing some work"), + CSRField("has_wr_data", 1, description = "Data available to write to SDRAM"), + CSRField("has_requests", 1, description = "There's outstanding requests to the SBus"), + CSRField("has_rd_data", 1, description = "Data available to write to SBus"), + ]) self.wr_tosdram = CSRStatus(32, description = "Last address written to SDRAM") + self.sbus_master_error_virtual = CSRStatus(32, description = "Virtual address that failed translation phase") + + if (do_checksum): + self.checksum = CSRStorage(data_width_bits, write_from_dev=True, description = "checksum (XOR)"); + self.submodules.req_r_fsm = req_r_fsm = FSM(reset_state="Reset") self.submodules.req_w_fsm = req_w_fsm = FSM(reset_state="Reset") - # this could use CSRFields... 
- self.comb += self.dma_status.status[0:1].eq(~req_r_fsm.ongoing("Idle")) # Read FSM Busy - self.comb += self.dma_status.status[1:2].eq(~req_w_fsm.ongoing("Idle")) # Write FSM Busy - self.comb += self.dma_status.status[2:3].eq(self.fromsbus_fifo.readable) # Some data available to write to memory + self.comb += self.dma_status.fields.rd_fsm_busy.eq(~req_r_fsm.ongoing("Idle")) # Read FSM Busy + self.comb += self.dma_status.fields.wr_fsm_busy.eq(~req_w_fsm.ongoing("Idle")) # Write FSM Busy + self.comb += self.dma_status.fields.has_wr_data.eq(self.fromsbus_fifo.readable) # Some data available to write to memory + + # The next two status bits reflect stats in the SBus clock domain self.submodules.fromsbus_req_fifo_readable_sync = PulseSynchronizer("sbus", "sys") fromsbus_req_fifo_readable_in_sys = Signal() self.comb += self.fromsbus_req_fifo_readable_sync.i.eq(self.fromsbus_req_fifo.readable) self.comb += fromsbus_req_fifo_readable_in_sys.eq(self.fromsbus_req_fifo_readable_sync.o) - self.comb += self.dma_status.status[3:4].eq(fromsbus_req_fifo_readable_in_sys) # we still have outstanding requests + + # w/o this extra delay, the driver sees an outdated checksum for some reason... 
+ # there's probably a more fundamental issue :-( + fromsbus_req_fifo_readable_in_sys_cnt = Signal(5) + self.sync += If(fromsbus_req_fifo_readable_in_sys, + fromsbus_req_fifo_readable_in_sys_cnt.eq(0x1F) + ).Else( + If(fromsbus_req_fifo_readable_in_sys_cnt > 0, + fromsbus_req_fifo_readable_in_sys_cnt.eq(fromsbus_req_fifo_readable_in_sys_cnt - 1) + ) + ) + #self.comb += self.dma_status.fields.has_requests.eq(fromsbus_req_fifo_readable_in_sys) # we still have outstanding requests + self.comb += self.dma_status.fields.has_requests.eq(fromsbus_req_fifo_readable_in_sys | (fromsbus_req_fifo_readable_in_sys_cnt != 0)) # we still have outstanding requests, or had recently + self.submodules.tosbus_fifo_readable_sync = PulseSynchronizer("sbus", "sys") tosbus_fifo_readable_in_sys = Signal() self.comb += self.tosbus_fifo_readable_sync.i.eq(self.tosbus_fifo.readable) self.comb += tosbus_fifo_readable_in_sys.eq(self.tosbus_fifo_readable_sync.o) - self.comb += self.dma_status.status[4:5].eq(tosbus_fifo_readable_in_sys) # there's still data to be sent to memory; this will drop before the last SBus Master Cycle is finished, but then the SBus is busy so the host won't be able to read the status before the cycle is finished so we're good - - self.comb += self.dma_status.status[8:9].eq(req_r_fsm.ongoing("ReqFromMemory")) - self.comb += self.dma_status.status[9:10].eq(req_r_fsm.ongoing("WaitForData")) - self.comb += self.dma_status.status[10:11].eq(req_r_fsm.ongoing("QueueReqToMemory")) + self.comb += self.dma_status.fields.has_rd_data.eq(tosbus_fifo_readable_in_sys) # there's still data to be sent to memory; this will drop before the last SBus Master Cycle is finished, but then the SBus is busy so the host won't be able to read the status before the cycle is finished so we're good #self.comb += self.dma_status.status[16:17].eq(self.wishbone_w_master.cyc) # show the WB iface status (W) #self.comb += self.dma_status.status[17:18].eq(self.wishbone_w_master.stb) @@ -106,17 +131,17 @@ 
class ExchangeWithMem(Module, AutoCSR): NextState("Idle") ) req_r_fsm.act("Idle", - If(((self.blk_cnt.storage[0:max_block_bits] != 0) & # checking self.blk_cnt.re might be too transient ? -> need to auto-reset - (~self.blk_cnt.storage[31:32])), # !read -> write + If(((self.blk_cnt.fields.blk_cnt != 0) & # checking self.blk_cnt.re might be too transient ? -> need to auto-reset + (~self.blk_cnt.fields.rd_wr)), # !read -> write NextValue(local_r_addr, self.blk_addr.storage), NextValue(dma_r_addr, self.dma_addr.storage), - NextValue(self.blk_rem.status, Cat(self.blk_cnt.storage[0:max_block_bits], Signal(32-max_block_bits, reset = 0))), + NextValue(self.blk_rem.status, Cat(self.blk_cnt.fields.blk_cnt, Signal(32-max_block_bits, reset = 0))), NextState("ReqFromMemory") - ).Elif(((self.blk_cnt.storage[0:max_block_bits] != 0) & # checking self.blk_cnt.re might be too transient ? -> need to auto-reset - (self.blk_cnt.storage[31:32])), # read + ).Elif(((self.blk_cnt.fields.blk_cnt != 0) & # checking self.blk_cnt.re might be too transient ? 
-> need to auto-reset + (self.blk_cnt.fields.rd_wr)), # read NextValue(local_r_addr, self.blk_addr.storage), NextValue(dma_r_addr, self.dma_addr.storage), - NextValue(self.blk_rem.status, Cat(self.blk_cnt.storage[0:max_block_bits], Signal(32-max_block_bits, reset = 0))), + NextValue(self.blk_rem.status, Cat(self.blk_cnt.fields.blk_cnt, Signal(32-max_block_bits, reset = 0))), NextState("QueueReqToMemory") ) ) @@ -128,10 +153,13 @@ class ExchangeWithMem(Module, AutoCSR): ) ) req_r_fsm.act("WaitForData", - If(self.dram_dma_reader.source.valid & - self.tosbus_fifo.writable, + If(self.dram_dma_reader.source.valid & self.tosbus_fifo.writable, self.tosbus_fifo.we.eq(1), self.tosbus_fifo.din.eq(Cat(dma_r_addr, self.dram_dma_reader.source.data)), + If(do_checksum, + self.checksum.we.eq(1), + self.checksum.dat_w.eq(self.checksum.storage ^ self.dram_dma_reader.source.data), + ), self.dram_dma_reader.source.ready.eq(1), NextValue(self.last_blk.status, local_r_addr), NextValue(self.last_dma.status, dma_r_addr), @@ -203,7 +231,12 @@ class ExchangeWithMem(Module, AutoCSR): self.dram_dma_writer.sink.valid.eq(1), NextValue(self.wr_tosdram.status, self.fromsbus_fifo.dout[0:blk_addr_width]), If(self.dram_dma_writer.sink.ready, - self.fromsbus_fifo.re.eq(1) + self.fromsbus_fifo.re.eq(1), + NextValue(self.dma_wrdone.status, self.dma_wrdone.status + 1), + If(do_checksum, + self.checksum.we.eq(1), + self.checksum.dat_w.eq(self.checksum.storage ^ self.fromsbus_fifo.dout[blk_addr_width:(blk_addr_width + data_width_bits)]), + ) ) ) ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index f311d52..8d5a274 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -274,15 +274,16 @@ class SBusFPGABus(Module): SBUS_3V3_PA_i = Signal(28) self.comb += SBUS_3V3_PA_i.eq(pad_SBUS_3V3_PA) - p_data = Signal(32) # data to read/write - - data_read_addr = Signal(30) # first addr of 
req. when reading from WB - data_read_enable = Signal() # start enqueuing req. to read from WB + p_data = Signal(32) # data to read/write in Slave mode + # buffers when someone inside issues a DMA write request to go over SBus master_data = Signal(32) # could be merged with p_data - master_data_src_tosbus_fifo = Signal() - master_data_src_fromsbus_fifo = Signal() master_addr = Signal(30) # could be meged with data_read_addr + # FIXME, ugly + # we're handling a request from the FIFO (not wishbone) - write to host + master_data_src_tosbus_fifo = Signal() + # we're handling a request from the FIFO (not wishbone) - read from host + master_data_src_fromsbus_fifo = Signal() master_size = Signal(4) master_idx = Signal(2) @@ -298,10 +299,10 @@ class SBusFPGABus(Module): #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) - self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.cyc) - self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.stb) - self.sync += platform.request("user_led", 6).eq(self.wishbone_slave.we) - self.sync += platform.request("user_led", 7).eq(self.wishbone_slave.ack) + #self.sync += platform.request("user_led", 4).eq(self.wishbone_slave.cyc) + #self.sync += platform.request("user_led", 5).eq(self.wishbone_slave.stb) + #self.sync += platform.request("user_led", 6).eq(self.wishbone_slave.we) + #self.sync += platform.request("user_led", 7).eq(self.wishbone_slave.ack) #self.sync += platform.request("user_led", 0).eq(self.wishbone_slave.err) #led4 = platform.request("user_led", 4) #led5 = platform.request("user_led", 5) @@ -342,11 +343,12 @@ class SBusFPGABus(Module): # platform.request("user_led", 2).eq(cycle_busmaster[6]), # platform.request("user_led", 3).eq(cycle_busmaster[7])) + # Read buffering when a DMA read request is issued by Wishbone self.master_read_buffer_data = Array(Signal(32) for a in range(4)) self.master_read_buffer_addr = Signal(28) self.master_read_buffer_done = Array(Signal() for a in 
range(4)) self.master_read_buffer_read = Array(Signal() for a in range(4)) - self.master_read_buffer_start = Signal() + self.master_read_buffer_start = Signal(reset = 0) #self.sync += platform.request("user_led", 1).eq(self.master_read_buffer_start) @@ -363,23 +365,37 @@ class SBusFPGABus(Module): # slave_fsm.ongoing("Master_Read_Finish") | # slave_fsm.ongoing("Master_Write") | # slave_fsm.ongoing("Master_Write_Final")) - self.sync += platform.request("user_led", 2).eq(slave_fsm.ongoing("Slave_Do_Read") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Data") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Wishbone") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord_Wait_For_Data") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord_Wait_For_Wishbone") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte_Wait_For_Data") | - slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte_Wait_For_Wishbone")) - self.sync += platform.request("user_led", 3).eq(slave_fsm.ongoing("Slave_Ack_Reg_Write_Burst") | - slave_fsm.ongoing("Slave_Ack_Reg_Write_Final") | - slave_fsm.ongoing("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone") | - slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord") | - slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") | - slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte") | - slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone")) + #self.sync += platform.request("user_led", 2).eq(slave_fsm.ongoing("Slave_Do_Read") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Data") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_Burst_Wait_For_Wishbone") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord_Wait_For_Data") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_HWord_Wait_For_Wishbone") | + # 
slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte_Wait_For_Data") | + # slave_fsm.ongoing("Slave_Ack_Read_Reg_Byte_Wait_For_Wishbone")) + #self.sync += platform.request("user_led", 3).eq(slave_fsm.ongoing("Slave_Ack_Reg_Write_Burst") | + # slave_fsm.ongoing("Slave_Ack_Reg_Write_Final") | + # slave_fsm.ongoing("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone") | + # slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord") | + # slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") | + # slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte") | + # slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone")) + + master_error_seen = Signal(4, reset = 0) + self.sync += platform.request("user_led", 0).eq(master_error_seen[0:1]) + self.sync += platform.request("user_led", 1).eq(master_error_seen[1:2]) + self.sync += platform.request("user_led", 2).eq(master_error_seen[2:3]) + self.sync += platform.request("user_led", 3).eq(master_error_seen[3:4]) + master_error_details = Signal(4, reset = 0) + self.sync += platform.request("user_led", 4).eq(master_error_details[0:1]) + self.sync += platform.request("user_led", 5).eq(master_error_details[1:2]) + self.sync += platform.request("user_led", 6).eq(master_error_details[2:3]) + self.sync += platform.request("user_led", 7).eq(master_error_details[3:4]) + + self.sbus_master_last_virtual = Signal(32) # last VDMA address put on the bus in master mode + self.sbus_master_error_virtual = Signal(32) # this gets exported to a Wishbone CSR in exchange_with_mem #self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) #self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) @@ -665,40 +681,47 @@ class SBusFPGABus(Module): self.wishbone_slave.dat_w[ 0: 8])), Case(self.wishbone_slave.sel, { 0xf: [NextValue(burst_counter, 0), - NextValue(burst_limit_m1, 0), ## only single word for now - NextValue(master_size, SIZ_WORD), - NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), - 
NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(burst_limit_m1, 0), ## only single word for now + NextValue(master_size, SIZ_WORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_WORD), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), ], 0x1: [NextValue(master_idx, 3), - NextValue(master_size, SIZ_BYTE), - NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), ], 0x2: [NextValue(master_idx, 2), - NextValue(master_size, SIZ_BYTE), - NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), ], 0x4: [NextValue(master_idx, 1), - NextValue(master_size, SIZ_BYTE), - NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), ], 0x8: [NextValue(master_idx, 0), - NextValue(master_size, SIZ_BYTE), - NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), + NextValue(master_size, SIZ_BYTE), + NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), + NextValue(SBUS_3V3_D_o, 
Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), ], 0x3: [NextValue(master_idx, 2), - NextValue(master_size, SIZ_HWORD), - NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(master_size, SIZ_HWORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), ], 0xc: [NextValue(master_idx, 0), - NextValue(master_size, SIZ_HWORD), - NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), - NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + NextValue(master_size, SIZ_HWORD), + NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), + NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), ], "default":[NextValue(burst_counter, 0), # FIXME if it happens! 
NextValue(burst_limit_m1, 0), ## only single word for now @@ -730,6 +753,7 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, 3), ## only quadword word for now NextValue(SBUS_3V3_D_o, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), + NextValue(self.sbus_master_last_virtual, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), NextValue(SBUS_3V3_PPRD_o, 1), NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), NextValue(master_we, 0), @@ -750,6 +774,7 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, burst_size - 1), NextValue(SBUS_3V3_D_o, self.tosbus_fifo.dout[0:32]), + NextValue(self.sbus_master_last_virtual, self.tosbus_fifo.dout[0:32]), NextValue(master_addr, self.tosbus_fifo.dout[2:32]), NextValue(master_data, self.tosbus_fifo.dout[32:64]), NextValue(fifo_buffer, self.tosbus_fifo.dout[32:]), @@ -784,6 +809,7 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, burst_size - 1), NextValue(SBUS_3V3_D_o, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), + NextValue(self.sbus_master_last_virtual, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), NextValue(fifo_blk_addr, self.fromsbus_req_fifo.dout[0:blk_addr_width]), NextValue(master_data_src_fromsbus_fifo, 1), self.fromsbus_req_fifo.re.eq(1), @@ -1199,15 +1225,20 @@ class SBusFPGABus(Module): ), Case(SBUS_3V3_ACKs_i, { ACK_ERR: ## ouch - [NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), + [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(master_error_seen, 1), NextState("Idle")], ACK_RERUN: ### dunno how to handle that yet, - [NextValue(wishbone_slave_timeout, wishbone_default_timeout), - 
NextValue(self.wishbone_slave.err, 1), + [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1240,19 +1271,38 @@ class SBusFPGABus(Module): [NextState("Master_Read") ## redundant ], ACK_RERUN: ### burst not handled - [NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), + [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextState("Idle") ], - "default": ## ACK_ERRS or other ### burst not handled - [NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), + ACK_ERR: ## ### burst not handled + [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(master_error_seen, 8), + NextValue(master_error_details, burst_counter), + NextValue(self.sbus_master_error_virtual, self.sbus_master_last_virtual), + NextState("Idle") + ], + "default": ## other ### burst not handled + [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, + NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ), + NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(master_error_seen, 4), + NextValue(master_error_details, Cat(SBUS_3V3_ACKs_i, Signal(1, reset = 0))), NextState("Idle") ], }) @@ -1302,6 +1352,7 @@ class SBusFPGABus(Module): 
[NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(master_error_seen, 1), NextState("Idle") ], }), @@ -1372,7 +1423,8 @@ class SBusFPGABus(Module): [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextState("Idle") + NextValue(master_error_seen, 1), + NextState("Idle"), ], }) ) @@ -1463,8 +1515,8 @@ class SBusFPGABus(Module): # ##### Slave read buffering FSM #### last_read_word_idx = Signal(2) self.submodules.wishbone_slave_read_buffering_fsm = wishbone_slave_read_buffering_fsm = FSM(reset_state="Reset") - self.sync += platform.request("user_led", 0).eq(~wishbone_slave_read_buffering_fsm.ongoing("Idle")) - self.sync += platform.request("user_led", 1).eq(self.master_read_buffer_done[last_read_word_idx]) + #self.sync += platform.request("user_led", 0).eq(~wishbone_slave_read_buffering_fsm.ongoing("Idle")) + #self.sync += platform.request("user_led", 1).eq(self.master_read_buffer_done[last_read_word_idx]) wishbone_slave_read_buffering_fsm.act("Reset", NextState("Idle") ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 81de884..afe5324 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -23,6 +23,7 @@ from sbus_to_fpga_trng import * from litedram.frontend.dma import * from engine import Engine; +from migen.genlib.cdc import PulseSynchronizer, BusSynchronizer from migen.genlib.resetsync import AsyncResetSynchronizer; import sbus_to_fpga_export; @@ -40,9 +41,10 @@ class _CRG(Module): self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain # self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller - self.clock_domains.cd_clk50 = ClockDomain() # 50 
MHz for curve25519engine -> eng_clk - self.clock_domains.cd_clk100 = ClockDomain() # 100 MHz for curve25519engine -> mul_clk - self.clock_domains.cd_clk200 = ClockDomain() # 200 MHz for curve25519engine -> rf_clk + self.clock_domains.cd_clk50 = ClockDomain() # 50 MHz (gated) for curve25519engine -> eng_clk + self.clock_domains.cd_clk100 = ClockDomain() # 100 MHz for curve25519engine -> sys_clk + self.clock_domains.cd_clk100_gated = ClockDomain() # 100 MHz (gated) for curve25519engine -> mul_clk + self.clock_domains.cd_clk200 = ClockDomain() # 200 MHz (gated) for curve25519engine -> rf_clk # # # clk48 = platform.request("clk48") @@ -76,21 +78,22 @@ class _CRG(Module): platform.add_platform_command("create_generated_clock -name sys4x90clk [get_pins {{MMCME2_ADV/CLKOUT2}}]") self.comb += pll.reset.eq(~rst_sbus) # | ~por_done platform.add_false_path_constraints(self.cd_native.clk, self.cd_sbus.clk) - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_native.clk) - platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) + #platform.add_false_path_constraints(self.cd_sys.clk, self.cd_sbus.clk) + #platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) - self.submodules.curve25519_pll = curve25519_pll = S7MMCM(speedgrade=-1) curve25519_clk_freq = 80e6 + self.curve25519_on = Signal() #curve25519_pll.register_clkin(clk48, 48e6) curve25519_pll.register_clkin(self.clk48_bufg, 48e6) - curve25519_pll.create_clkout(self.cd_clk50, curve25519_clk_freq/2, margin=0) + curve25519_pll.create_clkout(self.cd_clk50, curve25519_clk_freq/2, margin=0, ce=curve25519_pll.locked & self.curve25519_on) platform.add_platform_command("create_generated_clock -name clk50 [get_pins {{MMCME2_ADV_1/CLKOUT0}}]") - curve25519_pll.create_clkout(self.cd_clk100, curve25519_clk_freq, margin=0) + 
curve25519_pll.create_clkout(self.cd_clk100, curve25519_clk_freq, margin=0, ce=curve25519_pll.locked, + gated_replicas={self.cd_clk100_gated : curve25519_pll.locked & self.curve25519_on}) platform.add_platform_command("create_generated_clock -name clk100 [get_pins {{MMCME2_ADV_1/CLKOUT1}}]") - curve25519_pll.create_clkout(self.cd_clk200, curve25519_clk_freq*2, margin=0) + curve25519_pll.create_clkout(self.cd_clk200, curve25519_clk_freq*2, margin=0, ce=curve25519_pll.locked & self.curve25519_on) platform.add_platform_command("create_generated_clock -name clk200 [get_pins {{MMCME2_ADV_1/CLKOUT2}}]") #self.comb += curve25519_pll.reset.eq(~rst_sbus) # | ~por_done platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk50.clk) @@ -234,9 +237,9 @@ class SBusFPGA(SoCCore): # burst_size=16 should work on Ultra systems, but then they probably should go for 64-bits ET as well... # Older systems are probably limited to burst_size=4, (it should always be available) burst_size=8 - self.submodules.tosbus_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=(32+burst_size*32), depth=4)) - self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+burst_size*32), depth=4)) - self.submodules.fromsbus_req_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+32), depth=16)) + self.submodules.tosbus_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=(32+burst_size*32), depth=burst_size)) + self.submodules.fromsbus_fifo = ClockDomainsRenamer({"write": "sbus", "read": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+burst_size*32), depth=burst_size)) + self.submodules.fromsbus_req_fifo = ClockDomainsRenamer({"read": "sbus", "write": "sys"})(AsyncFIFOBuffered(width=((30-log2_int(burst_size))+32), depth=burst_size)) self.submodules.dram_dma_writer = 
LiteDRAMDMAWriter(port=self.sdram.crossbar.get_port(mode="write", data_width=burst_size*32), fifo_depth=4, @@ -252,7 +255,8 @@ class SBusFPGA(SoCCore): fromsbus_req_fifo=self.fromsbus_req_fifo, dram_dma_writer=self.dram_dma_writer, dram_dma_reader=self.dram_dma_reader, - burst_size=burst_size) + burst_size=burst_size, + do_checksum = True) _sbus_bus = SBusFPGABus(platform=self.platform, hold_reset=hold_reset, @@ -269,6 +273,10 @@ class SBusFPGA(SoCCore): self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) #self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_slave) #self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_slave) + + self.submodules.sbus_master_error_virtual_sync = BusSynchronizer(width=32, idomain="sbus", odomain="sys") + self.comb += self.sbus_master_error_virtual_sync.i.eq(self.sbus_bus.sbus_master_error_virtual) + self.comb += self.exchange_with_mem.sbus_master_error_virtual.status.eq(self.sbus_master_error_virtual_sync.o) #self.add_sdcard() @@ -277,10 +285,13 @@ class SBusFPGA(SoCCore): # beware the naming, as 'clk50' 'sysclk' 'clk200' are used in the original platform constraints # the local engine.py was slightly modified to have configurable names, so we can have 'clk50', 'clk100', 'clk200' # Beware that Engine implicitely runs in 'sys' by default, need to rename that one as well - self.submodules.curve25519engine = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100", "sys":"clk100"})(Engine(platform=platform,prefix=self.mem_map.get("curve25519engine", None))) + self.submodules.curve25519engine = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100_gated", "sys":"clk100"})(Engine(platform=platform,prefix=self.mem_map.get("curve25519engine", None))) self.submodules.curve25519engine_wishbone_cdc = 
wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100") self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) #self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) + self.submodules.curve25519_on_sync = PulseSynchronizer("clk100", "sys") + self.comb += self.curve25519_on_sync.i.eq(self.curve25519engine.power.fields.on) + self.comb += self.crg.curve25519_on.eq(self.curve25519_on_sync.o) def main(): parser = argparse.ArgumentParser(description="SbusFPGA") From 2111020a0cfa3d2ce4ad87fd7a4f8382a5491966 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 21 Aug 2021 07:41:02 -0400 Subject: [PATCH 59/78] add an SBus statistics module (and discover a bug in the sbus slave timeouts and a lot of slave re-run) --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 2 + .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 2 + .../9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c | 6 +- sbus-to-ztex-gateware-migen/netbsd_csr.h | 88 +++++++++++++- sbus-to-ztex-gateware-migen/prom_csr.fth | 5 +- sbus-to-ztex-gateware-migen/prom_migen.fth | 27 +++++ .../sbus_to_fpga_blk_dma.py | 7 +- .../sbus_to_fpga_fsm.py | 110 +++++++++++++++++- .../sbus_to_fpga_soc.py | 6 +- 9 files changed, 237 insertions(+), 16 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 5875bae..bfbcc19 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -596,6 +596,7 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) //#define CSR_CURVE25519ENGINE_BASE #define CSR_DDRPHY_BASE #define CSR_EXCHANGE_WITH_MEM_BASE +#define
CSR_SBUS_BUS_STAT_BASE #define CSR_SDRAM_BASE #define CSR_SDBLOCK2MEM_BASE #define CSR_SDCORE_BASE @@ -608,6 +609,7 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) //#undef CSR_CURVE25519ENGINE_BASE #undef CSR_DDRPHY_BASE #undef CSR_EXCHANGE_WITH_MEM_BASE +#undef CSR_SBUS_BUS_STAT_BASE #undef CSR_SDRAM_BASE #undef CSR_SDBLOCK2MEM_BASE #undef CSR_SDCORE_BASE diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index a44c77d..3aa313c 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -547,6 +547,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) //#define CSR_DDRPHY_BASE //#define CSR_SDRAM_BASE //#define CSR_EXCHANGE_WITH_MEM_BASE +#define CSR_SBUS_BUS_STAT_BASE #define CSR_SDBLOCK2MEM_BASE #define CSR_SDCORE_BASE #define CSR_SDIRQ_BASE @@ -564,6 +565,7 @@ sbusfpga_sdram_diskstart(device_t self, struct buf *bp) //#undef CSR_DDRPHY_BASE //#undef CSR_SDRAM_BASE //#undef CSR_EXCHANGE_WITH_MEM_BASE +#undef CSR_SBUS_BUS_STAT_BASE #undef CSR_SDBLOCK2MEM_BASE #undef CSR_SDCORE_BASE #undef CSR_SDIRQ_BASE diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c index 6218781..96b7c85 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_trng.c @@ -112,6 +112,7 @@ sbusfpga_trng_match(device_t parent, cfdata_t cf, void *aux) #define CSR_CURVE25519ENGINE_BASE #define CSR_DDRPHY_BASE #define CSR_EXCHANGE_WITH_MEM_BASE +#define CSR_SBUS_BUS_STAT_BASE #define CSR_SDRAM_BASE #define CSR_SDBLOCK2MEM_BASE #define CSR_SDCORE_BASE @@ -124,6 +125,7 @@ sbusfpga_trng_match(device_t parent, cfdata_t cf, void *aux) #undef CSR_CURVE25519ENGINE_BASE #undef CSR_DDRPHY_BASE #undef CSR_EXCHANGE_WITH_MEM_BASE +#undef CSR_SBUS_BUS_STAT_BASE #undef CSR_SDRAM_BASE #undef CSR_SDBLOCK2MEM_BASE #undef CSR_SDCORE_BASE @@ -145,7 
+147,7 @@ sbusfpga_trng_getentropy(size_t nbytes, void *cookie) { } else { failure ++; if (failure > (1+(dbytes/4))) { // something going on - aprint_normal_dev(sc->sc_dev, "out of entropy after %zd / %zd bytes\n", dbytes, nbytes); + device_printf(sc->sc_dev, "out of entropy after %zd / %zd bytes\n", dbytes, nbytes); return; } delay(1); @@ -153,7 +155,7 @@ sbusfpga_trng_getentropy(size_t nbytes, void *cookie) { if (((dbytes%32)==0) && (nbytes > dbytes)) delay(1); // let the hardware breathes if the OS needs a lof of bytes } - aprint_normal_dev(sc->sc_dev, "gathered %zd bytes [%d]\n", dbytes, failure); + device_printf(sc->sc_dev, "gathered %zd bytes [%d]\n", dbytes, failure); } /* diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 06a181f..f6fd470 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-20 12:28:27 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-21 07:21:33 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -738,9 +738,91 @@ static inline uint32_t exchange_with_mem_sbus_master_error_virtual_read(struct s #define CSR_EXCHANGE_WITH_MEM_CHECKSUM_SIZE 8 #endif // CSR_EXCHANGE_WITH_MEM_BASE +/* sbus_bus_stat */ +#ifndef CSR_SBUS_BUS_STAT_BASE +#define CSR_SBUS_BUS_STAT_BASE (CSR_BASE + 0x4000L) +#define CSR_SBUS_BUS_STAT_STAT_CTRL_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x0L) +#define CSR_SBUS_BUS_STAT_STAT_CTRL_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_ctrl_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x0L); +} +static inline void sbus_bus_stat_stat_ctrl_write(struct sbusfpga_sbus_bus_stat_softc *sc, uint32_t v) { + 
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x0L, v); +} +#define CSR_SBUS_BUS_STAT_STAT_CTRL_UPDATE_OFFSET 0 +#define CSR_SBUS_BUS_STAT_STAT_CTRL_UPDATE_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_ctrl_update_extract(struct sbusfpga_sbus_bus_stat_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 0) & mask ); +} +static inline uint32_t sbus_bus_stat_stat_ctrl_update_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + uint32_t word = sbus_bus_stat_stat_ctrl_read(sc); + return sbus_bus_stat_stat_ctrl_update_extract(sc, word); +} +static inline uint32_t sbus_bus_stat_stat_ctrl_update_replace(struct sbusfpga_sbus_bus_stat_softc *sc, uint32_t oldword, uint32_t plain_value) { + uint32_t mask = ((1 << 1)-1); + return (oldword & (~(mask << 0))) | (mask & plain_value)<< 0 ; +} +static inline void sbus_bus_stat_stat_ctrl_update_write(struct sbusfpga_sbus_bus_stat_softc *sc, uint32_t plain_value) { + uint32_t oldword = sbus_bus_stat_stat_ctrl_read(sc); + uint32_t newword = sbus_bus_stat_stat_ctrl_update_replace(sc, oldword, plain_value); + sbus_bus_stat_stat_ctrl_write(sc, newword); +} +#define CSR_SBUS_BUS_STAT_LIVE_STAT_CYCLE_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x4L) +#define CSR_SBUS_BUS_STAT_LIVE_STAT_CYCLE_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_live_stat_cycle_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x4L); +} +#define CSR_SBUS_BUS_STAT_STAT_CYCLE_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x8L) +#define CSR_SBUS_BUS_STAT_STAT_CYCLE_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_cycle_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x8L); +} +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_START_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0xcL) +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_START_COUNTER_SIZE 1 +static inline uint32_t 
sbus_bus_stat_stat_slave_start_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0xcL); +} +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_DONE_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x10L) +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_DONE_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_slave_done_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x10L); +} +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_ERROR_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x14L) +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_ERROR_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_slave_error_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x14L); +} +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_EARLY_ERROR_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x18L) +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_EARLY_ERROR_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_slave_early_error_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x18L); +} +#define CSR_SBUS_BUS_STAT_STAT_MASTER_START_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x1cL) +#define CSR_SBUS_BUS_STAT_STAT_MASTER_START_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_master_start_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x1cL); +} +#define CSR_SBUS_BUS_STAT_STAT_MASTER_DONE_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x20L) +#define CSR_SBUS_BUS_STAT_STAT_MASTER_DONE_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_master_done_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x20L); +} +#define CSR_SBUS_BUS_STAT_STAT_MASTER_ERROR_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x24L) +#define 
CSR_SBUS_BUS_STAT_STAT_MASTER_ERROR_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_master_error_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x24L); +} +#define CSR_SBUS_BUS_STAT_STAT_MASTER_RERUN_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x28L) +#define CSR_SBUS_BUS_STAT_STAT_MASTER_RERUN_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_master_rerun_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x28L); +} +#endif // CSR_SBUS_BUS_STAT_BASE + /* sdram */ #ifndef CSR_SDRAM_BASE -#define CSR_SDRAM_BASE (CSR_BASE + 0x4000L) +#define CSR_SDRAM_BASE (CSR_BASE + 0x5000L) #define CSR_SDRAM_DFII_CONTROL_ADDR (CSR_SDRAM_BASE + 0x0L) #define CSR_SDRAM_DFII_CONTROL_SIZE 1 static inline uint32_t sdram_dfii_control_read(struct sbusfpga_sdram_softc *sc) { @@ -1009,7 +1091,7 @@ static inline uint32_t sdram_dfii_pi3_rddata_read(struct sbusfpga_sdram_softc *s /* trng */ #ifndef CSR_TRNG_BASE -#define CSR_TRNG_BASE (CSR_BASE + 0x5000L) +#define CSR_TRNG_BASE (CSR_BASE + 0x6000L) #define CSR_TRNG_CTRL_ADDR (CSR_TRNG_BASE + 0x0L) #define CSR_TRNG_CTRL_SIZE 1 static inline uint32_t trng_ctrl_read(struct sbusfpga_trng_softc *sc) { diff --git a/sbus-to-ztex-gateware-migen/prom_csr.fth b/sbus-to-ztex-gateware-migen/prom_csr.fth index 56f4196..39cc271 100644 --- a/sbus-to-ztex-gateware-migen/prom_csr.fth +++ b/sbus-to-ztex-gateware-migen/prom_csr.fth @@ -3,8 +3,9 @@ h# 40000 constant sbusfpga_csraddr_leds h# 41000 constant sbusfpga_csraddr_curve25519engine h# 42000 constant sbusfpga_csraddr_ddrphy h# 43000 constant sbusfpga_csraddr_exchange_with_mem -h# 44000 constant sbusfpga_csraddr_sdram -h# 45000 constant sbusfpga_csraddr_trng +h# 44000 constant sbusfpga_csraddr_sbus_bus_stat +h# 45000 constant sbusfpga_csraddr_sdram +h# 46000 constant sbusfpga_csraddr_trng h# 80000 constant sbusfpga_regionaddr_usb_host_ctrl h# 
0 constant sbusfpga_regionaddr_prom h# 80000000 constant sbusfpga_regionaddr_main_ram diff --git a/sbus-to-ztex-gateware-migen/prom_migen.fth b/sbus-to-ztex-gateware-migen/prom_migen.fth index e76ac67..a85283b 100644 --- a/sbus-to-ztex-gateware-migen/prom_migen.fth +++ b/sbus-to-ztex-gateware-migen/prom_migen.fth @@ -217,4 +217,31 @@ my-space constant my-sbus-space curve25519engine-regfile-virt h# 10000 map-out ; +\ OpenBIOS tokenizer won't accept finish-device without new-device +\ Cheat by using the tokenizer so we can do OpenBoot 2.x siblings +\ tokenizer[ 01 emit-byte h# 27 emit-byte h# 01 emit-byte h# 1f emit-byte ]tokenizer +\ The OpenFirmware tokenizer does accept the 'clean' syntax +finish-device +\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ STAT +new-device + +\ Absolute minimal stuff; name & registers def. +" RDOL,sbusstat" device-name + +my-address sbusfpga_csraddr_sbus_bus_stat + my-space h# 100 reg +\ we don't support ET or HWORD +h# 7d xdrint " slave-burst-sizes" attribute +h# 7d xdrint " burst-sizes" attribute + +headers +-1 instance value sbus_bus_stat-virt +my-address constant my-sbus-address +my-space constant my-sbus-space + +: map-in ( adr space size -- virt ) " map-in" $call-parent ; +: map-out ( virt size -- ) " map-out" $call-parent ; + +: map-in-sbus_bus_stat ( -- ) my-sbus-address sbusfpga_csraddr_sbus_bus_stat + my-sbus-space h# 100 map-in is sbus_bus_stat-virt ; +: map-out-sbus_bus_stat ( -- ) sbus_bus_stat-virt h# 100 map-out ; + end0 diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index df1d9ff..e2b29c7 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -1,6 +1,6 @@ from migen import * from migen.genlib.fifo import * -from migen.genlib.cdc import PulseSynchronizer +from migen.genlib.cdc import BusSynchronizer from 
litex.soc.interconnect.csr import * from litex.soc.interconnect import wishbone @@ -91,13 +91,14 @@ class ExchangeWithMem(Module, AutoCSR): self.comb += self.dma_status.fields.has_wr_data.eq(self.fromsbus_fifo.readable) # Some data available to write to memory # The next two status bits reflect stats in the SBus clock domain - self.submodules.fromsbus_req_fifo_readable_sync = PulseSynchronizer("sbus", "sys") + self.submodules.fromsbus_req_fifo_readable_sync = BusSynchronizer(width = 1, idomain = "sbus", odomain = "sys") fromsbus_req_fifo_readable_in_sys = Signal() self.comb += self.fromsbus_req_fifo_readable_sync.i.eq(self.fromsbus_req_fifo.readable) self.comb += fromsbus_req_fifo_readable_in_sys.eq(self.fromsbus_req_fifo_readable_sync.o) # w/o this extra delay, the driver sees an outdated checksum for some reason... # there's probably a more fundamental issue :-( + # note: replaced PulseSynchronizer with BusSynchronizer, should I retry w/o this ? fromsbus_req_fifo_readable_in_sys_cnt = Signal(5) self.sync += If(fromsbus_req_fifo_readable_in_sys, fromsbus_req_fifo_readable_in_sys_cnt.eq(0x1F) @@ -109,7 +110,7 @@ class ExchangeWithMem(Module, AutoCSR): #self.comb += self.dma_status.fields.has_requests.eq(fromsbus_req_fifo_readable_in_sys) # we still have outstanding requests self.comb += self.dma_status.fields.has_requests.eq(fromsbus_req_fifo_readable_in_sys | (fromsbus_req_fifo_readable_in_sys_cnt != 0)) # we still have outstanding requests, or had recently - self.submodules.tosbus_fifo_readable_sync = PulseSynchronizer("sbus", "sys") + self.submodules.tosbus_fifo_readable_sync = BusSynchronizer(width = 1, idomain = "sbus", odomain = "sys") tosbus_fifo_readable_in_sys = Signal() self.comb += self.tosbus_fifo_readable_sync.i.eq(self.tosbus_fifo.readable) self.comb += tosbus_fifo_readable_in_sys.eq(self.tosbus_fifo_readable_sync.o) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 8d5a274..793c452 
100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -291,9 +291,9 @@ class SBusFPGABus(Module): sbus_wishbone_le = Signal() - wishbone_master_timeout = Signal(6) - wishbone_slave_timeout = Signal(6) - sbus_slave_timeout = Signal(6) + wishbone_master_timeout = Signal(log2_int(wishbone_default_timeout, False)) + wishbone_slave_timeout = Signal(log2_int(wishbone_default_timeout, False)) + sbus_slave_timeout = Signal(log2_int(sbus_default_timeout, False)) sbus_master_throttle = Signal(2) @@ -400,6 +400,16 @@ class SBusFPGABus(Module): #self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) #self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) #self.sync += platform.request("user_led", 7).eq(master_data_src_fromsbus_fifo) + + stat_slave_start_counter = Signal(32) + stat_slave_done_counter = Signal(32) + stat_slave_rerun_counter = Signal(32) + stat_slave_early_error_counter = Signal(32) + + stat_master_start_counter = Signal(32) + stat_master_done_counter = Signal(32) + stat_master_error_counter = Signal(32) + stat_master_rerun_counter = Signal(32) slave_fsm.act("Reset", #NextValue(self.led_display.value, 0x0000000000), @@ -443,6 +453,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | @@ -453,6 +464,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(stat_slave_start_counter, 
stat_slave_start_counter + 1), If(self.wishbone_master.cyc == 0, NextValue(self.wishbone_master.cyc, 1), NextValue(self.wishbone_master.stb, 1), @@ -472,6 +484,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_ADDRESS), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -485,6 +498,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(stat_slave_start_counter, stat_slave_start_counter + 1), If(self.wishbone_master.cyc == 0, NextValue(self.wishbone_master.cyc, 1), NextValue(self.wishbone_master.stb, 1), @@ -504,6 +518,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_ADDRESS), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -516,12 +531,14 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ROM_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), # need to wait for data, don't ACK yet NextValue(SBUS_3V3_ERRs_o, 1), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(stat_slave_start_counter, stat_slave_start_counter + 1), If(self.wishbone_master.cyc == 0, NextValue(self.wishbone_master.cyc, 1), 
NextValue(self.wishbone_master.stb, 1), @@ -541,6 +558,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_ADDRESS), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -561,6 +579,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == WISHBONE_CSR_ADDR_PFX) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == USBOHCI_ADDR_PFX) | @@ -568,6 +587,7 @@ class SBusFPGABus(Module): (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ENGINE_ADDR_PFXA) | (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == ENGINE_ADDR_PFXB)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(stat_slave_start_counter, stat_slave_start_counter + 1), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), @@ -584,6 +604,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_ADDRESS), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -594,6 +615,7 @@ class SBusFPGABus(Module): NextValue(sbus_last_pa, SBUS_3V3_PA_i), If(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(stat_slave_start_counter, stat_slave_start_counter + 1), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), 
NextValue(SBUS_3V3_ERRs_o, 1), @@ -610,6 +632,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_ADDRESS), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ) ).Elif(((SBUS_3V3_SELs_i == 0) & @@ -622,9 +645,11 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_PARITY), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ).Elif(((SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), NextValue(sbus_wishbone_le, (SBUS_3V3_PA_i[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH] == SRAM_ADDR_PFX)), + NextValue(stat_slave_start_counter, stat_slave_start_counter + 1), If(~self.wishbone_master.cyc, NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), NextValue(SBUS_3V3_ERRs_o, 1), @@ -641,6 +666,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_ERR), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(led0123, led0123 | LED_ADDRESS), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ) ).Elif(self.wishbone_slave.cyc & @@ -736,7 +762,8 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_PPRD_o, 0), NextValue(master_we, 1), #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), - #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_WRITE), Signal(2, reset = 0), self.wishbone_slave.adr)), + #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_WRITE), Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(stat_master_start_counter, stat_master_start_counter + 1), NextState("Master_Translation") ).Elif(SBUS_3V3_BGs_i & self.master_read_buffer_start & @@ -759,6 +786,7 @@ class SBusFPGABus(Module): NextValue(master_we, 0), #NextValue(self.led_display.value, 0x0000000000 | 
Cat(Signal(8, reset = 0x00), self.wishbone_slave.adr)), #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_M_READ), Signal(2, reset = 0), self.master_read_buffer_addr)), + NextValue(stat_master_start_counter, stat_master_start_counter + 1), NextState("Master_Translation") ).Elif(SBUS_3V3_BGs_i & self.tosbus_fifo.readable & @@ -792,6 +820,7 @@ class SBusFPGABus(Module): }), NextValue(SBUS_3V3_PPRD_o, 0), NextValue(master_we, 1), + NextValue(stat_master_start_counter, stat_master_start_counter + 1), NextState("Master_Translation") ).Elif(SBUS_3V3_BGs_i & self.fromsbus_req_fifo.readable & @@ -825,6 +854,7 @@ class SBusFPGABus(Module): }), NextValue(SBUS_3V3_PPRD_o, 1), NextValue(master_we, 0), + NextValue(stat_master_start_counter, stat_master_start_counter + 1), NextState("Master_Translation") ).Elif(((SBUS_3V3_SELs_i == 0) & (SBUS_3V3_ASs_i == 0)), @@ -833,6 +863,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(self.led_display.value, 0x000000000F | Cat(Signal(8, reset = 0x00), SBUS_3V3_PA_i, SBUS_3V3_SIZ_i, SBUS_3V3_PPRD_i)), #NextValue(led0123, led0123 | LED_UNKNOWNREQ), + NextValue(stat_slave_early_error_counter, stat_slave_early_error_counter + 1), NextState("Slave_Error") ).Elif(~SBUS_3V3_BGs_i, ### ouch we got the bus but nothing more to do ?!? 
@@ -849,6 +880,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), If(((SBUS_3V3_ASs_i == 1) | ((SBUS_3V3_ASs_i == 0) & (SBUS_3V3_SELs_i == 1))), + NextValue(stat_slave_done_counter, stat_slave_done_counter + 1), NextState("Idle") ) ) @@ -897,6 +929,7 @@ class SBusFPGABus(Module): NextValue(wishbone_master_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN | LED_RERUN_WORD | LED_RERUN_LATE), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -915,6 +948,7 @@ class SBusFPGABus(Module): ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN | LED_RERUN_WORD), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -957,6 +991,7 @@ class SBusFPGABus(Module): NextValue(wishbone_master_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN | LED_RERUN_LATE), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -975,6 +1010,7 @@ class SBusFPGABus(Module): ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -1015,6 +1051,7 @@ class SBusFPGABus(Module): NextValue(wishbone_master_timeout, 0), NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN | LED_RERUN_LATE), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -1033,6 +1070,7 @@ class SBusFPGABus(Module): ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -1070,6 +1108,7 @@ class 
SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), If(((SBUS_3V3_ASs_i == 1) | ((SBUS_3V3_ASs_i == 0) & (SBUS_3V3_SELs_i == 1))), + NextValue(stat_slave_done_counter, stat_slave_done_counter + 1), NextState("Idle") ) ) @@ -1083,6 +1122,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(self.led_display.value, Cat(Signal(8, reset = LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), sbus_last_pa, Signal(4, reset = 0))), #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE | LED_RERUN_WORD), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -1125,6 +1165,7 @@ class SBusFPGABus(Module): ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -1167,6 +1208,7 @@ class SBusFPGABus(Module): ).Elif(sbus_slave_timeout == 0, ### this is taking too long NextValue(SBUS_3V3_ACKs_o, ACK_RERUN), #NextValue(led0123, LED_RERUN | LED_RERUN_WRITE), + NextValue(stat_slave_rerun_counter, stat_slave_rerun_counter + 1), NextState("Slave_Error") ) ) @@ -1233,6 +1275,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(master_error_seen, 1), + NextValue(stat_master_error_counter, stat_master_error_counter + 1), NextState("Idle")], ACK_RERUN: ### dunno how to handle that yet, [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, @@ -1242,6 +1285,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(stat_master_rerun_counter, stat_master_rerun_counter + 1), NextState("Idle")], ACK_IDLE: [If(master_we, @@ -1278,6 +1322,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + 
NextValue(stat_master_rerun_counter, stat_master_rerun_counter + 1), NextState("Idle") ], ACK_ERR: ## ### burst not handled @@ -1290,6 +1335,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_master_in, 0), NextValue(master_error_seen, 8), NextValue(master_error_details, burst_counter), + NextValue(stat_master_error_counter, stat_master_error_counter + 1), NextValue(self.sbus_master_error_virtual, self.sbus_master_last_virtual), NextState("Idle") ], @@ -1302,6 +1348,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(master_error_seen, 4), + NextValue(stat_master_error_counter, stat_master_error_counter + 1), NextValue(master_error_details, Cat(SBUS_3V3_ACKs_i, Signal(1, reset = 0))), NextState("Idle") ], @@ -1346,6 +1393,7 @@ class SBusFPGABus(Module): [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(stat_master_rerun_counter, stat_master_rerun_counter + 1), NextState("Idle") ], "default": @@ -1353,6 +1401,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(master_error_seen, 1), + NextValue(stat_master_error_counter, stat_master_error_counter + 1), NextState("Idle") ], }), @@ -1368,6 +1417,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(stat_master_done_counter, stat_master_done_counter + 1), NextState("Idle") ) slave_fsm.act("Master_Write", @@ -1417,6 +1467,7 @@ class SBusFPGABus(Module): [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(stat_master_rerun_counter, stat_master_rerun_counter + 1), NextState("Idle") ], "default": ## ACK_ERRS or other @@ -1424,6 +1475,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(master_error_seen, 1), + NextValue(stat_master_error_counter, stat_master_error_counter + 
1), NextState("Idle"), ], }) @@ -1434,6 +1486,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(sbus_master_throttle, 3), + NextValue(stat_master_done_counter, stat_master_done_counter + 1), NextState("Idle") ) # ##### FINISHED ##### @@ -1652,3 +1705,52 @@ class SBusFPGABus(Module): # NextState("Idle"), # ) #) + + + self.stat_cycle_counter = Signal(32) + self.buf_stat_cycle_counter = Signal(32) + self.buf_stat_slave_start_counter = Signal(32) + self.buf_stat_slave_done_counter = Signal(32) + self.buf_stat_slave_rerun_counter = Signal(32) + self.buf_stat_slave_early_error_counter = Signal(32) + self.buf_stat_master_start_counter = Signal(32) + self.buf_stat_master_done_counter = Signal(32) + self.buf_stat_master_error_counter = Signal(32) + self.buf_stat_master_rerun_counter = Signal(32) + self.stat_update = Signal() + stat_update_prev = Signal() + + self.sync += stat_update_prev.eq(self.stat_update) + + self.sync += self.stat_cycle_counter.eq(self.stat_cycle_counter + 1) + self.sync += If(~stat_update_prev & self.stat_update, ## raising edge: copy to buffer and reset active + self.buf_stat_cycle_counter.eq(self.stat_cycle_counter), + self.buf_stat_slave_start_counter.eq(stat_slave_start_counter), + self.buf_stat_slave_done_counter.eq(stat_slave_done_counter), + self.buf_stat_slave_rerun_counter.eq(stat_slave_rerun_counter), + self.buf_stat_slave_early_error_counter.eq(stat_slave_early_error_counter), + self.buf_stat_master_start_counter.eq(stat_master_start_counter), + self.buf_stat_master_done_counter.eq(stat_master_done_counter), + self.buf_stat_master_error_counter.eq(stat_master_error_counter), + self.buf_stat_master_rerun_counter.eq(stat_master_rerun_counter), + self.stat_cycle_counter.eq(0), + stat_slave_start_counter.eq(0), + stat_slave_done_counter.eq(0), + stat_slave_rerun_counter.eq(0), + stat_slave_early_error_counter.eq(0), + stat_master_start_counter.eq(0), + stat_master_done_counter.eq(0), + 
stat_master_error_counter.eq(0), + stat_master_rerun_counter.eq(0), + ) + self.sync += If(stat_update_prev & ~self.stat_update, ## falling edge: reset buffer + self.buf_stat_cycle_counter.eq(0), + self.buf_stat_slave_start_counter.eq(0), + self.buf_stat_slave_done_counter.eq(0), + self.buf_stat_slave_rerun_counter.eq(0), + self.buf_stat_slave_early_error_counter.eq(0), + self.buf_stat_master_start_counter.eq(0), + self.buf_stat_master_done_counter.eq(0), + self.buf_stat_master_error_counter.eq(0), + self.buf_stat_master_rerun_counter.eq(0), + ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index afe5324..e3cb722 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -17,13 +17,14 @@ from litedram.modules import MT41J128M16 from litedram.phy import s7ddrphy from sbus_to_fpga_fsm import * +from sbus_to_fpga_fsmstat import * from sbus_to_fpga_blk_dma import * from sbus_to_fpga_trng import * from litedram.frontend.dma import * from engine import Engine; -from migen.genlib.cdc import PulseSynchronizer, BusSynchronizer +from migen.genlib.cdc import BusSynchronizer from migen.genlib.resetsync import AsyncResetSynchronizer; import sbus_to_fpga_export; @@ -268,6 +269,7 @@ class SBusFPGA(SoCCore): burst_size=burst_size) #self.submodules.sbus_bus = _sbus_bus self.submodules.sbus_bus = ClockDomainsRenamer("sbus")(_sbus_bus) + self.submodules.sbus_bus_stat = SBusFPGABusStat(sbus_bus = self.sbus_bus) self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) @@ -289,7 +291,7 @@ class SBusFPGA(SoCCore): self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", 
cd_slave="clk100") self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) #self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) - self.submodules.curve25519_on_sync = PulseSynchronizer("clk100", "sys") + self.submodules.curve25519_on_sync = BusSynchronizer(width = 1, idomain = "clk100", odomain = "sys") self.comb += self.curve25519_on_sync.i.eq(self.curve25519engine.power.fields.on) self.comb += self.crg.curve25519_on.eq(self.curve25519_on_sync.o) From 890033a0fecd91c3633664c206e4ee41de67ab27 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 21 Aug 2021 10:11:41 -0400 Subject: [PATCH 60/78] move the last errored address to sbusfpga_stat ; add missing stat driver --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c | 3 - .../9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c | 223 ++++++++++++++++++ .../9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h | 44 ++++ sbus-to-ztex-gateware-migen/netbsd_csr.h | 20 +- .../sbus_to_fpga_blk_dma.py | 10 +- .../sbus_to_fpga_fsm.py | 75 +++--- .../sbus_to_fpga_soc.py | 4 - 7 files changed, 321 insertions(+), 58 deletions(-) create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c create mode 100644 NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c index 3aa313c..c99832b 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_sdram.c @@ -119,7 +119,6 @@ struct sbusfpga_sdram_rwpg { u_int32_t last_blk; u_int32_t last_dma; u_int32_t dma_wrdone; - u_int32_t vdma_err; }; #define SBUSFPGA_READ_PG _IOWR('X', 0, struct sbusfpga_sdram_rwpg) #define SBUSFPGA_WRITE_PG _IOWR('X', 1, struct sbusfpga_sdram_rwpg) @@ -609,7 +608,6 @@ sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, 
void *data, int flag, struct lwp *l pg->last_blk = exchange_with_mem_last_blk_read(sc); pg->last_dma = exchange_with_mem_last_dma_read(sc); pg->dma_wrdone = exchange_with_mem_dma_wrdone_read(sc); - pg->vdma_err = exchange_with_mem_sbus_master_error_virtual_read(sc); if (err != 0) err = EIO; goto done; @@ -624,7 +622,6 @@ sbusfpga_sdram_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l pg->last_blk = exchange_with_mem_last_blk_read(sc); pg->last_dma = exchange_with_mem_last_dma_read(sc); pg->dma_wrdone = exchange_with_mem_dma_wrdone_read(sc); - pg->vdma_err = exchange_with_mem_sbus_master_error_virtual_read(sc); if (err != 0) err = EIO; goto done; diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c new file mode 100644 index 0000000..00be73e --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c @@ -0,0 +1,223 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +#include + +int sbusfpga_stat_print(void *, const char *); +int sbusfpga_stat_match(device_t, cfdata_t, void *); +void sbusfpga_stat_attach(device_t, device_t, void *); + +CFATTACH_DECL_NEW(sbusfpga_stat, sizeof(struct sbusfpga_sbus_bus_stat_softc), + sbusfpga_stat_match, sbusfpga_stat_attach, NULL, NULL); + +dev_type_open(sbusfpga_stat_open); +dev_type_close(sbusfpga_stat_close); + + + +const struct cdevsw sbusfpga_stat_cdevsw = { + .d_open = sbusfpga_stat_open, + .d_close = sbusfpga_stat_close, + .d_read = noread, + .d_write = nowrite, + .d_ioctl = noioctl, + .d_stop = nostop, + .d_tty = notty, + .d_poll = nopoll, + .d_mmap = nommap, + .d_kqfilter = nokqfilter, + .d_discard = nodiscard, + .d_flag = 0 +}; + +extern struct cfdriver sbusfpga_stat_cd; +int +sbusfpga_stat_open(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_stat_close(dev_t dev, int flags, int mode, struct lwp *l) +{ + return (0); +} + +int +sbusfpga_stat_print(void *aux, const char *busname) +{ + + sbus_print(aux, busname); + return (UNCONF); +} + +int +sbusfpga_stat_match(device_t parent, cfdata_t cf, void *aux) +{ + struct sbus_attach_args *sa = (struct sbus_attach_args *)aux; + + return (strcmp("RDOL,sbusstat", sa->sa_name) == 0); +} + +#define 
CONFIG_CSR_DATA_WIDTH 32 +// define CSR_LEDS_BASE & others to avoid defining the CSRs of HW we don't handle +#define CSR_LEDS_BASE +#define CSR_CURVE25519ENGINE_BASE +#define CSR_DDRPHY_BASE +#define CSR_EXCHANGE_WITH_MEM_BASE +// #define CSR_SBUS_BUS_STAT_BASE +#define CSR_SDRAM_BASE +#define CSR_SDBLOCK2MEM_BASE +#define CSR_SDCORE_BASE +#define CSR_SDIRQ_BASE +#define CSR_SDMEM2BLOCK_BASE +#define CSR_SDPHY_BASE +#define CSR_TRNG_BASE +#include "dev/sbus/litex_csr.h" +#undef CSR_LEDS_BASE +#undef CSR_CURVE25519ENGINE_BASE +#undef CSR_DDRPHY_BASE +#undef CSR_EXCHANGE_WITH_MEM_BASE +// #undef CSR_SBUS_BUS_STAT_BASE +#undef CSR_SDRAM_BASE +#undef CSR_SDBLOCK2MEM_BASE +#undef CSR_SDCORE_BASE +#undef CSR_SDIRQ_BASE +#undef CSR_SDMEM2BLOCK_BASE +#undef CSR_SDPHY_BASE +//#undef CSR_TRNG_BASE + + +static void sbusfpga_stat_display(void *); + +/* + * Attach all the sub-devices we can find + */ +void +sbusfpga_stat_attach(device_t parent, device_t self, void *aux) +{ + struct sbus_attach_args *sa = aux; + struct sbusfpga_sbus_bus_stat_softc *sc = device_private(self); + struct sbus_softc *sbsc = device_private(parent); + int node; + int sbusburst; + + sc->sc_bustag = sa->sa_bustag; + sc->sc_dev = self; + + if (sbus_bus_map(sc->sc_bustag, sa->sa_slot, sa->sa_offset, sa->sa_size, + BUS_SPACE_MAP_LINEAR, &sc->sc_bhregs_sbus_bus_stat) != 0) { + aprint_error(": cannot map registers\n"); + return; + } + + sc->sc_bufsiz = sa->sa_size; + + node = sc->sc_node = sa->sa_node; + + /* + * Get transfer burst size from PROM + */ + sbusburst = sbsc->sc_burst; + if (sbusburst == 0) + sbusburst = SBUS_BURST_32 - 1; /* 1->16 */ + + sc->sc_burst = prom_getpropint(node, "burst-sizes", -1); + if (sc->sc_burst == -1) + /* take SBus burst sizes */ + sc->sc_burst = sbusburst; + + /* Clamp at parent's burst sizes */ + sc->sc_burst &= sbusburst; + + aprint_normal("\n"); + aprint_normal_dev(self, "nid 0x%x, bustag %p, burst 0x%x (parent 0x%0x)\n", + sc->sc_node, + sc->sc_bustag, + sc->sc_burst, + 
sbsc->sc_burst); + + sc->sc_delay = 5 * hz; // five seconds + + callout_init(&sc->sc_display, CALLOUT_MPSAFE); + callout_setfunc(&sc->sc_display, sbusfpga_stat_display, sc); + callout_schedule(&sc->sc_display, sc->sc_delay); +} + +static void sbusfpga_stat_display(void *args) { + struct sbusfpga_sbus_bus_stat_softc *sc = args; + unsigned int c = sbus_bus_stat_stat_cycle_counter_read(sc), c2; + int count; + sbus_bus_stat_stat_ctrl_write(sc, 1); + delay(1); + count = 0; + while (count < 10 && ((c2 = sbus_bus_stat_stat_cycle_counter_read(sc)) == c)) { + count ++; + delay(1); + } + if ((c2 == c) || (c2 == 0)){ + device_printf(sc->sc_dev, "Statistics didn't update\n"); + } else { + device_printf(sc->sc_dev, "%u: slave %u %u %u %u\n", + c2, + sbus_bus_stat_stat_slave_start_counter_read(sc), + sbus_bus_stat_stat_slave_done_counter_read(sc), + sbus_bus_stat_stat_slave_rerun_counter_read(sc), + sbus_bus_stat_stat_slave_early_error_counter_read(sc)); + device_printf(sc->sc_dev, "%u: master %u %u %u %u (0x%08x)\n", + c2, + sbus_bus_stat_stat_master_start_counter_read(sc), + sbus_bus_stat_stat_master_done_counter_read(sc), + sbus_bus_stat_stat_master_error_counter_read(sc), + sbus_bus_stat_stat_master_rerun_counter_read(sc), + sbus_bus_stat_sbus_master_error_virtual_read(sc)); + } + sbus_bus_stat_stat_ctrl_write(sc, 0); + callout_schedule(&sc->sc_display, sc->sc_delay); +} diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h new file mode 100644 index 0000000..2ab4c48 --- /dev/null +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h @@ -0,0 +1,44 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2020 Romain Dolbeau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SBUSFPGA_STAT_H_ +#define _SBUSFPGA_STAT_H_ + +struct sbusfpga_sbus_bus_stat_softc { + device_t sc_dev; /* us as a device */ + u_int sc_rev; /* revision */ + int sc_node; /* PROM node ID */ + int sc_burst; /* DVMA burst size in effect */ + bus_space_tag_t sc_bustag; /* bus tag */ + bus_space_handle_t sc_bhregs_sbus_bus_stat; /* bus handle */ + int sc_bufsiz; /* Size of buffer */ + callout_t sc_display; + int sc_delay; +}; + +#endif /* _SBUSFPGA_STAT_H_ */ diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index f6fd470..4f44e70 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-21 07:21:33 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-21 08:42:06 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -729,12 +729,7 @@ static inline uint32_t exchange_with_mem_dma_status_has_rd_data_read(struct sbus static inline uint32_t exchange_with_mem_wr_tosdram_read(struct sbusfpga_exchange_with_mem_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x2cL); } -#define CSR_EXCHANGE_WITH_MEM_SBUS_MASTER_ERROR_VIRTUAL_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x30L) -#define CSR_EXCHANGE_WITH_MEM_SBUS_MASTER_ERROR_VIRTUAL_SIZE 1 -static inline uint32_t exchange_with_mem_sbus_master_error_virtual_read(struct sbusfpga_exchange_with_mem_softc *sc) { - return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_exchange_with_mem, 0x30L); -} -#define CSR_EXCHANGE_WITH_MEM_CHECKSUM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x34L) +#define CSR_EXCHANGE_WITH_MEM_CHECKSUM_ADDR (CSR_EXCHANGE_WITH_MEM_BASE + 0x30L) #define CSR_EXCHANGE_WITH_MEM_CHECKSUM_SIZE 8 #endif // CSR_EXCHANGE_WITH_MEM_BASE @@ 
-788,9 +783,9 @@ static inline uint32_t sbus_bus_stat_stat_slave_start_counter_read(struct sbusfp static inline uint32_t sbus_bus_stat_stat_slave_done_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x10L); } -#define CSR_SBUS_BUS_STAT_STAT_SLAVE_ERROR_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x14L) -#define CSR_SBUS_BUS_STAT_STAT_SLAVE_ERROR_COUNTER_SIZE 1 -static inline uint32_t sbus_bus_stat_stat_slave_error_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_RERUN_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x14L) +#define CSR_SBUS_BUS_STAT_STAT_SLAVE_RERUN_COUNTER_SIZE 1 +static inline uint32_t sbus_bus_stat_stat_slave_rerun_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x14L); } #define CSR_SBUS_BUS_STAT_STAT_SLAVE_EARLY_ERROR_COUNTER_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x18L) @@ -818,6 +813,11 @@ static inline uint32_t sbus_bus_stat_stat_master_error_counter_read(struct sbusf static inline uint32_t sbus_bus_stat_stat_master_rerun_counter_read(struct sbusfpga_sbus_bus_stat_softc *sc) { return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x28L); } +#define CSR_SBUS_BUS_STAT_SBUS_MASTER_ERROR_VIRTUAL_ADDR (CSR_SBUS_BUS_STAT_BASE + 0x2cL) +#define CSR_SBUS_BUS_STAT_SBUS_MASTER_ERROR_VIRTUAL_SIZE 1 +static inline uint32_t sbus_bus_stat_sbus_master_error_virtual_read(struct sbusfpga_sbus_bus_stat_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_sbus_bus_stat, 0x2cL); +} #endif // CSR_SBUS_BUS_STAT_BASE /* sdram */ diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index e2b29c7..61d3eac 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -23,10 +23,10 @@ class ExchangeWithMem(Module, AutoCSR): data_width_bits 
= burst_size * 32 blk_addr_width = 32 - log2_int(data_width) # 27 for burst_size == 8 - assert(len(self.dram_dma_writer.sink.data == data_width_bits)) - assert(len(self.dram_dma_reader.source.data == data_width_bits)) - assert(len(self.dram_dma_writer.sink.address == blk_addr_width)) - assert(len(self.dram_dma_reader.sink.address == blk_addr_width)) + assert(len(self.dram_dma_writer.sink.data) == data_width_bits) + assert(len(self.dram_dma_reader.source.data) == data_width_bits) + assert(len(self.dram_dma_writer.sink.address) == blk_addr_width) + assert(len(self.dram_dma_reader.sink.address) == blk_addr_width) #self.wishbone_r_master = wishbone.Interface(data_width=data_width_bits) #self.wishbone_w_master = wishbone.Interface(data_width=data_width_bits) @@ -77,8 +77,6 @@ class ExchangeWithMem(Module, AutoCSR): CSRField("has_rd_data", 1, description = "Data available to write to SBus"), ]) self.wr_tosdram = CSRStatus(32, description = "Last address written to SDRAM") - - self.sbus_master_error_virtual = CSRStatus(32, description = "Virtual address that failed translation phase") if (do_checksum): self.checksum = CSRStorage(data_width_bits, write_from_dev=True, description = "checksum (XOR)"); diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 793c452..8b92f7f 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -35,6 +35,7 @@ ENGINE_ADDR_PFXB = Signal(12, reset = 0x00b) wishbone_default_timeout = 120 ## must be > sbus_default_timeout sbus_default_timeout = 100 ## must be below 127 as we can wait twice on it inside the 255 cycles +sbus_default_master_throttle = 3 def siz_is_word(siz): return (SIZ_WORD == siz) | (SIZ_BURST2 == siz) | (SIZ_BURST4 == siz) | (SIZ_BURST8 == siz) | (SIZ_BURST16 == siz) @@ -295,7 +296,7 @@ class SBusFPGABus(Module): wishbone_slave_timeout = Signal(log2_int(wishbone_default_timeout, False)) sbus_slave_timeout = 
Signal(log2_int(sbus_default_timeout, False)) - sbus_master_throttle = Signal(2) + sbus_master_throttle = Signal(log2_int(sbus_default_master_throttle, False)) #self.submodules.led_display = LedDisplay(platform.request_all("user_led")) @@ -382,20 +383,6 @@ class SBusFPGABus(Module): # slave_fsm.ongoing("Slave_Ack_Reg_Write_HWord_Wait_For_Wishbone") | # slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte") | # slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone")) - - master_error_seen = Signal(4, reset = 0) - self.sync += platform.request("user_led", 0).eq(master_error_seen[0:1]) - self.sync += platform.request("user_led", 1).eq(master_error_seen[1:2]) - self.sync += platform.request("user_led", 2).eq(master_error_seen[2:3]) - self.sync += platform.request("user_led", 3).eq(master_error_seen[3:4]) - master_error_details = Signal(4, reset = 0) - self.sync += platform.request("user_led", 4).eq(master_error_details[0:1]) - self.sync += platform.request("user_led", 5).eq(master_error_details[1:2]) - self.sync += platform.request("user_led", 6).eq(master_error_details[2:3]) - self.sync += platform.request("user_led", 7).eq(master_error_details[3:4]) - - self.sbus_master_last_virtual = Signal(32) # last VDMA address put on the bus in master mode - self.sbus_master_error_virtual = Signal(32) # this gets exported to a Wishbone CSR in exchange_with_mem #self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) #self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) @@ -411,6 +398,9 @@ class SBusFPGABus(Module): stat_master_error_counter = Signal(32) stat_master_rerun_counter = Signal(32) + sbus_master_last_virtual = Signal(32) # last VDMA address put on the bus in master mode + sbus_master_error_virtual = Signal(32) + slave_fsm.act("Reset", #NextValue(self.led_display.value, 0x0000000000), NextValue(sbus_oe_data, 0), @@ -711,43 +701,43 @@ class SBusFPGABus(Module): NextValue(master_size, SIZ_WORD), NextValue(SBUS_3V3_SIZ_o, 
SIZ_WORD), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), ], 0x1: [NextValue(master_idx, 3), NextValue(master_size, SIZ_BYTE), NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), ], 0x2: [NextValue(master_idx, 2), NextValue(master_size, SIZ_BYTE), NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 1), self.wishbone_slave.adr)), ], 0x4: [NextValue(master_idx, 1), NextValue(master_size, SIZ_BYTE), NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), ], 0x8: [NextValue(master_idx, 0), NextValue(master_size, SIZ_BYTE), NextValue(SBUS_3V3_SIZ_o, SIZ_BYTE), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 3), self.wishbone_slave.adr)), ], 0x3: [NextValue(master_idx, 2), NextValue(master_size, SIZ_HWORD), NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, 
reset = 0), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 0), self.wishbone_slave.adr)), ], 0xc: [NextValue(master_idx, 0), NextValue(master_size, SIZ_HWORD), NextValue(SBUS_3V3_SIZ_o, SIZ_HWORD), NextValue(SBUS_3V3_D_o, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), + NextValue(sbus_master_last_virtual, Cat(Signal(2, reset = 2), self.wishbone_slave.adr)), ], "default":[NextValue(burst_counter, 0), # FIXME if it happens! NextValue(burst_limit_m1, 0), ## only single word for now @@ -780,7 +770,7 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, 3), ## only quadword word for now NextValue(SBUS_3V3_D_o, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), - NextValue(self.sbus_master_last_virtual, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), + NextValue(sbus_master_last_virtual, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), NextValue(SBUS_3V3_PPRD_o, 1), NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), NextValue(master_we, 0), @@ -802,7 +792,7 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, burst_size - 1), NextValue(SBUS_3V3_D_o, self.tosbus_fifo.dout[0:32]), - NextValue(self.sbus_master_last_virtual, self.tosbus_fifo.dout[0:32]), + NextValue(sbus_master_last_virtual, self.tosbus_fifo.dout[0:32]), NextValue(master_addr, self.tosbus_fifo.dout[2:32]), NextValue(master_data, self.tosbus_fifo.dout[32:64]), NextValue(fifo_buffer, self.tosbus_fifo.dout[32:]), @@ -838,7 +828,7 @@ class SBusFPGABus(Module): NextValue(burst_counter, 0), NextValue(burst_limit_m1, burst_size - 1), NextValue(SBUS_3V3_D_o, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), - NextValue(self.sbus_master_last_virtual, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), + NextValue(sbus_master_last_virtual, 
self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), NextValue(fifo_blk_addr, self.fromsbus_req_fifo.dout[0:blk_addr_width]), NextValue(master_data_src_fromsbus_fifo, 1), self.fromsbus_req_fifo.re.eq(1), @@ -1274,8 +1264,8 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(master_error_seen, 1), NextValue(stat_master_error_counter, stat_master_error_counter + 1), + NextValue(sbus_master_error_virtual, sbus_master_last_virtual), NextState("Idle")], ACK_RERUN: ### dunno how to handle that yet, [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, @@ -1333,10 +1323,8 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(master_error_seen, 8), - NextValue(master_error_details, burst_counter), NextValue(stat_master_error_counter, stat_master_error_counter + 1), - NextValue(self.sbus_master_error_virtual, self.sbus_master_last_virtual), + NextValue(sbus_master_error_virtual, sbus_master_last_virtual), NextState("Idle") ], "default": ## other ### burst not handled @@ -1347,9 +1335,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(master_error_seen, 4), NextValue(stat_master_error_counter, stat_master_error_counter + 1), - NextValue(master_error_details, Cat(SBUS_3V3_ACKs_i, Signal(1, reset = 0))), NextState("Idle") ], }) @@ -1396,11 +1382,18 @@ class SBusFPGABus(Module): NextValue(stat_master_rerun_counter, stat_master_rerun_counter + 1), NextState("Idle") ], + ACK_ERR: + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(stat_master_error_counter, stat_master_error_counter + 1), + NextValue(sbus_master_error_virtual, sbus_master_last_virtual), + NextState("Idle") + ], "default": [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), 
NextValue(sbus_oe_master_in, 0), - NextValue(master_error_seen, 1), NextValue(stat_master_error_counter, stat_master_error_counter + 1), NextState("Idle") ], @@ -1417,6 +1410,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(sbus_master_throttle, sbus_default_master_throttle), NextValue(stat_master_done_counter, stat_master_done_counter + 1), NextState("Idle") ) @@ -1470,11 +1464,18 @@ class SBusFPGABus(Module): NextValue(stat_master_rerun_counter, stat_master_rerun_counter + 1), NextState("Idle") ], - "default": ## ACK_ERRS or other + ACK_ERR: ## ACK_ERRS or other + [NextValue(sbus_oe_data, 0), + NextValue(sbus_oe_slave_in, 0), + NextValue(sbus_oe_master_in, 0), + NextValue(stat_master_error_counter, stat_master_error_counter + 1), + NextValue(sbus_master_error_virtual, sbus_master_last_virtual), + NextState("Idle"), + ], + "default": ## other [NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(master_error_seen, 1), NextValue(stat_master_error_counter, stat_master_error_counter + 1), NextState("Idle"), ], @@ -1485,7 +1486,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), - NextValue(sbus_master_throttle, 3), + NextValue(sbus_master_throttle, sbus_default_master_throttle), NextValue(stat_master_done_counter, stat_master_done_counter + 1), NextState("Idle") ) @@ -1717,6 +1718,7 @@ class SBusFPGABus(Module): self.buf_stat_master_done_counter = Signal(32) self.buf_stat_master_error_counter = Signal(32) self.buf_stat_master_rerun_counter = Signal(32) + self.buf_sbus_master_error_virtual = Signal(32) self.stat_update = Signal() stat_update_prev = Signal() @@ -1733,6 +1735,7 @@ class SBusFPGABus(Module): self.buf_stat_master_done_counter.eq(stat_master_done_counter), self.buf_stat_master_error_counter.eq(stat_master_error_counter), 
self.buf_stat_master_rerun_counter.eq(stat_master_rerun_counter), + self.buf_sbus_master_error_virtual.eq(sbus_master_error_virtual), self.stat_cycle_counter.eq(0), stat_slave_start_counter.eq(0), stat_slave_done_counter.eq(0), @@ -1742,6 +1745,7 @@ class SBusFPGABus(Module): stat_master_done_counter.eq(0), stat_master_error_counter.eq(0), stat_master_rerun_counter.eq(0), + sbus_master_error_virtual.eq(0), ) self.sync += If(stat_update_prev & ~self.stat_update, ## falling edge: reset buffer self.buf_stat_cycle_counter.eq(0), @@ -1753,4 +1757,5 @@ class SBusFPGABus(Module): self.buf_stat_master_done_counter.eq(0), self.buf_stat_master_error_counter.eq(0), self.buf_stat_master_rerun_counter.eq(0), + self.buf_sbus_master_error_virtual.eq(0), ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index e3cb722..6b49585 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -275,10 +275,6 @@ class SBusFPGA(SoCCore): self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) #self.bus.add_master(name="mem_read_master", master=self.exchange_with_mem.wishbone_r_slave) #self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_slave) - - self.submodules.sbus_master_error_virtual_sync = BusSynchronizer(width=32, idomain="sbus", odomain="sys") - self.comb += self.sbus_master_error_virtual_sync.i.eq(self.sbus_bus.sbus_master_error_virtual) - self.comb += self.exchange_with_mem.sbus_master_error_virtual.status.eq(self.sbus_master_error_virtual_sync.o) #self.add_sdcard() From 8e94597b3a8eea39bb85d0cce14042e1ece625b3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 22 Aug 2021 03:54:00 -0400 Subject: [PATCH 61/78] Enable everything (using a Wishbone Crossbar instead of a Shared), some changes to Sbus timeouts, seems 
that everything play nice together now, also change Engine code to need fewer inputs --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 367 ++---------------- README.md | 29 +- sbus-to-ztex-gateware-migen/engine.py | 1 + .../engine_code/engine_code.rs | 273 ++++++++++++- sbus-to-ztex-gateware-migen/netbsd_csr.h | 2 +- .../sbus_to_fpga_blk_dma.py | 2 - .../sbus_to_fpga_fsm.py | 19 +- .../sbus_to_fpga_soc.py | 2 +- 8 files changed, 316 insertions(+), 379 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index bfbcc19..106c6d1 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -79,10 +79,10 @@ const struct cdevsw sbusfpga_c29e_cdevsw = { extern struct cfdriver sbusfpga_c29e_cd; struct sbusfpga_curve25519engine_montgomeryjob { - uint32_t x0_u[8]; - uint32_t x0_w[8]; - uint32_t x1_u[8]; - uint32_t x1_w[8]; + /* uint32_t x0_u[8]; */ + /* uint32_t x0_w[8]; */ + /* uint32_t x1_u[8]; */ + /* uint32_t x1_w[8]; */ uint32_t affine_u[8]; uint32_t scalar[8]; }; @@ -123,7 +123,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st err = start_job(sc); if (err) return err; - delay(10); + delay(1); err = wait_job(sc); if (err) return err; @@ -169,322 +169,8 @@ sbusfpga_curve25519engine_match(device_t parent, cfdata_t cf, void *aux) return (strcmp("betrustedc25519e", sa->sa_name) == 0); } -#if 1 -#if 0 -static const uint32_t program[192] = {0x00480800, - 0x007407cc, - 0x007c07cb, - 0x0049d483, - 0x0079b643, - 0x0079e482, - 0x00659783, - 0x006db783, - 0x0079c683, - 0x0079e482, - 0x0069a783, - 0x0071c783, - 0x00480740, - 0x00500640, - 0x00540680, - 0x005806c0, - 0x005c0700, - 0x00015505, - 0x00780008, - 0x0001e006, - 0x005558c6, - 0x00055505, - 0x00780048, - 0x0005e046, - 0x00097585, - 0x00780088, - 0x0009e086, - 0x005d78c6, - 0x000d7585, - 0x007800c8, - 0x000de0c6, - 
0x00100007, - 0x00141047, - 0x007458c6, - 0x0019d105, - 0x00780188, - 0x0019e186, - 0x001c3007, - 0x00202047, - 0x002481c5, - 0x00780248, - 0x0025e246, - 0x007488c6, - 0x0029d1c5, - 0x00780288, - 0x0029e286, - 0x002c9247, - 0x0030a287, - 0x00346907, - 0x00385107, - 0x003c5345, - 0x007803c8, - 0x003de3c6, - 0x0040f187, - 0x0044c607, - 0x00500380, - 0x00540400, - 0x005802c0, - 0x005c0440, - 0x00640500, - 0x00680540, - 0x006c0580, - 0x007005c0, - 0x010004c9, - 0x004e14c6, - 0xdf800809, - 0x0079b643, - 0x0079e482, - 0x00659783, - 0x006db783, - 0x0079c683, - 0x0079e482, - 0x0069a783, - 0x0071c783, - 0x00740640, - 0x00780680, - 0x0001e787, - 0x00040007, - 0x00041047, - 0x00081787, - 0x000c2007, - 0x001030c7, - 0x00144087, - 0x00700940, - 0x00185147, - 0x00721706, - 0x01000709, - 0x00186187, - 0xfe000809, - 0x001c5187, - 0x00700980, - 0x002071c7, - 0x00721706, - 0x01000709, - 0x00208207, - 0xfe000809, - 0x00247207, - 0x007009c0, - 0x00289247, - 0x00721706, - 0x01000709, - 0x0028a287, - 0xfe000809, - 0x002c9287, - 0x00700980, - 0x0030b2c7, - 0x00721706, - 0x01000709, - 0x0030c307, - 0xfe000809, - 0x00347307, - 0x00700a00, - 0x0038d347, - 0x00721706, - 0x01000709, - 0x0038e387, - 0xfe000809, - 0x003cd387, - 0x00700a40, - 0x0040f3c7, - 0x00721706, - 0x01000709, - 0x00410407, - 0xfe000809, - 0x0044f407, - 0x00700a00, - 0x00491447, - 0x00721706, - 0x01000709, - 0x00492487, - 0xfe000809, - 0x004cd487, - 0x00700940, - 0x005134c7, - 0x00721706, - 0x01000709, - 0x00514507, - 0xfe000809, - 0x00543507, - 0x007d5747, - 0x0000000a, - 0x0000000a, - 0x0000000a, - 0x0000000a, -}; -static const uint32_t program_len = 141; -#else -static const uint32_t program[192] = {0x00640840, - 0x00680800, - 0x006c0600, - 0x00700840, - 0x00500a40, - 0x00554505, - 0x00500a00, - 0x00554545, - 0x00500940, - 0x00554545, - 0x00500840, - 0x004d4546, - 0x00480800, - 0x007407cc, - 0x007c07cb, - 0x0049d483, - 0x0079b643, - 0x0079e482, - 0x00659783, - 0x006db783, - 0x0079c683, - 0x0079e482, - 0x0069a783, - 
0x0071c783, - 0x00480740, - 0x00500640, - 0x00540680, - 0x005806c0, - 0x005c0700, - 0x00015505, - 0x00780008, - 0x0001e006, - 0x005558c6, - 0x00055505, - 0x00780048, - 0x0005e046, - 0x00097585, - 0x00780088, - 0x0009e086, - 0x005d78c6, - 0x000d7585, - 0x007800c8, - 0x000de0c6, - 0x00100007, - 0x00141047, - 0x007458c6, - 0x0019d105, - 0x00780188, - 0x0019e186, - 0x001c3007, - 0x00202047, - 0x002481c5, - 0x00780248, - 0x0025e246, - 0x007488c6, - 0x0029d1c5, - 0x00780288, - 0x0029e286, - 0x002c9247, - 0x0030a287, - 0x00346907, - 0x00385107, - 0x003c5345, - 0x007803c8, - 0x003de3c6, - 0x0040f187, - 0x0044c607, - 0x00500380, - 0x00540400, - 0x005802c0, - 0x005c0440, - 0x00640500, - 0x00680540, - 0x006c0580, - 0x007005c0, - 0x010004c9, - 0x004e14c6, - 0xdf800809, - 0x0079b643, - 0x0079e482, - 0x00659783, - 0x006db783, - 0x0079c683, - 0x0079e482, - 0x0069a783, - 0x0071c783, - 0x00740640, - 0x00780680, - 0x0001e787, - 0x00040007, - 0x00041047, - 0x00081787, - 0x000c2007, - 0x001030c7, - 0x00144087, - 0x00700940, - 0x00185147, - 0x00721706, - 0x01000709, - 0x00186187, - 0xfe000809, - 0x001c5187, - 0x00700980, - 0x002071c7, - 0x00721706, - 0x01000709, - 0x00208207, - 0xfe000809, - 0x00247207, - 0x007009c0, - 0x00289247, - 0x00721706, - 0x01000709, - 0x0028a287, - 0xfe000809, - 0x002c9287, - 0x00700980, - 0x0030b2c7, - 0x00721706, - 0x01000709, - 0x0030c307, - 0xfe000809, - 0x00347307, - 0x00700a00, - 0x0038d347, - 0x00721706, - 0x01000709, - 0x0038e387, - 0xfe000809, - 0x003cd387, - 0x00700a40, - 0x0040f3c7, - 0x00721706, - 0x01000709, - 0x00410407, - 0xfe000809, - 0x0044f407, - 0x00700a00, - 0x00491447, - 0x00721706, - 0x01000709, - 0x00492487, - 0xfe000809, - 0x004cd487, - 0x00700940, - 0x005134c7, - 0x00721706, - 0x01000709, - 0x00514507, - 0xfe000809, - 0x00543507, - 0x007d5747, - 0x0000000a, - 0x0000000a, - 0x0000000a, -}; -static const uint32_t program_len = 153; -#endif -#else -static const uint32_t program[16] = { - 0x00640a40, - 0x00680840, - 0x0000000a, - 
0x0000000a -}; -static const uint32_t program_len = 3; -#endif +static const uint32_t program[192] = {0x00640840, 0x00680800, 0x006c0600, 0x00700840, 0x004c0a80, 0x00480800, 0x007407cc, 0x007c07cb, 0x0049d483, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00480740, 0x0001a645, 0x00780008, 0x0001e006, 0x0069a8c6, 0x0005a645, 0x00780048, 0x0005e046, 0x0009c6c5, 0x00780088, 0x0009e086, 0x0071c8c6, 0x000dc6c5, 0x007800c8, 0x000de0c6, 0x00100007, 0x00141047, 0x007458c6, 0x0019d105, 0x00780188, 0x0019e186, 0x001c3007, 0x00202047, 0x002481c5, 0x00780248, 0x0025e246, 0x007488c6, 0x0029d1c5, 0x00780288, 0x0029e286, 0x006c9247, 0x0030a287, 0x00346907, 0x00645107, 0x003c5345, 0x007803c8, 0x003de3c6, 0x0068f187, 0x0070c607, 0x010004c9, 0x004e14c6, 0xe5800809, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00740640, 0x00780680, 0x0001e787, 0x00040007, 0x00041047, 0x00081787, 0x000c2007, 0x001030c7, 0x00144087, 0x00700940, 0x00185147, 0x00721706, 0x01000709, 0x00186187, 0xfe000809, 0x001c5187, 0x00700980, 0x002071c7, 0x00721706, 0x01000709, 0x00208207, 0xfe000809, 0x00247207, 0x007009c0, 0x00289247, 0x00721706, 0x01000709, 0x0028a287, 0xfe000809, 0x002c9287, 0x00700980, 0x0030b2c7, 0x00721706, 0x01000709, 0x0030c307, 0xfe000809, 0x00347307, 0x00700a00, 0x0038d347, 0x00721706, 0x01000709, 0x0038e387, 0xfe000809, 0x003cd387, 0x00700a40, 0x0040f3c7, 0x00721706, 0x01000709, 0x00410407, 0xfe000809, 0x0044f407, 0x00700a00, 0x00491447, 0x00721706, 0x01000709, 0x00492487, 0xfe000809, 0x004cd487, 0x00700940, 0x005134c7, 0x00721706, 0x01000709, 0x00514507, 0xfe000809, 0x00543507, 0x007d5747, 0x0000000a, 0x0000000a, 0x0000000a}; +static const uint32_t program_len = 134; /* * Attach all the sub-devices we can find @@ -622,7 +308,7 @@ static int power_on(struct sbusfpga_curve25519engine_softc *sc) { int err = 0; if ((curve25519engine_power_read(sc) & 1) == 0) { 
curve25519engine_power_write(sc, 1); - delay(2); + delay(1); } return err; } @@ -698,18 +384,17 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf return -ENXIO; } - #define REG_BASE(reg) (base + (reg * 32)) #define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 ; i ++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i), job->affine_u[i]); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i), job->x0_u[i]); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i), job->x0_w[i]); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i), job->x1_u[i]); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i), job->x1_w[i]); + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i), job->x0_u[i]); */ + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i), job->x0_w[i]); */ + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i), job->x1_u[i]); */ + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i), job->x1_w[i]); */ bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i), job->scalar[i]); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,i), ((i == 0) ? 254 : 0)); - delay(1); + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,i), ((i == 0) ? 
254 : 0)); */ + /* delay(1); */ } #undef SUBREG_ADDR #undef REG_BASE @@ -719,12 +404,12 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf #define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 && !err; i ++) { if (job->affine_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i))) err = EIO; - if (job->x0_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i))) err = EIO; - if (job->x0_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i))) err = EIO; - if (job->x1_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i))) err = EIO; - if (job->x1_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i))) err = EIO; + /* if (job->x0_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i))) err = EIO; */ + /* if (job->x0_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i))) err = EIO; */ + /* if (job->x1_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i))) err = EIO; */ + /* if (job->x1_w[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i))) err = EIO; */ if (job->scalar[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i))) err = EIO; - delay(1); + /* delay(1); */ } if (err) aprint_error_dev(sc->sc_dev, "WRITE - data did not read-write properly\n"); #undef SUBREG_ADDR @@ -752,7 +437,7 @@ static int wait_job(struct sbusfpga_curve25519engine_softc *sc) { while ((status & 1) && (count < 50)) { aprint_normal_dev(sc->sc_dev, "WAIT - ongoing, Curve25519Engine status: 0x%08x [%d]\n", status, count); count ++; - delay(20); + delay(1); status = curve25519engine_status_read(sc); } //curve25519engine_control_write(sc, 0); @@ -778,13 +463,13 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf #define REG_BASE(reg) (base + (reg * 32)) #define 
SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 ; i ++) { - job->affine_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i)); - job->x0_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i)); - job->x0_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i)); - job->x1_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i)); - job->x1_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i)); + /* job->affine_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i)); */ + /* job->x0_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i)); */ + /* job->x0_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i)); */ + /* job->x1_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i)); */ + /* job->x1_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i)); */ job->scalar[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i)); - delay(1); + /* delay(1); */ } aprint_normal_dev(sc->sc_dev, "READ - Curve25519Engine 19 low 32 bits: 0x%08x\n", bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,0))); #undef SUBREG_ADDR diff --git a/README.md b/README.md index 7f6d26c..a969c94 100644 --- a/README.md +++ b/README.md @@ -14,27 +14,44 @@ To save on PCB cost, the board is smaller than a 'true' SBus board; the hardware 2021-07-18: The old VHDL gateware has been replaced by a new Migen-based gateware, see below for details. -Short version: the board enables a 256 MiB SDRAM disk (for fast swapping) and a USH OHCI host controller (for USB peripherals). +2021-08-22: Short version: the board enables a 256 MiB SDRAM disk (for fast swapping), a TRNG, a USB OHCI host controller (for USB peripherals) and a Curve25519 accelerator. 
## The hardware Directory 'sbus-to-ztex' -The custom board is a SBus-compliant (I hope...) board, designed to receive a [ZTex USB-FPGA Module 2.13](https://www.ztex.de/usb-fpga-2/usb-fpga-2.13.e.html) as a daughterboard. The ZTex module contains the actual FPGA (Artix-7), some RAM, programming hardware, etc. The SBus board contains level-shifters ICs to interface between the SBus signals and the FPGA, a serial header, some Leds, a JTAG header, and a micro-sd card slot. +The custom board is a SBus-compliant (I hope...) board, designed to receive a [ZTex USB-FPGA Module 2.13](https://www.ztex.de/usb-fpga-2/usb-fpga-2.13.e.html) as a daughterboard. The ZTex module contains the actual FPGA (Artix-7), some RAM, programming hardware, etc. The SBus board contains level-shifters ICs to interface between the SBus signals and the FPGA, a serial header, some Leds, a JTAG header, and a micro-sd card slot. It only connects interrupt line 7 (highest priority) and 1 (lowest priority), which was a mistake (more interrupts are needed and 7 is too high-priority to use at this stage, so just the level 1 is usable), but otherwise supports every SBus feature except the optional parity (i.e. it can do both slave and master modes). The PCB was designed with Kicad 5.0 ## The gateware (Migen) -The gateware was rewritten from scrach in the Migen language, choosen because that's what [Litex](https://github.com/enjoy-digital/litex/) uses. +### Intro + +The gateware was rewritten from scratch in the Migen language, chosen because that's what [Litex](https://github.com/enjoy-digital/litex/) uses. It implements a simple CPU-less Litex SoC built around a Wishbone bus, with a bridge between the SBus and the Wishbone. -A ROM, a SDRAM controller (litedram to the on-board DDR3) and an USB OHCI (host controller, using the Litex wrapper around the [SpinalHDL](https://github.com/SpinalHDL/SpinalHDL) implementation) are connected to that bus.
+A ROM, a SDRAM controller ([litedram](https://github.com/enjoy-digital/litedram) to the on-board DDR3), a TRNG (using the [NeoRV32](https://github.com/stnolting/neorv32) TRNG), a USB OHCI (host controller, using the Litex wrapper around the [SpinalHDL](https://github.com/SpinalHDL/SpinalHDL) implementation) and a Curve25519 Crypto Engine (taken from the [Betrusted.IO](https://betrusted.io/) project) are connected to that bus. + +### Details + Master access to the SBus by the host are routed to the Wishbone to access the various CSRs / control registers of the devices. -The USB OHCI DMA is bridged from the Wishbone to the SBus by having the physical addresses of the Wishbone (that match the virtual addresses from NetBSD DVMA allocations) to the bridge. Reads are buffered by block of 16 bytes; currently writes are unbuffered (and somwhat slow, as they need a full SBus master cycle for every transaction of 32 bits or less). The standard NetBSD OHCI driver is used, with just a small custom SBus-OHCI driver mirroring the PCI-OHCI one. +The ROM doesn't do much beyond exposing the devices' existence and specifications to the host. -The SDRAM has its own custom DMA controller, using native Litedram DMA to the memory, and some FIFO to/from the SBus. A custom NetBSD driver exposes it as a drive on which you can swap. It might also be usable as a 'fast', volatile disk, but I haven't tried that yet. +The SDRAM has its own custom DMA controller, using native Litedram DMA to the memory, and some FIFO to/from the SBus. A custom NetBSD driver exposes it as a drive on which you can swap. It's also usable as a 'fast', volatile disk (for e.g. /tmp or similar temporary filesystem). It could use an interrupt line, but the only usable one in the current HW design is in use by the USB. + +The TRNG has a NetBSD driver to add entropy to the entropy pool.
+ +The USB OHCI DMA is bridged from the Wishbone to the SBus by having the physical addresses of the Wishbone (that match the virtual addresses from NetBSD DVMA allocations) to the bridge. Reads are buffered by block of 16 bytes; currently writes are unbuffered (and somewhat slow, as they need a full SBus master cycle for every transaction of 32 bits or less). The standard NetBSD OHCI driver is used, with just a small custom SBus-OHCI driver mirroring the PCI-OHCI one. It uses the interrupt level 1 available on the board. As the board has no USB connectors, the D+ and D- lines are routed to the Serial header pins, those (and GND) are connected to a pair of pins of [Dolu1990's USB PMod](https://github.com/Dolu1990/pmod_usb_host_x4), and the associated USB port is connected to an external self-powered USB hub (which is the one supplying the VBus). It's quite ugly but it works (of course I should redesign the PCB with a proper USB connector and a VBus). + +The Curve25519 Engine currently exposes an IOCTL to do the computation, which has yet to be integrated usefully in e.g. OpenSSL. It could use an interrupt line, but the only usable one in the current HW design is in use by the USB. + +### Special Notes + +Currently the design uses a Wishbone Crossbar Interconnect from Litex instead of a Shared Interconnect, as for some reason using a Shared Interconnect causes issues between devices (disabling the USB OHCI also seems to solve the issue, as it generates a lot of cycles on the buses). I might be misusing Wishbone. With the Crossbar, all devices are usable simultaneously. + +As not everything lives in the same clock domain, the design also uses a Wishbone CDC, a wrapper around the one from [Verilog Wishbone Components](https://github.com/alexforencich/verilog-wishbone).
## The gateware (VHDL, obsolete) diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index 9938a75..43c0626 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -228,6 +228,7 @@ class Curve25519Const(Module, AutoDoc): 7: [20, "twenty", "The number 20 (for pow22501)"], 8: [50, "fifty", "The number 50 (for pow22501)"], 9: [100, "one hundred", "The number 100 (for pow22501)"], + 10: [254, "two hundred fifty four", "The number 254 (iteration count)"], } self.adr = Signal(5) self.const = Signal(256) diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index 6f9dff0..0182e10 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -20,23 +20,6 @@ fn main() -> std::io::Result<()> { // %19 is the loop counter, starts with 254 (if 0, loop runs exactly once) // I // %31 is the scalar // I // %18 is the swap variable - // START NEW - psa %25, #1 - psa %26, #0 - psa %27, %24 - psa %28, #1 - // #9 is 100 - psa %20, #9 - add %21, %20, %20 - // #8 is 50 - psa %20, #8 - add %21, %21, %20 - // #5 is 5 - psa %20, #5 - add %21, %21, %20 - psa %20, #1 - sub %19, %21, %20 - // END NEW psa %18, #0 // for i in (0..255).rev() @@ -284,6 +267,255 @@ fn main() -> std::io::Result<()> { mul %31, %29, %21 fin // finish execution ); + let mcode_upd = assemble_engine25519!( + start: + // P.U in %20 + // P.W in %21 + // Q.U in %22 + // Q.W in %23 + // affine_PmQ in %24 // I + // %30 is the TRD scratch register and cswap dummy + // %29 is the subtraction temporary value register and k_t + // x0.U in %25 // !I + // x0.W in %26 // !I + // x1.U in %27 // !I + // x1.W in %28 // !I + // %19 is the loop counter, starts with 254 (if 0, loop runs exactly once) // I + // %31 is the scalar // I + // %18 is the swap variable + psa %25, #1 + psa %26, #0 + psa %27, %24 + psa %28, #1 + 
// #10 is 254 in my Engine + psa %19, #10 + psa %18, #0 + + // for i in (0..255).rev() + mainloop: + // let choice: u8 = (bits[i + 1] ^ bits[i]) as u8; + // ProjectivePoint::conditional_swap(&mut x0, &mut x1, choice.into()); + xbt %29, %31 // originally[k_t = (k>>t) & 1] now[k_t = k[254]] + shl %31, %31 // k = k<<1 + xor %18, %18, %29 // swap ^= k_t + + // cswap x0.U (%25), x1.U (%27) + xor %30, %25, %27 + msk %30, %18, %30 + xor %25, %30, %25 + xor %27, %30, %27 + // cswap x0.W (%26), x1.W (%28) + xor %30, %26, %28 + msk %30, %18, %30 + xor %26, %30, %26 + xor %28, %30, %28 + + psa %18, %29 // swap = k_t + + // differential_add_and_double(&mut x0, &mut x1, &affine_u); + // affine_u is already in %24 + + // let t0 = &P.U + &P.W; + add %0, %25, %26 + trd %30, %0 + sub %0, %0, %30 + // let t1 = &P.U - &P.W; + sub %26, #3, %26 // negate &P.W using #FIELDPRIME (#3) + add %1, %25, %26 + trd %30, %1 + sub %1, %1, %30 + // let t2 = &Q.U + &Q.W; + add %2, %27, %28 + trd %30, %2 + sub %2, %2, %30 + // let t3 = &Q.U - &Q.W; + sub %28, #3, %28 + add %3, %27, %28 + trd %30, %3 + sub %3, %3, %30 + // let t4 = t0.square(); // (U_P + W_P)^2 = U_P^2 + 2 U_P W_P + W_P^2 + mul %4, %0, %0 + // let t5 = t1.square(); // (U_P - W_P)^2 = U_P^2 - 2 U_P W_P + W_P^2 + mul %5, %1, %1 + // let t6 = &t4 - &t5; // 4 U_P W_P + sub %29, #3, %5 + add %6, %4, %29 + trd %30, %6 + sub %6, %6, %30 + // let t7 = &t0 * &t3; // (U_P + W_P) (U_Q - W_Q) = U_P U_Q + W_P U_Q - U_P W_Q - W_P W_Q + mul %7, %0, %3 + // let t8 = &t1 * &t2; // (U_P - W_P) (U_Q + W_Q) = U_P U_Q - W_P U_Q + U_P W_Q - W_P W_Q + mul %8, %1, %2 + // let t9 = &t7 + &t8; // 2 (U_P U_Q - W_P W_Q) + add %9, %7, %8 + trd %30, %9 + sub %9, %9, %30 + // let t10 = &t7 - &t8; // 2 (W_P U_Q - U_P W_Q) + sub %29, #3, %8 + add %10, %7, %29 + trd %30, %10 + sub %10, %10, %30 + // let t11 = t9.square(); // 4 (U_P U_Q - W_P W_Q)^2 + mul %27, %9, %9 + // let t12 = t10.square(); // 4 (W_P U_Q - U_P W_Q)^2 + mul %12, %10, %10 + // let t13 =
&APLUS2_OVER_FOUR * &t6; // (A + 2) U_P U_Q + mul %13, #4, %6 // #4 is A+2/4 + // let t14 = &t4 * &t5; // ((U_P + W_P)(U_P - W_P))^2 = (U_P^2 - W_P^2)^2 + mul %25, %4, %5 + // let t15 = &t13 + &t5; // (U_P - W_P)^2 + (A + 2) U_P W_P + add %15, %13, %5 + trd %30, %15 + sub %15, %15, %30 + // let t16 = &t6 * &t15; // 4 (U_P W_P) ((U_P - W_P)^2 + (A + 2) U_P W_P) + mul %26, %6, %15 + // let t17 = affine_PmQ * &t12; // U_D * 4 (W_P U_Q - U_P W_Q)^2 + mul %28, %24, %12 // affine_PmQ loaded into %24 + + brz end, %19 // if loop counter is 0, quit + sub %19, %19, #1 // subtract one from the loop counter and run again + brz mainloop, #0 // go back to the top + end: + // ProjectivePoint::conditional_swap(&mut x0, &mut x1, Choice::from(bits[0] as u8)); + // cswap x0.U (%25), x1.U (%27) + xor %30, %25, %27 + msk %30, %18, %30 + xor %25, %30, %25 + xor %27, %30, %27 + // cswap x0.W (%26), x1.W (%28) + xor %30, %26, %28 + msk %30, %18, %30 + xor %26, %30, %26 + xor %28, %30, %28 + + // AFFINE SPLICE -- pass arguments to the affine block + psa %29, %25 + psa %30, %26 + // W.invert() in %21 + // U in %29 + // W in %30 + // result in %31 + // loop counter in %28 + + // from FieldElement.invert() + // let (t19, t3) = self.pow22501(); // t19: 249..0 ; t3: 3,1,0 + // let t0 = self.square(); // 1 e_0 = 2^1 + mul %0, %30, %30 // self is W, e.g. 
%30 + // let t1 = t0.square().square(); // 3 e_1 = 2^3 + mul %1, %0, %0 + mul %1, %1, %1 + // let t2 = self * &t1; // 3,0 e_2 = 2^3 + 2^0 + mul %2, %30, %1 + // let t3 = &t0 * &t2; // 3,1,0 + mul %3, %0, %2 + // let t4 = t3.square(); // 4,2,1 + mul %4, %3, %3 + // let t5 = &t2 * &t4; // 4,3,2,1,0 + mul %5, %2, %4 + + // let t6 = t5.pow2k(5); // 9,8,7,6,5 + psa %28, #5 // coincidentally, constant #5 is the number 5 + mul %6, %5, %5 + pow2k_5: + sub %28, %28, #1 // %28 = %28 - 1 + brz pow2k_5_exit, %28 + mul %6, %6, %6 + brz pow2k_5, #0 + pow2k_5_exit: + // let t7 = &t6 * &t5; // 9,8,7,6,5,4,3,2,1,0 + mul %7, %6, %5 + + // let t8 = t7.pow2k(10); // 19..10 + psa %28, #6 // constant #6 is the number 10 + mul %8, %7, %7 + pow2k_10: + sub %28, %28, #1 + brz pow2k_10_exit, %28 + mul %8, %8, %8 + brz pow2k_10, #0 + pow2k_10_exit: + // let t9 = &t8 * &t7; // 19..0 + mul %9, %8, %7 + + // let t10 = t9.pow2k(20); // 39..20 + psa %28, #7 // constant #7 is the number 20 + mul %10, %9, %9 + pow2k_20: + sub %28, %28, #1 + brz pow2k_20_exit, %28 + mul %10, %10, %10 + brz pow2k_20, #0 + pow2k_20_exit: + // let t11 = &t10 * &t9; // 39..0 + mul %11, %10, %9 + + // let t12 = t11.pow2k(10); // 49..10 + psa %28, #6 // constant #6 is the number 10 + mul %12, %11, %11 + pow2k_10b: + sub %28, %28, #1 + brz pow2k_10b_exit, %28 + mul %12, %12, %12 + brz pow2k_10b, #0 + pow2k_10b_exit: + // let t13 = &t12 * &t7; // 49..0 + mul %13, %12, %7 + + // let t14 = t13.pow2k(50); // 99..50 + psa %28, #8 // constant #8 is the number 50 + mul %14, %13, %13 + pow2k_50a: + sub %28, %28, #1 + brz pow2k_50a_exit, %28 + mul %14, %14, %14 + brz pow2k_50a, #0 + pow2k_50a_exit: + // let t15 = &t14 * &t13; // 99..0 + mul %15, %14, %13 + + // let t16 = t15.pow2k(100); // 199..100 + psa %28, #9 // constant #9 is the number 100 + mul %16, %15, %15 + pow2k_100: + sub %28, %28, #1 + brz pow2k_100_exit, %28 + mul %16, %16, %16 + brz pow2k_100, #0 + pow2k_100_exit: + // let t17 = &t16 * &t15; // 199..0 + mul %17, %16, 
%15 + + // let t18 = t17.pow2k(50); // 249..50 + psa %28, #8 // constant #8 is the number 50 + mul %18, %17, %17 + pow2k_50b: + sub %28, %28, #1 + brz pow2k_50b_exit, %28 + mul %18, %18, %18 + brz pow2k_50b, #0 + pow2k_50b_exit: + // let t19 = &t18 * &t13; // 249..0 + mul %19, %18, %13 + //(t19, t3) // just a return value, values are already there, do nothing + + //let t20 = t19.pow2k(5); // 254..5 + psa %28, #5 + mul %20, %19, %19 + pow2k_5_last: + sub %28, %28, #1 + brz pow2k_5_last_exit, %28 + mul %20, %20, %20 + brz pow2k_5_last, #0 + pow2k_5_last_exit: + + //let t21 = &t20 * &t3; // 254..5,3,1,0 + mul %21, %20, %3 + + // u = &self.U * &self.W.invert() + mul %31, %29, %21 + fin // finish execution + ); + let mcode2 = assemble_engine25519!( start: // P.U in %20 @@ -302,11 +534,14 @@ fn main() -> std::io::Result<()> { // %18 is the swap variable psa %25, #9 psa %26, #1 + mul %27, %25, %26 + mul %28, %25, %25 + mul %31, %24, %24 fin ); let mut pos = 0; - while pos < mcode.len() { - println!("0x{:08x},", mcode[pos]); + while pos < mcode_upd.len() { + println!("0x{:08x},", mcode_upd[pos]); pos = pos + 1; } Ok(()) diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 4f44e70..8fd6f36 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-21 08:42:06 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-22 03:23:02 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py index 61d3eac..af0fe63 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py +++ 
b/sbus-to-ztex-gateware-migen/sbus_to_fpga_blk_dma.py @@ -25,8 +25,6 @@ class ExchangeWithMem(Module, AutoCSR): assert(len(self.dram_dma_writer.sink.data) == data_width_bits) assert(len(self.dram_dma_reader.source.data) == data_width_bits) - assert(len(self.dram_dma_writer.sink.address) == blk_addr_width) - assert(len(self.dram_dma_reader.sink.address) == blk_addr_width) #self.wishbone_r_master = wishbone.Interface(data_width=data_width_bits) #self.wishbone_w_master = wishbone.Interface(data_width=data_width_bits) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index 8b92f7f..ce1462c 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -33,8 +33,8 @@ ENGINE_ADDR_PFXA = Signal(12, reset = 0x00a) ENGINE_ADDR_PFXB = Signal(12, reset = 0x00b) #SDRAM_ADDR_PFX = Signal(12, reset = 2048) -wishbone_default_timeout = 120 ## must be > sbus_default_timeout -sbus_default_timeout = 100 ## must be below 127 as we can wait twice on it inside the 255 cycles +wishbone_default_timeout = 120 ## +sbus_default_timeout = 50 ## must be below 255 sbus_default_master_throttle = 3 def siz_is_word(siz): @@ -582,6 +582,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_WORD), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextValue(sbus_slave_timeout, sbus_default_timeout), NextState("Slave_Ack_Reg_Write_Burst") ).Else( NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), @@ -610,6 +611,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_BYTE), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextValue(sbus_slave_timeout, sbus_default_timeout), NextState("Slave_Ack_Reg_Write_Byte") ).Else( NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), @@ -644,6 +646,7 @@ class 
SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_HWORD), NextValue(SBUS_3V3_ERRs_o, 1), #NextValue(self.led_display.value, 0x0000000010 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), + NextValue(sbus_slave_timeout, sbus_default_timeout), NextState("Slave_Ack_Reg_Write_HWord") ).Else( NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), @@ -888,7 +891,6 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.sel, 2**len(self.wishbone_master.sel)-1), NextValue(self.wishbone_master.we, 0), NextValue(wishbone_master_timeout, wishbone_default_timeout), - NextValue(sbus_slave_timeout, sbus_default_timeout), NextValue(self.wishbone_master.adr, Cat(index_with_wrap(burst_counter+1, burst_limit_m1, sbus_last_pa[ADDR_PHYS_LOW+2:ADDR_PHYS_LOW+6]), # 4 bits, adr FIXME sbus_last_pa[ADDR_PHYS_LOW+6:ADDR_PFX_LOW], # 10 bits, adr sbus_last_pa[ADDR_PFX_LOW:ADDR_PFX_LOW+ADDR_PFX_LENGTH], # 12 bits, adr @@ -932,7 +934,6 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.we, 0), NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[2:28], Signal(4, reset = 0))), NextValue(wishbone_master_timeout, wishbone_default_timeout), - NextValue(sbus_slave_timeout, sbus_slave_timeout), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), NextState("Slave_Ack_Read_Reg_Burst_Wait_For_Data") ).Elif(sbus_slave_timeout == 0, ### this is taking too long @@ -994,7 +995,6 @@ class SBusFPGABus(Module): NextValue(self.wishbone_master.we, 0), NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[2:28], Signal(4, reset = 0))), NextValue(wishbone_master_timeout, wishbone_default_timeout), - NextValue(sbus_slave_timeout, sbus_slave_timeout), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), NextState("Slave_Ack_Read_Reg_HWord_Wait_For_Data") ).Elif(sbus_slave_timeout == 0, ### this is taking too long @@ -1054,7 +1054,6 @@ class SBusFPGABus(Module): 
NextValue(self.wishbone_master.we, 0), NextValue(self.wishbone_master.adr, Cat(sbus_last_pa[2:28], Signal(4, reset = 0))), NextValue(wishbone_master_timeout, wishbone_default_timeout), - NextValue(sbus_slave_timeout, sbus_slave_timeout), #NextValue(self.led_display.value, 0x0000000000 | Cat(Signal(8, reset = 0), SBUS_3V3_PA_i, Signal(4, reset = 0))), NextState("Slave_Ack_Read_Reg_Byte_Wait_For_Data") ).Elif(sbus_slave_timeout == 0, ### this is taking too long @@ -1088,8 +1087,9 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), NextState("Slave_Ack_Reg_Write_Final") ).Else( - NextValue(SBUS_3V3_ACKs_o, ACK_WORD), - NextValue(burst_counter, burst_counter + 1) + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(burst_counter, burst_counter + 1), + NextState("Slave_Ack_Reg_Write_Burst_Wait_For_Wishbone"), ) ) slave_fsm.act("Slave_Ack_Reg_Write_Final", @@ -1204,12 +1204,13 @@ class SBusFPGABus(Module): ) # ##### SLAVE ERROR ##### slave_fsm.act("Slave_Error", - NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), + NextValue(SBUS_3V3_ACKs_o, ACK_IDLE), #NextValue(self.led_display.value, 0x0000000080 | self.led_display.value), If(((SBUS_3V3_ASs_i == 1) | ((SBUS_3V3_ASs_i == 0) & (SBUS_3V3_SELs_i == 1))), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), + NextValue(sbus_slave_timeout, 0), NextState("Idle") ) ) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 6b49585..777ae4d 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -85,7 +85,7 @@ class _CRG(Module): ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) self.submodules.curve25519_pll = curve25519_pll = S7MMCM(speedgrade=-1) - curve25519_clk_freq = 80e6 + curve25519_clk_freq = 90e6 self.curve25519_on = Signal() #curve25519_pll.register_clkin(clk48, 48e6) curve25519_pll.register_clkin(self.clk48_bufg, 48e6) From 
6d142636c2553d2d20a21cac1963caca10aa6b90 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 22 Aug 2021 09:08:48 -0400 Subject: [PATCH 62/78] Clean-up the master code (src) --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 13 +- sbus-to-ztex-gateware-migen/netbsd_csr.h | 2 +- .../sbus_to_fpga_fsm.py | 205 ++++++++++++------ 3 files changed, 145 insertions(+), 75 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 106c6d1..2e0e000 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -105,9 +105,6 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; int err = 0; - /* first we need to turn the engine power on ... */ - power_on(sc); - if (!sc->initialized) { if (init_program(sc)) { return ENXIO; @@ -137,19 +134,25 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st break; } - power_off(sc); return(err); } int sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l) { + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + /* first we need to turn the engine power on ... 
*/ + power_on(sc); + return (0); } int sbusfpga_curve25519engine_close(dev_t dev, int flags, int mode, struct lwp *l) { + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + power_off(sc); + return (0); } @@ -325,7 +328,7 @@ static int init_program(struct sbusfpga_curve25519engine_softc *sc) { for (i = 0 ; i < program_len + 1 ; i++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_microcode, (i*4), program[i]); - if ((i%8)==7) + if ((i%16)==15) delay(1); } diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 8fd6f36..277fb33 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-22 03:23:02 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-22 07:40:46 //-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py index ce1462c..e3204f0 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsm.py @@ -280,11 +280,14 @@ class SBusFPGABus(Module): # buffers when someone inside issues a DMA write request to go over SBus master_data = Signal(32) # could be merged with p_data master_addr = Signal(30) # could be meged with data_read_addr - # FIXME, ugly - # we're handling a request from the FIFO (not wishbone) - write to host - master_data_src_tosbus_fifo = Signal() - # we're handling a request from the FIFO (not wishbone) - read from host - master_data_src_fromsbus_fifo = Signal() + + MASTER_SRC_INV = 0 + MASTER_SRC_BLKDMAFIFO = 1 + MASTER_SRC_WISHBONE = 2 + MASTER_SRC_WISHBONEBUF = 3 + master_src = Signal(2) + 
master_src_retry = Signal(1) # reset after each successful master cycle + master_size = Signal(4) master_idx = Signal(2) @@ -385,8 +388,6 @@ class SBusFPGABus(Module): # slave_fsm.ongoing("Slave_Ack_Reg_Write_Byte_Wait_For_Wishbone")) #self.sync += platform.request("user_led", 5).eq(~slave_fsm.ongoing("Idle")) - #self.sync += platform.request("user_led", 6).eq(master_data_src_tosbus_fifo) - #self.sync += platform.request("user_led", 7).eq(master_data_src_fromsbus_fifo) stat_slave_start_counter = Signal(32) stat_slave_done_counter = Signal(32) @@ -671,6 +672,26 @@ class SBusFPGABus(Module): (wishbone_slave_timeout == 0), ## sel == 0 so nothing to write, don't acquire the SBus NextValue(self.wishbone_slave.ack, 1), + ).Elif(SBUS_3V3_BGs_i & ## highest priority are retries, otherwise we'd lose the data + master_src_retry & + (master_we == 0) & + (master_src == MASTER_SRC_BLKDMAFIFO) & + (sbus_master_throttle == 0), + NextValue(SBUS_3V3_BRs_o, 0) + ).Elif(~SBUS_3V3_BGs_i & ## highest priority are retries, otherwise we'd lose the data + master_src_retry & + (master_we == 0) & + (master_src == MASTER_SRC_BLKDMAFIFO), + NextValue(sbus_wishbone_le, 0), # checkme + NextValue(SBUS_3V3_BRs_o, 1), # relinquish the request + NextValue(sbus_oe_data, 1), ## output data (at least for @ during translation) + NextValue(sbus_oe_slave_in, 1), ## PPRD, SIZ becomes output + NextValue(sbus_oe_master_in, 0), ## ERRs, ACKs are input + NextValue(burst_counter, 0), + NextValue(SBUS_3V3_D_o, sbus_master_last_virtual), + NextValue(SBUS_3V3_PPRD_o, 1), + #NextValue(stat_master_start_counter, stat_master_start_counter + 1), + NextState("Master_Translation"), ).Elif(SBUS_3V3_BGs_i & self.wishbone_slave.cyc & self.wishbone_slave.stb & @@ -698,6 +719,7 @@ class SBusFPGABus(Module): self.wishbone_slave.dat_w[16:24], self.wishbone_slave.dat_w[ 8:16], self.wishbone_slave.dat_w[ 0: 8])), + NextValue(master_src, MASTER_SRC_WISHBONE), Case(self.wishbone_slave.sel, { 0xf: [NextValue(burst_counter, 0), 
NextValue(burst_limit_m1, 0), ## only single word for now @@ -749,7 +771,6 @@ class SBusFPGABus(Module): #NextValue(led0123, self.wishbone_slave.sel) ] }), -# NextValue(master_data, self.wishbone_slave.dat_w), NextValue(self.wishbone_slave.ack, 1), NextValue(wishbone_slave_timeout, wishbone_default_timeout), NextValue(SBUS_3V3_PPRD_o, 0), @@ -774,6 +795,7 @@ class SBusFPGABus(Module): NextValue(burst_limit_m1, 3), ## only quadword word for now NextValue(SBUS_3V3_D_o, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), NextValue(sbus_master_last_virtual, Cat(Signal(4, reset = 0), self.master_read_buffer_addr)), + NextValue(master_src, MASTER_SRC_WISHBONEBUF), NextValue(SBUS_3V3_PPRD_o, 1), NextValue(SBUS_3V3_SIZ_o, SIZ_BURST4), NextValue(master_we, 0), @@ -799,7 +821,7 @@ class SBusFPGABus(Module): NextValue(master_addr, self.tosbus_fifo.dout[2:32]), NextValue(master_data, self.tosbus_fifo.dout[32:64]), NextValue(fifo_buffer, self.tosbus_fifo.dout[32:]), - NextValue(master_data_src_tosbus_fifo, 1), + NextValue(master_src, MASTER_SRC_BLKDMAFIFO), self.tosbus_fifo.re.eq(1), Case(burst_size, { 2 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST2), @@ -833,7 +855,7 @@ class SBusFPGABus(Module): NextValue(SBUS_3V3_D_o, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), NextValue(sbus_master_last_virtual, self.fromsbus_req_fifo.dout[blk_addr_width:blk_addr_width+32]), NextValue(fifo_blk_addr, self.fromsbus_req_fifo.dout[0:blk_addr_width]), - NextValue(master_data_src_fromsbus_fifo, 1), + NextValue(master_src, MASTER_SRC_BLKDMAFIFO), self.fromsbus_req_fifo.re.eq(1), Case(burst_size, { 2 : [NextValue(SBUS_3V3_SIZ_o, SIZ_BURST2), @@ -1250,18 +1272,26 @@ class SBusFPGABus(Module): master_data[16:32],)), }) }), - If(master_data_src_tosbus_fifo, - NextValue(master_data, fifo_buffer[32:64]), # 0:32 is on the bus already - ), + Case(master_src, { + MASTER_SRC_BLKDMAFIFO: + [NextValue(master_data, fifo_buffer[32:64]), # 0:32 is on the bus already + ], + }), ).Else( 
NextValue(sbus_oe_data, 0) ), Case(SBUS_3V3_ACKs_i, { ACK_ERR: ## ouch - [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), - ), + [Case(master_src, { + MASTER_SRC_WISHBONE: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + MASTER_SRC_WISHBONEBUF: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + }), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1269,10 +1299,16 @@ class SBusFPGABus(Module): NextValue(sbus_master_error_virtual, sbus_master_last_virtual), NextState("Idle")], ACK_RERUN: ### dunno how to handle that yet, - [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), - ), + [Case(master_src, { + MASTER_SRC_WISHBONE: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + MASTER_SRC_WISHBONEBUF: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + }), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1306,10 +1342,19 @@ class SBusFPGABus(Module): [NextState("Master_Read") ## redundant ], ACK_RERUN: ### burst not handled - [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), - ), + [Case(master_src, { + MASTER_SRC_WISHBONE: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + MASTER_SRC_WISHBONEBUF: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + 
MASTER_SRC_BLKDMAFIFO: + [NextValue(master_src_retry, 1), + ], + }), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1317,10 +1362,19 @@ class SBusFPGABus(Module): NextState("Idle") ], ACK_ERR: ## ### burst not handled - [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), - ), + [Case(master_src, { + MASTER_SRC_WISHBONE: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + MASTER_SRC_WISHBONEBUF: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + MASTER_SRC_BLKDMAFIFO: + [NextValue(master_src_retry, ~master_src_retry), # only retry if this wasn't a retry + ], + }), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1329,10 +1383,16 @@ class SBusFPGABus(Module): NextState("Idle") ], "default": ## other ### burst not handled - [If(~master_data_src_tosbus_fifo & ~master_data_src_fromsbus_fifo, - NextValue(wishbone_slave_timeout, wishbone_default_timeout), - NextValue(self.wishbone_slave.err, 1), - ), + [Case(master_src, { + MASTER_SRC_WISHBONE: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + MASTER_SRC_WISHBONEBUF: + [NextValue(wishbone_slave_timeout, wishbone_default_timeout), + NextValue(self.wishbone_slave.err, 1), + ], + }), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), @@ -1343,16 +1403,17 @@ class SBusFPGABus(Module): ) slave_fsm.act("Master_Read_Ack", #NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0b), self.led_display.value[8:40])), - If(master_data_src_fromsbus_fifo, - Case(burst_counter, { - 0: NextValue(fifo_buffer[0:32], SBUS_3V3_D_i), - 1: NextValue(fifo_buffer[32:64], SBUS_3V3_D_i), - 2: 
NextValue(fifo_buffer[64:96], SBUS_3V3_D_i), - 3: NextValue(fifo_buffer[96:128], SBUS_3V3_D_i), - 4: NextValue(fifo_buffer[128:160], SBUS_3V3_D_i), - 5: NextValue(fifo_buffer[160:192], SBUS_3V3_D_i), - 6: NextValue(fifo_buffer[192:224], SBUS_3V3_D_i), - 7: NextValue(fifo_buffer[224:256], SBUS_3V3_D_i), + Case(master_src, { + MASTER_SRC_BLKDMAFIFO: + [Case(burst_counter, { + 0: NextValue(fifo_buffer[0:32], SBUS_3V3_D_i), + 1: NextValue(fifo_buffer[32:64], SBUS_3V3_D_i), + 2: NextValue(fifo_buffer[64:96], SBUS_3V3_D_i), + 3: NextValue(fifo_buffer[96:128], SBUS_3V3_D_i), + 4: NextValue(fifo_buffer[128:160], SBUS_3V3_D_i), + 5: NextValue(fifo_buffer[160:192], SBUS_3V3_D_i), + 6: NextValue(fifo_buffer[192:224], SBUS_3V3_D_i), + 7: NextValue(fifo_buffer[224:256], SBUS_3V3_D_i), # 8: NextValue(fifo_buffer[256:288], SBUS_3V3_D_i), # 9: NextValue(fifo_buffer[288:320], SBUS_3V3_D_i), # 10: NextValue(fifo_buffer[320:352], SBUS_3V3_D_i), @@ -1361,16 +1422,20 @@ class SBusFPGABus(Module): # 13: NextValue(fifo_buffer[416:448], SBUS_3V3_D_i), # 14: NextValue(fifo_buffer[448:480], SBUS_3V3_D_i), # 15: NextValue(fifo_buffer[480:512], SBUS_3V3_D_i), - }), - ).Else( - NextValue(self.master_read_buffer_data[burst_counter[0:2]], SBUS_3V3_D_i), - NextValue(self.master_read_buffer_done[burst_counter[0:2]], 1), - ), + }), + ], + MASTER_SRC_WISHBONEBUF: + [NextValue(self.master_read_buffer_data[burst_counter[0:2]], SBUS_3V3_D_i), + NextValue(self.master_read_buffer_done[burst_counter[0:2]], 1), + ], + }), NextValue(burst_counter, burst_counter + 1), If(burst_counter == burst_limit_m1, - If(~master_data_src_fromsbus_fifo, - NextValue(self.master_read_buffer_start, 0), - ), + Case(master_src, { + MASTER_SRC_WISHBONEBUF: + [NextValue(self.master_read_buffer_start, 0), + ], + }), NextState("Master_Read_Finish") ).Else( Case(SBUS_3V3_ACKs_i, { @@ -1403,16 +1468,18 @@ class SBusFPGABus(Module): ) slave_fsm.act("Master_Read_Finish", ## missing the handling of late error 
#NextValue(self.led_display.value, Cat(Signal(8, reset = 0x0c), self.led_display.value[8:40])), - If(master_data_src_fromsbus_fifo, - fromsbus_fifo.we.eq(1), - fromsbus_fifo.din.eq(Cat(fifo_blk_addr, fifo_buffer)), - NextValue(master_data_src_fromsbus_fifo, 0), - ), + Case(master_src, { + MASTER_SRC_BLKDMAFIFO: + [fromsbus_fifo.we.eq(1), + fromsbus_fifo.din.eq(Cat(fifo_blk_addr, fifo_buffer)), + ], + }), NextValue(sbus_oe_data, 0), NextValue(sbus_oe_slave_in, 0), NextValue(sbus_oe_master_in, 0), NextValue(sbus_master_throttle, sbus_default_master_throttle), NextValue(stat_master_done_counter, stat_master_done_counter + 1), + NextValue(master_src_retry, 0), NextState("Idle") ) slave_fsm.act("Master_Write", @@ -1421,20 +1488,18 @@ class SBusFPGABus(Module): ACK_WORD: # FIXME: check againt master_size ? [If(burst_counter == burst_limit_m1, NextState("Master_Write_Final"), - If(master_data_src_tosbus_fifo, - NextValue(master_data_src_tosbus_fifo, 0), - ) ).Else( NextValue(SBUS_3V3_D_o, master_data), NextValue(burst_counter, burst_counter + 1), - If(master_data_src_tosbus_fifo, - Case(burst_counter, { #0:32 just ack'd, 32:64 is on the bus now, burst_counter will only increment for the next cycle, so we're two steps ahead - 0: NextValue(master_data, fifo_buffer[64:96]), - 1: NextValue(master_data, fifo_buffer[96:128]), - 2: NextValue(master_data, fifo_buffer[128:160]), - 3: NextValue(master_data, fifo_buffer[160:192]), - 4: NextValue(master_data, fifo_buffer[192:224]), - 5: NextValue(master_data, fifo_buffer[224:256]), + Case(master_src, { + MASTER_SRC_BLKDMAFIFO: + [Case(burst_counter, { #0:32 just ack'd, 32:64 is on the bus now, burst_counter will only increment for the next cycle, so we're two steps ahead + 0: NextValue(master_data, fifo_buffer[64:96]), + 1: NextValue(master_data, fifo_buffer[96:128]), + 2: NextValue(master_data, fifo_buffer[128:160]), + 3: NextValue(master_data, fifo_buffer[160:192]), + 4: NextValue(master_data, fifo_buffer[192:224]), + 5: 
NextValue(master_data, fifo_buffer[224:256]), # 6: NextValue(master_data, fifo_buffer[256:288]), # 7: NextValue(master_data, fifo_buffer[288:320]), # 8: NextValue(master_data, fifo_buffer[320:352]), @@ -1446,8 +1511,9 @@ class SBusFPGABus(Module): #14: NextValue(master_data, fifo_buffer[512:544]), #15: NextValue(master_data, fifo_buffer[544:576]), "default": NextValue(master_data, 0), - }) - ), + }) + ], + }), )], ACK_BYTE: # FIXME: check againt master_size ? [NextState("Master_Write_Final"), @@ -1489,6 +1555,7 @@ class SBusFPGABus(Module): NextValue(sbus_oe_master_in, 0), NextValue(sbus_master_throttle, sbus_default_master_throttle), NextValue(stat_master_done_counter, stat_master_done_counter + 1), + NextValue(master_src_retry, 0), NextState("Idle") ) # ##### FINISHED ##### From eede097217be7abf595c3a4e92b473af65529f59 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 22 Aug 2021 10:27:28 -0400 Subject: [PATCH 63/78] CSRs don't support CDC... move Engine to sysclk ; also forgot to commit fsmstat --- sbus-to-ztex-gateware-migen/engine.py | 3 +- .../sbus_to_fpga_fsmstat.py | 59 +++++++++++++++++ .../sbus_to_fpga_soc.py | 66 +++++++++++-------- 3 files changed, 98 insertions(+), 30 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/sbus_to_fpga_fsmstat.py diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index 43c0626..72a180f 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -1934,7 +1934,8 @@ Here are the currently implemented opcodes for The Engine: ##### TIMING CONSTRAINTS -- you want these. Trust me. 
clk50 = "clk50" - clk100 = "clk100" + #clk100 = "clk100" + clk100 = "sysclk" clk200 = "clk200" # registered exec units need this set of rules ### clk200->clk50 multi-cycle paths: diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsmstat.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsmstat.py new file mode 100644 index 0000000..6ae0230 --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_fsmstat.py @@ -0,0 +1,59 @@ +from migen import * +from migen.genlib.cdc import BusSynchronizer +from litex.soc.interconnect.csr import * +from litex.soc.interconnect import wishbone + +class SBusFPGABusStat(Module, AutoCSR): + def __init__(self, sbus_bus): + self.stat_ctrl = CSRStorage(fields = [CSRField("update", 1, description = "update")]) + self.submodules.sync_update = BusSynchronizer(width = 1, idomain="sys", odomain="sbus") + self.comb += self.sync_update.i.eq(self.stat_ctrl.fields.update) + self.comb += sbus_bus.stat_update.eq(self.sync_update.o) + + self.live_stat_cycle_counter = CSRStatus(32, description="live_stat_cycle_counter") + self.stat_cycle_counter = CSRStatus(32, description="stat_cycle_counter") + self.stat_slave_start_counter = CSRStatus(32, description="stat_slave_start_counter") + self.stat_slave_done_counter = CSRStatus(32, description="stat_slave_done_counter") + self.stat_slave_rerun_counter = CSRStatus(32, description="stat_slave_rerun_counter") + self.stat_slave_early_error_counter = CSRStatus(32, description="stat_slave_early_error_counter") + self.stat_master_start_counter = CSRStatus(32, description="stat_master_start_counter") + self.stat_master_done_counter = CSRStatus(32, description="stat_master_done_counter") + self.stat_master_error_counter = CSRStatus(32, description="stat_master_error_counter") + self.stat_master_rerun_counter = CSRStatus(32, description="stat_master_rerun_counter") + self.sbus_master_error_virtual = CSRStatus(32, description="sbus_master_error_virtual") + + self.submodules.sync_live_stat_cycle_counter = 
BusSynchronizer(width = 32, idomain="sbus", odomain="sys") + self.comb += self.sync_live_stat_cycle_counter.i.eq(sbus_bus.stat_cycle_counter) + self.comb += self.live_stat_cycle_counter.status.eq(self.sync_live_stat_cycle_counter.o) + + self.submodules.sync_stat_cycle_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys") + self.comb += self.sync_stat_cycle_counter.i.eq(sbus_bus.buf_stat_cycle_counter) + self.comb += self.stat_cycle_counter.status.eq(self.sync_stat_cycle_counter.o) + + self.submodules.sync_stat_slave_start_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_slave_start_counter.i.eq(sbus_bus.buf_stat_slave_start_counter) + self.comb += self.stat_slave_start_counter.status.eq(self.sync_stat_slave_start_counter.o) + self.submodules.sync_stat_slave_done_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_slave_done_counter.i.eq(sbus_bus.buf_stat_slave_done_counter) + self.comb += self.stat_slave_done_counter.status.eq(self.sync_stat_slave_done_counter.o) + self.submodules.sync_stat_slave_rerun_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_slave_rerun_counter.i.eq(sbus_bus.buf_stat_slave_rerun_counter) + self.comb += self.stat_slave_rerun_counter.status.eq(self.sync_stat_slave_rerun_counter.o) + self.submodules.sync_stat_slave_early_error_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_slave_early_error_counter.i.eq(sbus_bus.buf_stat_slave_early_error_counter) + self.comb += self.stat_slave_early_error_counter.status.eq(self.sync_stat_slave_early_error_counter.o) + self.submodules.sync_stat_master_start_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_master_start_counter.i.eq(sbus_bus.buf_stat_master_start_counter) + self.comb += 
self.stat_master_start_counter.status.eq(self.sync_stat_master_start_counter.o) + self.submodules.sync_stat_master_done_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_master_done_counter.i.eq(sbus_bus.buf_stat_master_done_counter) + self.comb += self.stat_master_done_counter.status.eq(self.sync_stat_master_done_counter.o) + self.submodules.sync_stat_master_error_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_master_error_counter.i.eq(sbus_bus.buf_stat_master_error_counter) + self.comb += self.stat_master_error_counter.status.eq(self.sync_stat_master_error_counter.o) + self.submodules.sync_stat_master_rerun_counter = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_stat_master_rerun_counter.i.eq(sbus_bus.buf_stat_master_rerun_counter) + self.comb += self.stat_master_rerun_counter.status.eq(self.sync_stat_master_rerun_counter.o) + self.submodules.sync_sbus_master_error_virtual = BusSynchronizer(width = 32, idomain="sbus", odomain="sys"); + self.comb += self.sync_sbus_master_error_virtual.i.eq(sbus_bus.buf_sbus_master_error_virtual) + self.comb += self.sbus_master_error_virtual.status.eq(self.sync_sbus_master_error_virtual.o) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 777ae4d..ab48314 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -43,7 +43,7 @@ class _CRG(Module): # self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller self.clock_domains.cd_clk50 = ClockDomain() # 50 MHz (gated) for curve25519engine -> eng_clk - self.clock_domains.cd_clk100 = ClockDomain() # 100 MHz for curve25519engine -> sys_clk + #self.clock_domains.cd_clk100 = 
ClockDomain() # 100 MHz for curve25519engine -> sys_clk self.clock_domains.cd_clk100_gated = ClockDomain() # 100 MHz (gated) for curve25519engine -> mul_clk self.clock_domains.cd_clk200 = ClockDomain() # 200 MHz (gated) for curve25519engine -> rf_clk @@ -67,11 +67,13 @@ class _CRG(Module): self.comb += self.cd_sbus.rst.eq(~rst_sbus) ##self.cd_sys.clk = clk_sbus ##self.comb += self.cd_sys.rst.eq(~rst_sbus) + + self.curve25519_on = Signal() self.submodules.pll = pll = S7MMCM(speedgrade=-1) #pll.register_clkin(clk48, 48e6) pll.register_clkin(self.clk48_bufg, 48e6) - pll.create_clkout(self.cd_sys, sys_clk_freq) + pll.create_clkout(self.cd_sys, sys_clk_freq, gated_replicas={self.cd_clk100_gated : pll.locked & self.curve25519_on}) platform.add_platform_command("create_generated_clock -name sysclk [get_pins {{MMCME2_ADV/CLKOUT0}}]") pll.create_clkout(self.cd_sys4x, 4*sys_clk_freq) platform.add_platform_command("create_generated_clock -name sys4xclk [get_pins {{MMCME2_ADV/CLKOUT1}}]") @@ -84,25 +86,30 @@ class _CRG(Module): #platform.add_false_path_constraints(self.cd_sbus.clk, self.cd_sys.clk) ##platform.add_false_path_constraints(self.cd_native.clk, self.cd_sys.clk) - self.submodules.curve25519_pll = curve25519_pll = S7MMCM(speedgrade=-1) - curve25519_clk_freq = 90e6 - self.curve25519_on = Signal() - #curve25519_pll.register_clkin(clk48, 48e6) - curve25519_pll.register_clkin(self.clk48_bufg, 48e6) - curve25519_pll.create_clkout(self.cd_clk50, curve25519_clk_freq/2, margin=0, ce=curve25519_pll.locked & self.curve25519_on) - platform.add_platform_command("create_generated_clock -name clk50 [get_pins {{MMCME2_ADV_1/CLKOUT0}}]") - curve25519_pll.create_clkout(self.cd_clk100, curve25519_clk_freq, margin=0, ce=curve25519_pll.locked, - gated_replicas={self.cd_clk100_gated : curve25519_pll.locked & self.curve25519_on}) - platform.add_platform_command("create_generated_clock -name clk100 [get_pins {{MMCME2_ADV_1/CLKOUT1}}]") - curve25519_pll.create_clkout(self.cd_clk200, 
curve25519_clk_freq*2, margin=0, ce=curve25519_pll.locked & self.curve25519_on) - platform.add_platform_command("create_generated_clock -name clk200 [get_pins {{MMCME2_ADV_1/CLKOUT2}}]") - #self.comb += curve25519_pll.reset.eq(~rst_sbus) # | ~por_done - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk50.clk) - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk100.clk) - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk200.clk) - platform.add_false_path_constraints(self.cd_clk50.clk, self.cd_sys.clk) - platform.add_false_path_constraints(self.cd_clk100.clk, self.cd_sys.clk) - platform.add_false_path_constraints(self.cd_clk200.clk, self.cd_sys.clk) + pll.create_clkout(self.cd_clk50, sys_clk_freq/2, ce=pll.locked & self.curve25519_on) + platform.add_platform_command("create_generated_clock -name clk50 [get_pins {{MMCME2_ADV/CLKOUT3}}]") + pll.create_clkout(self.cd_clk200, sys_clk_freq*2, ce=pll.locked & self.curve25519_on) + platform.add_platform_command("create_generated_clock -name clk200 [get_pins {{MMCME2_ADV/CLKOUT4}}]") + + #self.submodules.curve25519_pll = curve25519_pll = S7MMCM(speedgrade=-1) + #curve25519_clk_freq = 90e6 + ##self.curve25519_on = Signal() + ##curve25519_pll.register_clkin(clk48, 48e6) + #curve25519_pll.register_clkin(self.clk48_bufg, 48e6) + #curve25519_pll.create_clkout(self.cd_clk50, curve25519_clk_freq/2, margin=0, ce=curve25519_pll.locked & self.curve25519_on) + #platform.add_platform_command("create_generated_clock -name clk50 [get_pins {{MMCME2_ADV_1/CLKOUT0}}]") + #curve25519_pll.create_clkout(self.cd_clk100, curve25519_clk_freq, margin=0, ce=curve25519_pll.locked, + # gated_replicas={self.cd_clk100_gated : curve25519_pll.locked & self.curve25519_on}) + #platform.add_platform_command("create_generated_clock -name clk100 [get_pins {{MMCME2_ADV_1/CLKOUT1}}]") + #curve25519_pll.create_clkout(self.cd_clk200, curve25519_clk_freq*2, margin=0, ce=curve25519_pll.locked & self.curve25519_on) + 
#platform.add_platform_command("create_generated_clock -name clk200 [get_pins {{MMCME2_ADV_1/CLKOUT2}}]") + ##self.comb += curve25519_pll.reset.eq(~rst_sbus) # | ~por_done + #platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk50.clk) + #platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk100.clk) + #platform.add_false_path_constraints(self.cd_sys.clk, self.cd_clk200.clk) + #platform.add_false_path_constraints(self.cd_clk50.clk, self.cd_sys.clk) + #platform.add_false_path_constraints(self.cd_clk100.clk, self.cd_sys.clk) + #platform.add_false_path_constraints(self.cd_clk200.clk, self.cd_sys.clk) # Power on reset, reset propagate from SBus to SYS # por_count = Signal(16, reset=2**16-1) @@ -283,14 +290,15 @@ class SBusFPGA(SoCCore): # beware the naming, as 'clk50' 'sysclk' 'clk200' are used in the original platform constraints # the local engine.py was slightly modified to have configurable names, so we can have 'clk50', 'clk100', 'clk200' # Beware that Engine implicitely runs in 'sys' by default, need to rename that one as well - self.submodules.curve25519engine = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100_gated", "sys":"clk100"})(Engine(platform=platform,prefix=self.mem_map.get("curve25519engine", None))) - self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100") - self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) - #self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) - self.submodules.curve25519_on_sync = BusSynchronizer(width = 1, idomain = "clk100", odomain = "sys") - self.comb += self.curve25519_on_sync.i.eq(self.curve25519engine.power.fields.on) - self.comb += 
self.crg.curve25519_on.eq(self.curve25519_on_sync.o) - + self.submodules.curve25519engine = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100_gated"})(Engine(platform=platform,prefix=self.mem_map.get("curve25519engine", None))) # , "sys":"clk100" + #self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100") + #self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) + self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) + #self.submodules.curve25519_on_sync = BusSynchronizer(width = 1, idomain = "clk100", odomain = "sys") + #self.comb += self.curve25519_on_sync.i.eq(self.curve25519engine.power.fields.on) + #self.comb += self.crg.curve25519_on.eq(self.curve25519_on_sync.o) + self.comb += self.crg.curve25519_on.eq(self.curve25519engine.power.fields.on) + def main(): parser = argparse.ArgumentParser(description="SbusFPGA") parser.add_argument("--build", action="store_true", help="Build bitstream") From 170c540cf74a975ef1031ae5cce263c8d48f13e1 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 23 Aug 2021 04:48:05 -0400 Subject: [PATCH 64/78] ioctl on/off switch on stats + ctrl pgm --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c | 40 +++++++++++++- .../9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h | 1 + .../sbusfpga_stat_ctl.c | 55 +++++++++++++++++++ 3 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 sbus-to-ztex-gateware-migen/sbusfpga_stat_ctl.c diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c index 00be73e..c63dd02 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.c @@ 
-58,7 +58,7 @@ CFATTACH_DECL_NEW(sbusfpga_stat, sizeof(struct sbusfpga_sbus_bus_stat_softc), dev_type_open(sbusfpga_stat_open); dev_type_close(sbusfpga_stat_close); - +dev_type_ioctl(sbusfpga_stat_ioctl); const struct cdevsw sbusfpga_stat_cdevsw = { @@ -66,7 +66,7 @@ const struct cdevsw sbusfpga_stat_cdevsw = { .d_close = sbusfpga_stat_close, .d_read = noread, .d_write = nowrite, - .d_ioctl = noioctl, + .d_ioctl = sbusfpga_stat_ioctl, .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, @@ -187,9 +187,42 @@ sbusfpga_stat_attach(device_t parent, device_t self, void *aux) callout_init(&sc->sc_display, CALLOUT_MPSAFE); callout_setfunc(&sc->sc_display, sbusfpga_stat_display, sc); + /* disable by default */ + sc->sc_enable = 0; + /* do it once during boot*/ callout_schedule(&sc->sc_display, sc->sc_delay); } +#define SBUSFPGA_STAT_ON _IO(0, 1) +#define SBUSFPGA_STAT_OFF _IO(0, 0) + +int +sbusfpga_stat_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) +{ + struct sbusfpga_sbus_bus_stat_softc *sc = device_lookup_private(&sbusfpga_stat_cd, minor(dev)); + int err = 0; + + switch (cmd) { + case SBUSFPGA_STAT_ON: + if (!sc->sc_enable) { + sc->sc_enable = 1; + callout_schedule(&sc->sc_display, sc->sc_delay); + } + break; + case SBUSFPGA_STAT_OFF: + if (sc->sc_enable) { + callout_stop(&sc->sc_display); + sc->sc_enable = 0; + } + break; + default: + err = ENOTTY; + break; + } + + return err; +} + static void sbusfpga_stat_display(void *args) { struct sbusfpga_sbus_bus_stat_softc *sc = args; unsigned int c = sbus_bus_stat_stat_cycle_counter_read(sc), c2; @@ -219,5 +252,6 @@ static void sbusfpga_stat_display(void *args) { sbus_bus_stat_sbus_master_error_virtual_read(sc)); } sbus_bus_stat_stat_ctrl_write(sc, 0); - callout_schedule(&sc->sc_display, sc->sc_delay); + if (sc->sc_enable) + callout_schedule(&sc->sc_display, sc->sc_delay); } diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h index 2ab4c48..1a6699f 
100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_stat.h @@ -39,6 +39,7 @@ struct sbusfpga_sbus_bus_stat_softc { int sc_bufsiz; /* Size of buffer */ callout_t sc_display; int sc_delay; + int sc_enable; }; #endif /* _SBUSFPGA_STAT_H_ */ diff --git a/sbus-to-ztex-gateware-migen/sbusfpga_stat_ctl.c b/sbus-to-ztex-gateware-migen/sbusfpga_stat_ctl.c new file mode 100644 index 0000000..fe9d08b --- /dev/null +++ b/sbus-to-ztex-gateware-migen/sbusfpga_stat_ctl.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SBUSFPGA_STAT_ON _IO(0, 1) +#define SBUSFPGA_STAT_OFF _IO(0, 0) + +int main(int argc, char **argv) { + const char const * device = "/dev/sbusfpga_stat0"; + int devfd; + int onoff; + + if (argc != 2) { + fprintf(stderr, "Usage: %s on|off\n", argv[0]); + return -1; + } + + if (strncmp("on", argv[1], 2) == 0) { + onoff = 1; + } else if (strncmp("off", argv[1], 3) == 0) { + onoff = 0; + } else { + fprintf(stderr, "Usage: %s on|off\n", argv[0]); + return -1; + } + + if ( (devfd = open(device, O_RDWR)) == -1) { + perror("can't open device file"); + return -1; + } + + switch (onoff) { + case 0: + if (ioctl(devfd, SBUSFPGA_STAT_OFF, NULL)) { + perror("Turning statistics off failed."); + close(devfd); + return -1; + } + break; + case 1: + if (ioctl(devfd, SBUSFPGA_STAT_ON, NULL)) { + perror("Turning statistics on failed."); + close(devfd); + return -1; + } + break; + } + + return 0; +} From d2218c6981b86fb2e015d2da63de896268692e20 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 25 Aug 2021 09:44:28 -0400 Subject: [PATCH 65/78] commit the experimental GCM/AES stuff in the engine --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 215 ++++++++++++------ sbus-to-ztex-gateware-migen/engine.py | 207 ++++++++++++++++- .../engine_code/Cargo.toml | 10 +- .../engine_code/engine_code.rs | 92 +++++++- 4 files changed, 445 insertions(+), 79 deletions(-) diff --git 
a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 2e0e000..514464f 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -87,7 +87,7 @@ struct sbusfpga_curve25519engine_montgomeryjob { uint32_t scalar[8]; }; -static int init_program(struct sbusfpga_curve25519engine_softc *sc); +static int init_programs(struct sbusfpga_curve25519engine_softc *sc); static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); static int start_job(struct sbusfpga_curve25519engine_softc *sc); static int wait_job(struct sbusfpga_curve25519engine_softc *sc); @@ -96,47 +96,6 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf static int power_on(struct sbusfpga_curve25519engine_softc *sc); static int power_off(struct sbusfpga_curve25519engine_softc *sc); -#define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) - -int -sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) -{ - struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); - struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; - int err = 0; - - if (!sc->initialized) { - if (init_program(sc)) { - return ENXIO; - } else { - sc->initialized = 1; - } - } - switch (cmd) { - case SBUSFPGA_DO_MONTGOMERYJOB: { - err = write_inputs(sc, job, 0); - if (err) - return err; - err = start_job(sc); - if (err) - return err; - delay(1); - err = wait_job(sc); - if (err) - return err; - err = read_outputs(sc, job, 0); - if (err) - return err; - } - break; - default: - err = EINVAL; - break; - } - - return(err); -} - int sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l) { @@ 
-172,8 +131,14 @@ sbusfpga_curve25519engine_match(device_t parent, cfdata_t cf, void *aux) return (strcmp("betrustedc25519e", sa->sa_name) == 0); } -static const uint32_t program[192] = {0x00640840, 0x00680800, 0x006c0600, 0x00700840, 0x004c0a80, 0x00480800, 0x007407cc, 0x007c07cb, 0x0049d483, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00480740, 0x0001a645, 0x00780008, 0x0001e006, 0x0069a8c6, 0x0005a645, 0x00780048, 0x0005e046, 0x0009c6c5, 0x00780088, 0x0009e086, 0x0071c8c6, 0x000dc6c5, 0x007800c8, 0x000de0c6, 0x00100007, 0x00141047, 0x007458c6, 0x0019d105, 0x00780188, 0x0019e186, 0x001c3007, 0x00202047, 0x002481c5, 0x00780248, 0x0025e246, 0x007488c6, 0x0029d1c5, 0x00780288, 0x0029e286, 0x006c9247, 0x0030a287, 0x00346907, 0x00645107, 0x003c5345, 0x007803c8, 0x003de3c6, 0x0068f187, 0x0070c607, 0x010004c9, 0x004e14c6, 0xe5800809, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00740640, 0x00780680, 0x0001e787, 0x00040007, 0x00041047, 0x00081787, 0x000c2007, 0x001030c7, 0x00144087, 0x00700940, 0x00185147, 0x00721706, 0x01000709, 0x00186187, 0xfe000809, 0x001c5187, 0x00700980, 0x002071c7, 0x00721706, 0x01000709, 0x00208207, 0xfe000809, 0x00247207, 0x007009c0, 0x00289247, 0x00721706, 0x01000709, 0x0028a287, 0xfe000809, 0x002c9287, 0x00700980, 0x0030b2c7, 0x00721706, 0x01000709, 0x0030c307, 0xfe000809, 0x00347307, 0x00700a00, 0x0038d347, 0x00721706, 0x01000709, 0x0038e387, 0xfe000809, 0x003cd387, 0x00700a40, 0x0040f3c7, 0x00721706, 0x01000709, 0x00410407, 0xfe000809, 0x0044f407, 0x00700a00, 0x00491447, 0x00721706, 0x01000709, 0x00492487, 0xfe000809, 0x004cd487, 0x00700940, 0x005134c7, 0x00721706, 0x01000709, 0x00514507, 0xfe000809, 0x00543507, 0x007d5747, 0x0000000a, 0x0000000a, 0x0000000a}; -static const uint32_t program_len = 134; +static const uint32_t program_ec25519[134] = {0x00640840, 0x00680800, 0x006c0600, 0x00700840, 0x004c0a80, 0x00480800, 0x007407cc, 
0x007c07cb, 0x0049d483, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00480740, 0x0001a645, 0x00780008, 0x0001e006, 0x0069a8c6, 0x0005a645, 0x00780048, 0x0005e046, 0x0009c6c5, 0x00780088, 0x0009e086, 0x0071c8c6, 0x000dc6c5, 0x007800c8, 0x000de0c6, 0x00100007, 0x00141047, 0x007458c6, 0x0019d105, 0x00780188, 0x0019e186, 0x001c3007, 0x00202047, 0x002481c5, 0x00780248, 0x0025e246, 0x007488c6, 0x0029d1c5, 0x00780288, 0x0029e286, 0x006c9247, 0x0030a287, 0x00346907, 0x00645107, 0x003c5345, 0x007803c8, 0x003de3c6, 0x0068f187, 0x0070c607, 0x010004c9, 0x004e14c6, 0xe5800809, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00740640, 0x00780680, 0x0001e787, 0x00040007, 0x00041047, 0x00081787, 0x000c2007, 0x001030c7, 0x00144087, 0x00700940, 0x00185147, 0x00721706, 0x01000709, 0x00186187, 0xfe000809, 0x001c5187, 0x00700980, 0x002071c7, 0x00721706, 0x01000709, 0x00208207, 0xfe000809, 0x00247207, 0x007009c0, 0x00289247, 0x00721706, 0x01000709, 0x0028a287, 0xfe000809, 0x002c9287, 0x00700980, 0x0030b2c7, 0x00721706, 0x01000709, 0x0030c307, 0xfe000809, 0x00347307, 0x00700a00, 0x0038d347, 0x00721706, 0x01000709, 0x0038e387, 0xfe000809, 0x003cd387, 0x00700a40, 0x0040f3c7, 0x00721706, 0x01000709, 0x00410407, 0xfe000809, 0x0044f407, 0x00700a00, 0x00491447, 0x00721706, 0x01000709, 0x00492487, 0xfe000809, 0x004cd487, 0x00700940, 0x005134c7, 0x00721706, 0x01000709, 0x00514507, 0xfe000809, 0x00543507, 0x007d5747, 0x0000000a }; + +static const uint32_t program_gcm[20] = {0x0010100d, 0x0094100d, 0x0118100d, 0x019c100d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x008e008f, 0x0112008f, 0x0396008f, 0x00083083, 0x00105103, 0x00084083, 0x00001083, 0x0000000a }; +static const uint32_t program_aes[21] = {0x00000052, 0x00800052, 0x01000052, 0x01800052, 0x0000000a }; + +static const uint32_t* programs[4] = { program_ec25519, program_gcm, 
program_aes, NULL }; +static const uint32_t program_len[4] = { 134, 20, 5, 0 }; +static uint32_t program_offset[4]; /* * Attach all the sub-devices we can find @@ -265,8 +230,8 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) /* first we need to turn the engine power on ... */ power_on(sc); - if (init_program(sc)) { - if (init_program(sc)) { + if (init_programs(sc)) { + if (init_programs(sc)) { aprint_normal_dev(sc->sc_dev, "INIT - FAILED\n"); sc->initialized = 0; } else { @@ -307,6 +272,120 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) #undef CSR_SDPHY_BASE #undef CSR_TRNG_BASE +#define REG_BASE(reg) (base + (reg * 32)) +#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) + +#define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) +#define SBUSFPGA_EC25519_CHECKGCM _IOW(0, 1, struct sbusfpga_curve25519engine_montgomeryjob) +#define SBUSFPGA_EC25519_CHECKAES _IOW(0, 2, struct sbusfpga_curve25519engine_montgomeryjob) + +int +sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) +{ + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + int err = 0; + + if (!sc->initialized) { + if (init_programs(sc)) { + return ENXIO; + } else { + sc->initialized = 1; + } + } + switch (cmd) { + case SBUSFPGA_DO_MONTGOMERYJOB: { + struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; + curve25519engine_mpstart_write(sc, program_offset[0]); /* EC25519 */ + curve25519engine_mplen_write(sc, program_len[0]); /* EC25519 */ + + err = write_inputs(sc, job, 0); + if (err) + return err; + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc); + if (err) + return err; + err = read_outputs(sc, job, 0); + if (err) + return err; + } + break; + case SBUSFPGA_EC25519_CHECKGCM: { + const uint32_t base = 0; + struct 
sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; + int reg, i; + + curve25519engine_mpstart_write(sc, program_offset[1]); /* GCM */ + curve25519engine_mplen_write(sc, program_len[1]); /* GCM */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->affine_u[i]); + } + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(1,i), job->scalar[i]); + } + + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc); + /* if (err) */ + /* return err; */ + + for (reg = 0 ; reg < 32 ; reg++) { + uint32_t buf[8]; + for (i = 0 ; i < 8 ; i ++) { + buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i)); + } + device_printf(sc->sc_dev, "GCM %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + } + } + break; + case SBUSFPGA_EC25519_CHECKAES: { + const uint32_t base = 0; + struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; + int reg, i; + + curve25519engine_mpstart_write(sc, program_offset[2]); /* AES */ + curve25519engine_mplen_write(sc, program_len[2]); /* AES */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->affine_u[i]); + } + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(1,i), job->scalar[i]); + } + + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc); + /* if (err) */ + /* return err; */ + + for (reg = 0 ; reg < 32 ; reg++) { + uint32_t buf[8]; + for (i = 0 ; i < 8 ; i ++) { + buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i)); + } + device_printf(sc->sc_dev, "AES %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, + buf[0], buf[1], buf[2], 
buf[3], buf[4], buf[5], buf[6], buf[7]); + } + } + break; + default: + err = EINVAL; + break; + } + + return(err); +} + + static int power_on(struct sbusfpga_curve25519engine_softc *sc) { int err = 0; if ((curve25519engine_power_read(sc) & 1) == 0) { @@ -321,20 +400,25 @@ static int power_off(struct sbusfpga_curve25519engine_softc *sc) { return err; } -static int init_program(struct sbusfpga_curve25519engine_softc *sc) { +static int init_programs(struct sbusfpga_curve25519engine_softc *sc) { /* the microcode is a the beginning */ int err = 0; - uint32_t i; - - for (i = 0 ; i < program_len + 1 ; i++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_microcode, (i*4), program[i]); - if ((i%16)==15) - delay(1); + uint32_t i, j; + uint32_t offset = 0; + + for (j = 0 ; programs[j] != NULL; j ++) { + program_offset[j] = offset; + for (i = 0 ; i < program_len[j] ; i++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_microcode, ((offset+i)*4), programs[j][i]); + if ((i%16)==15) + delay(1); + } + offset += program_len[j]; } curve25519engine_window_write(sc, 0); /* could use window_window to access fields, but it creates a RMW cycle for nothing */ - curve25519engine_mpstart_write(sc, 0); - curve25519engine_mplen_write(sc, program_len); + curve25519engine_mpstart_write(sc, 0); /* EC25519 */ + curve25519engine_mplen_write(sc, program_len[0]); /* EC25519 */ aprint_normal_dev(sc->sc_dev, "INIT - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc)); @@ -342,10 +426,10 @@ static int init_program(struct sbusfpga_curve25519engine_softc *sc) { /* double check */ u_int32_t x; int count = 0; - for (i = 0 ; i < program_len + 1 && count < 10; i++) { + for (i = 0 ; i < program_len[0] && count < 10; i++) { x = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_microcode, (i*4)); - if (x != program[i]) { - aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine program failure: [%d] 0x%08x <> 0x%08x\n", i, x, program[i]); + if (x != programs[0][i]) { + 
aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine program failure: [%d] 0x%08x <> 0x%08x\n", i, x, programs[0][i]); err = 1; count ++; } @@ -360,7 +444,7 @@ static int init_program(struct sbusfpga_curve25519engine_softc *sc) { aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register failure: mpstart = 0x%08x\n", x); err = 1; } - if ((x = curve25519engine_mplen_read(sc)) != program_len) { + if ((x = curve25519engine_mplen_read(sc)) != program_len[0]) { aprint_error_dev(sc->sc_dev, "INIT - Curve25519Engine register failure: mplen = 0x%08x\n", x); err = 1; } @@ -386,9 +470,6 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf aprint_error_dev(sc->sc_dev, "WRITE - Curve25519Engine status: 0x%08x, still running?\n", status); return -ENXIO; } - -#define REG_BASE(reg) (base + (reg * 32)) -#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 ; i ++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i), job->affine_u[i]); /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i), job->x0_u[i]); */ @@ -399,12 +480,8 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,i), ((i == 0) ? 
254 : 0)); */ /* delay(1); */ } -#undef SUBREG_ADDR -#undef REG_BASE #if 1 -#define REG_BASE(reg) (base + (reg * 32)) -#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 && !err; i ++) { if (job->affine_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i))) err = EIO; /* if (job->x0_u[i] != bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i))) err = EIO; */ @@ -415,8 +492,6 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf /* delay(1); */ } if (err) aprint_error_dev(sc->sc_dev, "WRITE - data did not read-write properly\n"); -#undef SUBREG_ADDR -#undef REG_BASE #endif return err; @@ -463,8 +538,6 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf return -ENXIO; } -#define REG_BASE(reg) (base + (reg * 32)) -#define SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) for (i = 0 ; i < 8 ; i ++) { /* job->affine_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i)); */ /* job->x0_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(25,i)); */ @@ -475,8 +548,6 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf /* delay(1); */ } aprint_normal_dev(sc->sc_dev, "READ - Curve25519Engine 19 low 32 bits: 0x%08x\n", bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,0))); -#undef SUBREG_ADDR -#undef REG_BASE return 0; } diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index 72a180f..42f8023 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -25,7 +25,13 @@ opcodes = { # mnemonic : [bit coding, docstring] "FIN" : [10, "halt execution and assert interrupt to host CPU that microcode execution is done"], "SHL" : [11, "Wd $\gets$ Ra << 1 // shift Ra left by one and store in Wd"], "XBT" : [12, "Wd[0] $\gets$ Ra[254] // extract the 255th bit of Ra and put it into the 0th 
bit of Wd"], - "MAX" : [13, "Maximum opcode number (for bounds checking)"], + "CLMUL": [13, "carry-less multiplication; reg-reg only; per 128-bits block"], # basically 256-bits form of vpclmulqdq + "GCM_SHLMI": [14, "Shift A left by imm, insert B MSB as dest LSB; reg-reg or reg-imm; per 128-bits block"], # make SHL redundant: SHL %rd, %ra == GCM_SHLMI %rd, %ra, #0, #1 + "GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], # + "GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific + "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block"], # + "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block" ], + "MAX" : [19, "Maximum opcode number (for bounds checking)"], } num_registers = 32 @@ -1418,7 +1424,203 @@ carries that have already been propagated. If we fail to do this, then we re-pro ) ] +class ExecClmul(ExecUnit, AutoDoc): + def clmul64(self, IN2, IN1): + return (Replicate(IN2[0], 64) & (IN1[0:64])) ^ Cat(Signal(1, reset = 0), (Replicate(IN2[1], 63) & IN1[0:63])) ^ Cat(Signal(2, reset = 0), (Replicate(IN2[2], 62) & IN1[0:62])) ^ Cat(Signal(3, reset = 0), (Replicate(IN2[3], 61) & IN1[0:61])) ^ Cat(Signal(4, reset = 0), (Replicate(IN2[4], 60) & IN1[0:60])) ^ Cat(Signal(5, reset = 0), (Replicate(IN2[5], 59) & IN1[0:59])) ^ Cat(Signal(6, reset = 0), (Replicate(IN2[6], 58) & IN1[0:58])) ^ Cat(Signal(7, reset = 0), (Replicate(IN2[7], 57) & IN1[0:57])) ^ Cat(Signal(8, reset = 0), (Replicate(IN2[8], 56) & IN1[0:56])) ^ Cat(Signal(9, reset = 0), (Replicate(IN2[9], 55) & IN1[0:55])) ^ Cat(Signal(10, reset = 0), (Replicate(IN2[10], 54) & IN1[0:54])) ^ Cat(Signal(11, reset = 0), (Replicate(IN2[11], 53) & IN1[0:53])) ^ Cat(Signal(12, reset = 0), (Replicate(IN2[12], 52) & IN1[0:52])) ^ Cat(Signal(13, reset = 0), (Replicate(IN2[13], 51) & IN1[0:51])) ^ Cat(Signal(14, reset = 0), (Replicate(IN2[14], 50) & IN1[0:50])) ^ Cat(Signal(15, reset = 0), 
(Replicate(IN2[15], 49) & IN1[0:49])) ^ Cat(Signal(16, reset = 0), (Replicate(IN2[16], 48) & IN1[0:48])) ^ Cat(Signal(17, reset = 0), (Replicate(IN2[17], 47) & IN1[0:47])) ^ Cat(Signal(18, reset = 0), (Replicate(IN2[18], 46) & IN1[0:46])) ^ Cat(Signal(19, reset = 0), (Replicate(IN2[19], 45) & IN1[0:45])) ^ Cat(Signal(20, reset = 0), (Replicate(IN2[20], 44) & IN1[0:44])) ^ Cat(Signal(21, reset = 0), (Replicate(IN2[21], 43) & IN1[0:43])) ^ Cat(Signal(22, reset = 0), (Replicate(IN2[22], 42) & IN1[0:42])) ^ Cat(Signal(23, reset = 0), (Replicate(IN2[23], 41) & IN1[0:41])) ^ Cat(Signal(24, reset = 0), (Replicate(IN2[24], 40) & IN1[0:40])) ^ Cat(Signal(25, reset = 0), (Replicate(IN2[25], 39) & IN1[0:39])) ^ Cat(Signal(26, reset = 0), (Replicate(IN2[26], 38) & IN1[0:38])) ^ Cat(Signal(27, reset = 0), (Replicate(IN2[27], 37) & IN1[0:37])) ^ Cat(Signal(28, reset = 0), (Replicate(IN2[28], 36) & IN1[0:36])) ^ Cat(Signal(29, reset = 0), (Replicate(IN2[29], 35) & IN1[0:35])) ^ Cat(Signal(30, reset = 0), (Replicate(IN2[30], 34) & IN1[0:34])) ^ Cat(Signal(31, reset = 0), (Replicate(IN2[31], 33) & IN1[0:33])) ^ Cat(Signal(32, reset = 0), (Replicate(IN2[32], 32) & IN1[0:32])) ^ Cat(Signal(33, reset = 0), (Replicate(IN2[33], 31) & IN1[0:31])) ^ Cat(Signal(34, reset = 0), (Replicate(IN2[34], 30) & IN1[0:30])) ^ Cat(Signal(35, reset = 0), (Replicate(IN2[35], 29) & IN1[0:29])) ^ Cat(Signal(36, reset = 0), (Replicate(IN2[36], 28) & IN1[0:28])) ^ Cat(Signal(37, reset = 0), (Replicate(IN2[37], 27) & IN1[0:27])) ^ Cat(Signal(38, reset = 0), (Replicate(IN2[38], 26) & IN1[0:26])) ^ Cat(Signal(39, reset = 0), (Replicate(IN2[39], 25) & IN1[0:25])) ^ Cat(Signal(40, reset = 0), (Replicate(IN2[40], 24) & IN1[0:24])) ^ Cat(Signal(41, reset = 0), (Replicate(IN2[41], 23) & IN1[0:23])) ^ Cat(Signal(42, reset = 0), (Replicate(IN2[42], 22) & IN1[0:22])) ^ Cat(Signal(43, reset = 0), (Replicate(IN2[43], 21) & IN1[0:21])) ^ Cat(Signal(44, reset = 0), (Replicate(IN2[44], 20) & IN1[0:20])) ^ Cat(Signal(45, 
reset = 0), (Replicate(IN2[45], 19) & IN1[0:19])) ^ Cat(Signal(46, reset = 0), (Replicate(IN2[46], 18) & IN1[0:18])) ^ Cat(Signal(47, reset = 0), (Replicate(IN2[47], 17) & IN1[0:17])) ^ Cat(Signal(48, reset = 0), (Replicate(IN2[48], 16) & IN1[0:16])) ^ Cat(Signal(49, reset = 0), (Replicate(IN2[49], 15) & IN1[0:15])) ^ Cat(Signal(50, reset = 0), (Replicate(IN2[50], 14) & IN1[0:14])) ^ Cat(Signal(51, reset = 0), (Replicate(IN2[51], 13) & IN1[0:13])) ^ Cat(Signal(52, reset = 0), (Replicate(IN2[52], 12) & IN1[0:12])) ^ Cat(Signal(53, reset = 0), (Replicate(IN2[53], 11) & IN1[0:11])) ^ Cat(Signal(54, reset = 0), (Replicate(IN2[54], 10) & IN1[0:10])) ^ Cat(Signal(55, reset = 0), (Replicate(IN2[55], 9) & IN1[0:9])) ^ Cat(Signal(56, reset = 0), (Replicate(IN2[56], 8) & IN1[0:8])) ^ Cat(Signal(57, reset = 0), (Replicate(IN2[57], 7) & IN1[0:7])) ^ Cat(Signal(58, reset = 0), (Replicate(IN2[58], 6) & IN1[0:6])) ^ Cat(Signal(59, reset = 0), (Replicate(IN2[59], 5) & IN1[0:5])) ^ Cat(Signal(60, reset = 0), (Replicate(IN2[60], 4) & IN1[0:4])) ^ Cat(Signal(61, reset = 0), (Replicate(IN2[61], 3) & IN1[0:3])) ^ Cat(Signal(62, reset = 0), (Replicate(IN2[62], 2) & IN1[0:2])) ^ Cat(Signal(63, reset = 0), (Replicate(IN2[63], 1) & IN1[0:1])) + def clmul64h(self, IN2, IN1): + return Cat((((Replicate(IN2[0], 1)) & IN1[63:64]) ^ ((Replicate(IN2[1], 2)) & IN1[62:64]) ^ ((Replicate(IN2[2], 3)) & IN1[61:64]) ^ ((Replicate(IN2[3], 4)) & IN1[60:64]) ^ ((Replicate(IN2[4], 5)) & IN1[59:64]) ^ ((Replicate(IN2[5], 6)) & IN1[58:64]) ^ ((Replicate(IN2[6], 7)) & IN1[57:64]) ^ ((Replicate(IN2[7], 8)) & IN1[56:64]) ^ ((Replicate(IN2[8], 9)) & IN1[55:64]) ^ ((Replicate(IN2[9], 10)) & IN1[54:64]) ^ ((Replicate(IN2[10], 11)) & IN1[53:64]) ^ ((Replicate(IN2[11], 12)) & IN1[52:64]) ^ ((Replicate(IN2[12], 13)) & IN1[51:64]) ^ ((Replicate(IN2[13], 14)) & IN1[50:64]) ^ ((Replicate(IN2[14], 15)) & IN1[49:64]) ^ ((Replicate(IN2[15], 16)) & IN1[48:64]) ^ ((Replicate(IN2[16], 17)) & IN1[47:64]) ^ ((Replicate(IN2[17], 
18)) & IN1[46:64]) ^ ((Replicate(IN2[18], 19)) & IN1[45:64]) ^ ((Replicate(IN2[19], 20)) & IN1[44:64]) ^ ((Replicate(IN2[20], 21)) & IN1[43:64]) ^ ((Replicate(IN2[21], 22)) & IN1[42:64]) ^ ((Replicate(IN2[22], 23)) & IN1[41:64]) ^ ((Replicate(IN2[23], 24)) & IN1[40:64]) ^ ((Replicate(IN2[24], 25)) & IN1[39:64]) ^ ((Replicate(IN2[25], 26)) & IN1[38:64]) ^ ((Replicate(IN2[26], 27)) & IN1[37:64]) ^ ((Replicate(IN2[27], 28)) & IN1[36:64]) ^ ((Replicate(IN2[28], 29)) & IN1[35:64]) ^ ((Replicate(IN2[29], 30)) & IN1[34:64]) ^ ((Replicate(IN2[30], 31)) & IN1[33:64]) ^ ((Replicate(IN2[31], 32)) & IN1[32:64]) ^ ((Replicate(IN2[32], 33)) & IN1[31:64]) ^ ((Replicate(IN2[33], 34)) & IN1[30:64]) ^ ((Replicate(IN2[34], 35)) & IN1[29:64]) ^ ((Replicate(IN2[35], 36)) & IN1[28:64]) ^ ((Replicate(IN2[36], 37)) & IN1[27:64]) ^ ((Replicate(IN2[37], 38)) & IN1[26:64]) ^ ((Replicate(IN2[38], 39)) & IN1[25:64]) ^ ((Replicate(IN2[39], 40)) & IN1[24:64]) ^ ((Replicate(IN2[40], 41)) & IN1[23:64]) ^ ((Replicate(IN2[41], 42)) & IN1[22:64]) ^ ((Replicate(IN2[42], 43)) & IN1[21:64]) ^ ((Replicate(IN2[43], 44)) & IN1[20:64]) ^ ((Replicate(IN2[44], 45)) & IN1[19:64]) ^ ((Replicate(IN2[45], 46)) & IN1[18:64]) ^ ((Replicate(IN2[46], 47)) & IN1[17:64]) ^ ((Replicate(IN2[47], 48)) & IN1[16:64]) ^ ((Replicate(IN2[48], 49)) & IN1[15:64]) ^ ((Replicate(IN2[49], 50)) & IN1[14:64]) ^ ((Replicate(IN2[50], 51)) & IN1[13:64]) ^ ((Replicate(IN2[51], 52)) & IN1[12:64]) ^ ((Replicate(IN2[52], 53)) & IN1[11:64]) ^ ((Replicate(IN2[53], 54)) & IN1[10:64]) ^ ((Replicate(IN2[54], 55)) & IN1[9:64]) ^ ((Replicate(IN2[55], 56)) & IN1[8:64]) ^ ((Replicate(IN2[56], 57)) & IN1[7:64]) ^ ((Replicate(IN2[57], 58)) & IN1[6:64]) ^ ((Replicate(IN2[58], 59)) & IN1[5:64]) ^ ((Replicate(IN2[59], 60)) & IN1[4:64]) ^ ((Replicate(IN2[60], 61)) & IN1[3:64]) ^ ((Replicate(IN2[61], 62)) & IN1[2:64]) ^ ((Replicate(IN2[62], 63)) & IN1[1:64]) ^ ((Replicate(IN2[63], 64)) & IN1[0:64])), Signal(1, reset = 0))[1:65] + + def __init__(self, 
width=256): + ExecUnit.__init__(self, width, ["CLMUL"]) + self.notes = ModuleDoc(title="Clmul ExecUnit Subclass", body=f""" + """) + + clmul64x_in1 = Signal(64) + clmul64x_in2 = Signal(64) + clmul64_out = Signal(64) + clmul64h_out = Signal(64) + nlane = width // 128 + clmul_buf = Signal((nlane-1) * 128) ## width must be a multiple of 128... + lanec = Signal(log2_int(nlane, False)) + assert(nlane == 2) ## fixme + + self.sync.eng_clk += [ + clmul64_out.eq(self.clmul64(clmul64x_in1, clmul64x_in2)), + clmul64h_out.eq(self.clmul64h(clmul64x_in1, clmul64x_in2)), + ] + + self.sync.eng_clk += [ + #self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + + + self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) + seq.act("IDLE", + If(self.start, + Case(self.instruction.immediate[0:2], { + 0x0: [ clmul64x_in1.eq(self.a[ 0: 64]), clmul64x_in2.eq(self.b[ 0: 64]) ], + 0x1: [ clmul64x_in1.eq(self.a[ 0: 64]), clmul64x_in2.eq(self.b[ 64:128]) ], + 0x2: [ clmul64x_in1.eq(self.a[ 64:128]), clmul64x_in2.eq(self.b[ 0: 64]) ], + 0x3: [ clmul64x_in1.eq(self.a[ 64:128]), clmul64x_in2.eq(self.b[ 64:128]) ], + }), + NextState("NEXT"))) + seq.act("NEXT", + Case(lanec, { + 0: [ NextValue(clmul_buf[0:128], Cat(clmul64_out, clmul64h_out)), + Case(self.instruction.immediate[0:2], { + 0x0: [ clmul64x_in1.eq(self.a[128:192]), clmul64x_in2.eq(self.b[128:192]) ], + 0x1: [ clmul64x_in1.eq(self.a[128:192]), clmul64x_in2.eq(self.b[192:256]) ], + 0x2: [ clmul64x_in1.eq(self.a[192:256]), clmul64x_in2.eq(self.b[128:192]) ], + 0x3: [ clmul64x_in1.eq(self.a[192:256]), clmul64x_in2.eq(self.b[192:256]) ], + }), + NextValue(lanec, 1), + ], + 1: [ self.q_valid.eq(1), + self.q.eq(Cat(clmul_buf, clmul64_out, clmul64h_out)), + NextValue(lanec, 0), + NextState("IDLE") + ], + })) + +class ExecGCMShifts(ExecUnit, AutoDoc): + def __init__(self, width=256): + ExecUnit.__init__(self, width, ["GCM_SHLMI", "GCM_SHRMI", "GCM_CMPD", "GCM_SWAP64"]) + self.notes = 
ModuleDoc(title="GCM Shifts ExecUnit Subclass", body=f""" + """) + + assert(width == 256) # fixme + + self.sync.eng_clk += [ + self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + self.comb += [ + If(self.instruction.opcode == opcodes["GCM_CMPD"][0], + self.q.eq(Cat(self.a[ 0: 64], self.a[ 64:128] ^ Cat(Signal(63, reset = 0), self.a[ 0: 1]) ^ Cat(Signal(62, reset = 0), self.a[ 0: 2]) ^ Cat(Signal(57, reset = 0), self.a[ 0: 7]), + self.a[128:192], self.a[192:256] ^ Cat(Signal(63, reset = 0), self.a[128:129]) ^ Cat(Signal(62, reset = 0), self.a[128:130]) ^ Cat(Signal(57, reset = 0), self.a[128:135])) + ) #eq + ).Elif(self.instruction.opcode == opcodes["GCM_SHRMI"][0], + Case(self.instruction.immediate[0:3], { + 0x0: self.q.eq(self.a), + 0x1: self.q.eq(Cat(self.a[1:128], self.b[0:1], self.a[129:256], self.b[0:1])), + 0x2: self.q.eq(Cat(self.a[2:128], self.b[0:2], self.a[130:256], self.b[0:2])), + 0x3: self.q.eq(Cat(self.a[3:128], self.b[0:3], self.a[131:256], self.b[0:3])), + 0x4: self.q.eq(Cat(self.a[4:128], self.b[0:4], self.a[132:256], self.b[0:4])), + 0x5: self.q.eq(Cat(self.a[5:128], self.b[0:5], self.a[133:256], self.b[0:5])), + 0x6: self.q.eq(Cat(self.a[6:128], self.b[0:6], self.a[134:256], self.b[0:6])), + 0x7: self.q.eq(Cat(self.a[7:128], self.b[0:7], self.a[135:256], self.b[0:7])), + }) + ).Elif(self.instruction.opcode == opcodes["GCM_SHLMI"][0], + Case(self.instruction.immediate[0:3], { + 0x0: self.q.eq(self.a), + 0x1: self.q.eq(Cat(self.b[127:128], self.a[0:127], self.b[255:256], self.a[128:255])), + 0x2: self.q.eq(Cat(self.b[126:128], self.a[0:126], self.b[254:256], self.a[128:254])), + 0x3: self.q.eq(Cat(self.b[125:128], self.a[0:125], self.b[253:256], self.a[128:253])), + 0x4: self.q.eq(Cat(self.b[124:128], self.a[0:124], self.b[252:256], self.a[128:252])), + 0x5: self.q.eq(Cat(self.b[123:128], self.a[0:123], self.b[251:256], self.a[128:251])), + 0x6: self.q.eq(Cat(self.b[122:128], self.a[0:122], self.b[250:256], 
self.a[128:250])), + 0x7: self.q.eq(Cat(self.b[121:128], self.a[0:121], self.b[249:256], self.a[128:249])), + }) + ).Elif(self.instruction.opcode == opcodes["GCM_SWAP64"][0], + self.q.eq(Cat(self.b[64:128], self.a[0:64], self.b[192:256], self.a[128:192])) + ) + ] + +class ExecAES(ExecUnit, AutoDoc): + def __init__(self, width=256): + ExecUnit.__init__(self, width, ["AESESMI"]) + self.notes = ModuleDoc(title="AES ExecUnit Subclass", body=f""" + """) + + assert(width == 256) # fixme + nlane = width // 128 + aes_buf = Signal((nlane-1) * 128) ## width must be a multiple of 128... + lanec = Signal(log2_int(nlane, False)) + assert(nlane == 2) ## fixme + + aes_in = Array(Signal(8) for a in range(4)) + aes_out = Array(Signal(24) for a in range(4)) + for i in range(4): + self.sync.eng_clk += Case(aes_in[i], { 0x00: aes_out[i].eq(0xa563c6), 0x01: aes_out[i].eq(0x847cf8), 0x02: aes_out[i].eq(0x9977ee), 0x03: aes_out[i].eq(0x8d7bf6), 0x04: aes_out[i].eq(0x0df2ff), 0x05: aes_out[i].eq(0xbd6bd6), 0x06: aes_out[i].eq(0xb16fde), 0x07: aes_out[i].eq(0x54c591), 0x08: aes_out[i].eq(0x503060), 0x09: aes_out[i].eq(0x030102), 0x0a: aes_out[i].eq(0xa967ce), 0x0b: aes_out[i].eq(0x7d2b56), 0x0c: aes_out[i].eq(0x19fee7), 0x0d: aes_out[i].eq(0x62d7b5), 0x0e: aes_out[i].eq(0xe6ab4d), 0x0f: aes_out[i].eq(0x9a76ec), 0x10: aes_out[i].eq(0x45ca8f), 0x11: aes_out[i].eq(0x9d821f), 0x12: aes_out[i].eq(0x40c989), 0x13: aes_out[i].eq(0x877dfa), 0x14: aes_out[i].eq(0x15faef), 0x15: aes_out[i].eq(0xeb59b2), 0x16: aes_out[i].eq(0xc9478e), 0x17: aes_out[i].eq(0x0bf0fb), 0x18: aes_out[i].eq(0xecad41), 0x19: aes_out[i].eq(0x67d4b3), 0x1a: aes_out[i].eq(0xfda25f), 0x1b: aes_out[i].eq(0xeaaf45), 0x1c: aes_out[i].eq(0xbf9c23), 0x1d: aes_out[i].eq(0xf7a453), 0x1e: aes_out[i].eq(0x9672e4), 0x1f: aes_out[i].eq(0x5bc09b), 0x20: aes_out[i].eq(0xc2b775), 0x21: aes_out[i].eq(0x1cfde1), 0x22: aes_out[i].eq(0xae933d), 0x23: aes_out[i].eq(0x6a264c), 0x24: aes_out[i].eq(0x5a366c), 0x25: aes_out[i].eq(0x413f7e), 0x26: 
aes_out[i].eq(0x02f7f5), 0x27: aes_out[i].eq(0x4fcc83), 0x28: aes_out[i].eq(0x5c3468), 0x29: aes_out[i].eq(0xf4a551), 0x2a: aes_out[i].eq(0x34e5d1), 0x2b: aes_out[i].eq(0x08f1f9), 0x2c: aes_out[i].eq(0x9371e2), 0x2d: aes_out[i].eq(0x73d8ab), 0x2e: aes_out[i].eq(0x533162), 0x2f: aes_out[i].eq(0x3f152a), 0x30: aes_out[i].eq(0x0c0408), 0x31: aes_out[i].eq(0x52c795), 0x32: aes_out[i].eq(0x652346), 0x33: aes_out[i].eq(0x5ec39d), 0x34: aes_out[i].eq(0x281830), 0x35: aes_out[i].eq(0xa19637), 0x36: aes_out[i].eq(0x0f050a), 0x37: aes_out[i].eq(0xb59a2f), 0x38: aes_out[i].eq(0x09070e), 0x39: aes_out[i].eq(0x361224), 0x3a: aes_out[i].eq(0x9b801b), 0x3b: aes_out[i].eq(0x3de2df), 0x3c: aes_out[i].eq(0x26ebcd), 0x3d: aes_out[i].eq(0x69274e), 0x3e: aes_out[i].eq(0xcdb27f), 0x3f: aes_out[i].eq(0x9f75ea), 0x40: aes_out[i].eq(0x1b0912), 0x41: aes_out[i].eq(0x9e831d), 0x42: aes_out[i].eq(0x742c58), 0x43: aes_out[i].eq(0x2e1a34), 0x44: aes_out[i].eq(0x2d1b36), 0x45: aes_out[i].eq(0xb26edc), 0x46: aes_out[i].eq(0xee5ab4), 0x47: aes_out[i].eq(0xfba05b), 0x48: aes_out[i].eq(0xf652a4), 0x49: aes_out[i].eq(0x4d3b76), 0x4a: aes_out[i].eq(0x61d6b7), 0x4b: aes_out[i].eq(0xceb37d), 0x4c: aes_out[i].eq(0x7b2952), 0x4d: aes_out[i].eq(0x3ee3dd), 0x4e: aes_out[i].eq(0x712f5e), 0x4f: aes_out[i].eq(0x978413), 0x50: aes_out[i].eq(0xf553a6), 0x51: aes_out[i].eq(0x68d1b9), 0x52: aes_out[i].eq(0x000000), 0x53: aes_out[i].eq(0x2cedc1), 0x54: aes_out[i].eq(0x602040), 0x55: aes_out[i].eq(0x1ffce3), 0x56: aes_out[i].eq(0xc8b179), 0x57: aes_out[i].eq(0xed5bb6), 0x58: aes_out[i].eq(0xbe6ad4), 0x59: aes_out[i].eq(0x46cb8d), 0x5a: aes_out[i].eq(0xd9be67), 0x5b: aes_out[i].eq(0x4b3972), 0x5c: aes_out[i].eq(0xde4a94), 0x5d: aes_out[i].eq(0xd44c98), 0x5e: aes_out[i].eq(0xe858b0), 0x5f: aes_out[i].eq(0x4acf85), 0x60: aes_out[i].eq(0x6bd0bb), 0x61: aes_out[i].eq(0x2aefc5), 0x62: aes_out[i].eq(0xe5aa4f), 0x63: aes_out[i].eq(0x16fbed), 0x64: aes_out[i].eq(0xc54386), 0x65: aes_out[i].eq(0xd74d9a), 0x66: 
aes_out[i].eq(0x553366), 0x67: aes_out[i].eq(0x948511), 0x68: aes_out[i].eq(0xcf458a), 0x69: aes_out[i].eq(0x10f9e9), 0x6a: aes_out[i].eq(0x060204), 0x6b: aes_out[i].eq(0x817ffe), 0x6c: aes_out[i].eq(0xf050a0), 0x6d: aes_out[i].eq(0x443c78), 0x6e: aes_out[i].eq(0xba9f25), 0x6f: aes_out[i].eq(0xe3a84b), 0x70: aes_out[i].eq(0xf351a2), 0x71: aes_out[i].eq(0xfea35d), 0x72: aes_out[i].eq(0xc04080), 0x73: aes_out[i].eq(0x8a8f05), 0x74: aes_out[i].eq(0xad923f), 0x75: aes_out[i].eq(0xbc9d21), 0x76: aes_out[i].eq(0x483870), 0x77: aes_out[i].eq(0x04f5f1), 0x78: aes_out[i].eq(0xdfbc63), 0x79: aes_out[i].eq(0xc1b677), 0x7a: aes_out[i].eq(0x75daaf), 0x7b: aes_out[i].eq(0x632142), 0x7c: aes_out[i].eq(0x301020), 0x7d: aes_out[i].eq(0x1affe5), 0x7e: aes_out[i].eq(0x0ef3fd), 0x7f: aes_out[i].eq(0x6dd2bf), 0x80: aes_out[i].eq(0x4ccd81), 0x81: aes_out[i].eq(0x140c18), 0x82: aes_out[i].eq(0x351326), 0x83: aes_out[i].eq(0x2fecc3), 0x84: aes_out[i].eq(0xe15fbe), 0x85: aes_out[i].eq(0xa29735), 0x86: aes_out[i].eq(0xcc4488), 0x87: aes_out[i].eq(0x39172e), 0x88: aes_out[i].eq(0x57c493), 0x89: aes_out[i].eq(0xf2a755), 0x8a: aes_out[i].eq(0x827efc), 0x8b: aes_out[i].eq(0x473d7a), 0x8c: aes_out[i].eq(0xac64c8), 0x8d: aes_out[i].eq(0xe75dba), 0x8e: aes_out[i].eq(0x2b1932), 0x8f: aes_out[i].eq(0x9573e6), 0x90: aes_out[i].eq(0xa060c0), 0x91: aes_out[i].eq(0x988119), 0x92: aes_out[i].eq(0xd14f9e), 0x93: aes_out[i].eq(0x7fdca3), 0x94: aes_out[i].eq(0x662244), 0x95: aes_out[i].eq(0x7e2a54), 0x96: aes_out[i].eq(0xab903b), 0x97: aes_out[i].eq(0x83880b), 0x98: aes_out[i].eq(0xca468c), 0x99: aes_out[i].eq(0x29eec7), 0x9a: aes_out[i].eq(0xd3b86b), 0x9b: aes_out[i].eq(0x3c1428), 0x9c: aes_out[i].eq(0x79dea7), 0x9d: aes_out[i].eq(0xe25ebc), 0x9e: aes_out[i].eq(0x1d0b16), 0x9f: aes_out[i].eq(0x76dbad), 0xa0: aes_out[i].eq(0x3be0db), 0xa1: aes_out[i].eq(0x563264), 0xa2: aes_out[i].eq(0x4e3a74), 0xa3: aes_out[i].eq(0x1e0a14), 0xa4: aes_out[i].eq(0xdb4992), 0xa5: aes_out[i].eq(0x0a060c), 0xa6: 
aes_out[i].eq(0x6c2448), 0xa7: aes_out[i].eq(0xe45cb8), 0xa8: aes_out[i].eq(0x5dc29f), 0xa9: aes_out[i].eq(0x6ed3bd), 0xaa: aes_out[i].eq(0xefac43), 0xab: aes_out[i].eq(0xa662c4), 0xac: aes_out[i].eq(0xa89139), 0xad: aes_out[i].eq(0xa49531), 0xae: aes_out[i].eq(0x37e4d3), 0xaf: aes_out[i].eq(0x8b79f2), 0xb0: aes_out[i].eq(0x32e7d5), 0xb1: aes_out[i].eq(0x43c88b), 0xb2: aes_out[i].eq(0x59376e), 0xb3: aes_out[i].eq(0xb76dda), 0xb4: aes_out[i].eq(0x8c8d01), 0xb5: aes_out[i].eq(0x64d5b1), 0xb6: aes_out[i].eq(0xd24e9c), 0xb7: aes_out[i].eq(0xe0a949), 0xb8: aes_out[i].eq(0xb46cd8), 0xb9: aes_out[i].eq(0xfa56ac), 0xba: aes_out[i].eq(0x07f4f3), 0xbb: aes_out[i].eq(0x25eacf), 0xbc: aes_out[i].eq(0xaf65ca), 0xbd: aes_out[i].eq(0x8e7af4), 0xbe: aes_out[i].eq(0xe9ae47), 0xbf: aes_out[i].eq(0x180810), 0xc0: aes_out[i].eq(0xd5ba6f), 0xc1: aes_out[i].eq(0x8878f0), 0xc2: aes_out[i].eq(0x6f254a), 0xc3: aes_out[i].eq(0x722e5c), 0xc4: aes_out[i].eq(0x241c38), 0xc5: aes_out[i].eq(0xf1a657), 0xc6: aes_out[i].eq(0xc7b473), 0xc7: aes_out[i].eq(0x51c697), 0xc8: aes_out[i].eq(0x23e8cb), 0xc9: aes_out[i].eq(0x7cdda1), 0xca: aes_out[i].eq(0x9c74e8), 0xcb: aes_out[i].eq(0x211f3e), 0xcc: aes_out[i].eq(0xdd4b96), 0xcd: aes_out[i].eq(0xdcbd61), 0xce: aes_out[i].eq(0x868b0d), 0xcf: aes_out[i].eq(0x858a0f), 0xd0: aes_out[i].eq(0x9070e0), 0xd1: aes_out[i].eq(0x423e7c), 0xd2: aes_out[i].eq(0xc4b571), 0xd3: aes_out[i].eq(0xaa66cc), 0xd4: aes_out[i].eq(0xd84890), 0xd5: aes_out[i].eq(0x050306), 0xd6: aes_out[i].eq(0x01f6f7), 0xd7: aes_out[i].eq(0x120e1c), 0xd8: aes_out[i].eq(0xa361c2), 0xd9: aes_out[i].eq(0x5f356a), 0xda: aes_out[i].eq(0xf957ae), 0xdb: aes_out[i].eq(0xd0b969), 0xdc: aes_out[i].eq(0x918617), 0xdd: aes_out[i].eq(0x58c199), 0xde: aes_out[i].eq(0x271d3a), 0xdf: aes_out[i].eq(0xb99e27), 0xe0: aes_out[i].eq(0x38e1d9), 0xe1: aes_out[i].eq(0x13f8eb), 0xe2: aes_out[i].eq(0xb3982b), 0xe3: aes_out[i].eq(0x331122), 0xe4: aes_out[i].eq(0xbb69d2), 0xe5: aes_out[i].eq(0x70d9a9), 0xe6: 
aes_out[i].eq(0x898e07), 0xe7: aes_out[i].eq(0xa79433), 0xe8: aes_out[i].eq(0xb69b2d), 0xe9: aes_out[i].eq(0x221e3c), 0xea: aes_out[i].eq(0x928715), 0xeb: aes_out[i].eq(0x20e9c9), 0xec: aes_out[i].eq(0x49ce87), 0xed: aes_out[i].eq(0xff55aa), 0xee: aes_out[i].eq(0x782850), 0xef: aes_out[i].eq(0x7adfa5), 0xf0: aes_out[i].eq(0x8f8c03), 0xf1: aes_out[i].eq(0xf8a159), 0xf2: aes_out[i].eq(0x808909), 0xf3: aes_out[i].eq(0x170d1a), 0xf4: aes_out[i].eq(0xdabf65), 0xf5: aes_out[i].eq(0x31e6d7), 0xf6: aes_out[i].eq(0xc64284), 0xf7: aes_out[i].eq(0xb868d0), 0xf8: aes_out[i].eq(0xc34182), 0xf9: aes_out[i].eq(0xb09929), 0xfa: aes_out[i].eq(0x772d5a), 0xfb: aes_out[i].eq(0x110f1e), 0xfc: aes_out[i].eq(0xcbb07b), 0xfd: aes_out[i].eq(0xfc54a8), 0xfe: aes_out[i].eq(0xd6bb6d), 0xff: aes_out[i].eq(0x3a162c) } ) + + self.sync.eng_clk += [ + #self.q_valid.eq(self.start), + self.instruction_out.eq(self.instruction_in), + ] + + self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) + seq.act("IDLE", + If(self.start, + Case(self.instruction.immediate[0:2], { + 0x0: [ aes_in[0].eq(self.a[ 0: 8]), aes_in[1].eq(self.a[ 32: 40]), aes_in[2].eq(self.a[ 64: 72]), aes_in[3].eq(self.a[ 96:104]) ], + 0x1: [ aes_in[3].eq(self.a[ 8: 16]), aes_in[0].eq(self.a[ 40: 48]), aes_in[1].eq(self.a[ 72: 80]), aes_in[2].eq(self.a[104:112]) ], + 0x2: [ aes_in[2].eq(self.a[ 16: 24]), aes_in[3].eq(self.a[ 48: 56]), aes_in[0].eq(self.a[ 80: 88]), aes_in[1].eq(self.a[112:120]) ], + 0x3: [ aes_in[1].eq(self.a[ 24: 32]), aes_in[2].eq(self.a[ 56: 64]), aes_in[3].eq(self.a[ 88: 96]), aes_in[0].eq(self.a[120:128]) ], + }), + NextState("NEXT"))) + seq.act("NEXT", + Case(lanec, { + 0: [ Case(self.instruction.immediate[0:2], { + 0x0: [ aes_in[0].eq(self.a[128:136]), aes_in[1].eq(self.a[160:168]), aes_in[2].eq(self.a[192:200]), aes_in[3].eq(self.a[224:232]) ], + 0x1: [ aes_in[3].eq(self.a[136:144]), aes_in[0].eq(self.a[168:176]), aes_in[1].eq(self.a[200:208]), aes_in[2].eq(self.a[232:240]) ], + 
0x2: [ aes_in[2].eq(self.a[144:152]), aes_in[3].eq(self.a[176:184]), aes_in[0].eq(self.a[208:216]), aes_in[1].eq(self.a[240:248]) ], + 0x3: [ aes_in[1].eq(self.a[152:160]), aes_in[2].eq(self.a[184:192]), aes_in[3].eq(self.a[216:224]), aes_in[0].eq(self.a[248:256]) ], + }), + Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], + aes_out[1][ 0:16], aes_out[1][ 8:24], + aes_out[2][ 0:16], aes_out[2][ 8:24], + aes_out[3][ 0:16], aes_out[3][ 8:24])), + ], + 0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], + aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], + aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], + aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), + ], + 0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], + aes_out[1][ 8:24], aes_out[1][ 0:16], + aes_out[2][ 8:24], aes_out[2][ 0:16], + aes_out[3][ 8:24], aes_out[3][ 0:16])), + ], + 0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], + aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], + aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], + aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), + ], + }), + NextValue(lanec, 1), + ], + 1: [ self.q_valid.eq(1), + Case(self.instruction.immediate[0:2], { + 0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 0:16], aes_out[0][ 8:24], + aes_out[1][ 0:16], aes_out[1][ 8:24], + aes_out[2][ 0:16], aes_out[2][ 8:24], + aes_out[3][ 0:16], aes_out[3][ 8:24])), + ], + 0x1: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], + aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], + aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], + aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), + ], + 0x2: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:24], aes_out[0][ 0:16], + aes_out[1][ 8:24], aes_out[1][ 0:16], + 
aes_out[2][ 8:24], aes_out[2][ 0:16], + aes_out[3][ 8:24], aes_out[3][ 0:16])), + ], + 0x3: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], + aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], + aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], + aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), + ], + }), + NextValue(lanec, 0), + NextState("IDLE") + ], + })) + + class Engine(Module, AutoCSR, AutoDoc): def __init__(self, platform, prefix, sim=False, build_prefix=""): opdoc = "\n" @@ -1885,6 +2087,9 @@ Here are the currently implemented opcodes for The Engine: "exec_addsub" : ExecAddSub(width=rf_width_raw), "exec_testreduce": ExecTestReduce(width=rf_width_raw), "exec_mul" : ExecMul(width=rf_width_raw, sim=sim), + "exec_clmul" : ExecClmul(width=rf_width_raw), + "exec_gcmshifts" : ExecGCMShifts(width=rf_width_raw), + "exec_aes" : ExecAES(width=rf_width_raw), } index = 0 for name, unit in exec_units.items(): diff --git a/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml b/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml index 38dd892..a877b59 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml +++ b/sbus-to-ztex-gateware-migen/engine_code/Cargo.toml @@ -9,12 +9,14 @@ edition = "2018" [dependencies] [dependencies.engine25519-as] -git="https://github.com/betrusted-io/engine25519-as.git" -rev="6681e73c1fdc4a460b5ef9f9c7c91aef546d00f3" +#git="https://github.com/betrusted-io/engine25519-as.git" +#rev="6681e73c1fdc4a460b5ef9f9c7c91aef546d00f3" +path = "/home/dolbeau/engine25519-as" [dev-dependencies.engine25519-as] -git="https://github.com/betrusted-io/engine25519-as.git" -rev="6681e73c1fdc4a460b5ef9f9c7c91aef546d00f3" +#git="https://github.com/betrusted-io/engine25519-as.git" +#rev="6681e73c1fdc4a460b5ef9f9c7c91aef546d00f3" +path = "/home/dolbeau/engine25519-as" [[bin]] name = "engine_code" diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs 
b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index 0182e10..f27ef93 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -539,9 +539,97 @@ fn main() -> std::io::Result<()> { mul %31, %24, %24 fin ); + + let gcmcode_test = assemble_engine25519!( + start: + // A in %0 + // B in %1 + clmul %4, %0, %1, #0 + clmul %5, %0, %1, #1 + clmul %6, %0, %1, #2 + clmul %7, %0, %1, #3 + //gcm_sl1ai %8, %0, %1 + //gcm_sl1ai %9, %0, #0 + //gcm_sl1ai %10, %1, %0 + //gcm_sl1ai %11, %1, #0 + gcm_cmpd %12, %0 + gcm_cmpd %13, %1 + //gcm_sri %14, %0, #0 + //gcm_sri %15, %0, #1 + //gcm_sri %16, %0, #2 + //gcm_sri %17, %0, #3 + //gcm_sri %18, %0, #4 + //gcm_sri %19, %0, #5 + //gcm_sri %20, %0, #6 + //gcm_sri %21, %0, #7 + fin + ); + let gcmcode = assemble_engine25519!( + start: + // A in %0 + // B in %1 + + // // poly mult + // C + clmul %4, %0, %1, #0 + // E + clmul %5, %0, %1, #1 + // F + clmul %6, %0, %1, #2 + // D + clmul %7, %0, %1, #3 + // E ^ F + xor %6, %5, %6 + // put low64 of E^F in high64 + gcm_swap64 %5, %6, #0 + // put high64 of E^F in low64 + gcm_swap64 %6, #0, %6 + // D xor low + xor %7, %7, %6 + // C xor high + xor %4, %4, %5 + + // // reduction + // X1:X0 in %4 + // X3:X2 in %7 + // shift everybody by 1 to the left + // high shifting in 1 bit from low + gcm_shlmi %1, %7, %4, #1 + // low + gcm_shlmi %0, %4, #0, #1 + // post-shift + // X1:X0 in %0 + // X3:X2 in %1 + // compute D + gcm_cmpd %2, %0 + // compute E, F, G + gcm_shrmi %3, %2, #0, #1 + gcm_shrmi %4, %2, #0, #2 + gcm_shrmi %5, %2, #0, #7 + // XOR everybody + xor %2, %2, %3 + xor %4, %4, %5 + xor %2, %2, %4 + xor %0, %2, %1 + // output in %0 + fin + );let gcmcode = assemble_engine25519!( + start: + // X in %1 + // KEY in %0 + // one full round demo + aesesmi %0, %1, %0, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + fin + ); + + let mut pos = 0; - while pos < mcode_upd.len() { - 
println!("0x{:08x},", mcode_upd[pos]); + while pos < gcmcode.len() { + println!("0x{:08x},", gcmcode[pos]); pos = pos + 1; } Ok(()) From 28b857851ae64197a4f4d87fac5153c75cb5174c Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 26 Aug 2021 06:03:36 -0400 Subject: [PATCH 66/78] Full AES encrypt --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 146 ++++++++++++++++-- .../sys/dev/sbus/sbusfpga_curve25519engine.h | 15 +- sbus-to-ztex-gateware-migen/engine.py | 134 ++++++++++------ .../engine_code/engine_code.rs | 73 ++++++++- 4 files changed, 312 insertions(+), 56 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 514464f..ebdd54b 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -42,6 +42,8 @@ __KERNEL_RCSID(0, "$NetBSD$"); #include #include +#include + #include #include @@ -58,6 +60,7 @@ CFATTACH_DECL_NEW(sbusfpga_c29e, sizeof(struct sbusfpga_curve25519engine_softc), dev_type_open(sbusfpga_curve25519engine_open); dev_type_close(sbusfpga_curve25519engine_close); dev_type_ioctl(sbusfpga_curve25519engine_ioctl); +dev_type_mmap(sbusfpga_curve25519engine_mmap); @@ -70,7 +73,7 @@ const struct cdevsw sbusfpga_c29e_cdevsw = { .d_stop = nostop, .d_tty = notty, .d_poll = nopoll, - .d_mmap = nommap, + .d_mmap = sbusfpga_curve25519engine_mmap, .d_kqfilter = nokqfilter, .d_discard = nodiscard, .d_flag = 0 @@ -86,12 +89,17 @@ struct sbusfpga_curve25519engine_montgomeryjob { uint32_t affine_u[8]; uint32_t scalar[8]; }; +struct sbusfpga_curve25519engine_aesjob { + uint32_t data[8]; + uint32_t keys[120]; +}; static int init_programs(struct sbusfpga_curve25519engine_softc *sc); static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); static int start_job(struct sbusfpga_curve25519engine_softc *sc); 
static int wait_job(struct sbusfpga_curve25519engine_softc *sc); static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); +static int dma_init(struct sbusfpga_curve25519engine_softc *sc); static int power_on(struct sbusfpga_curve25519engine_softc *sc); static int power_off(struct sbusfpga_curve25519engine_softc *sc); @@ -134,10 +142,11 @@ sbusfpga_curve25519engine_match(device_t parent, cfdata_t cf, void *aux) static const uint32_t program_ec25519[134] = {0x00640840, 0x00680800, 0x006c0600, 0x00700840, 0x004c0a80, 0x00480800, 0x007407cc, 0x007c07cb, 0x0049d483, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00480740, 0x0001a645, 0x00780008, 0x0001e006, 0x0069a8c6, 0x0005a645, 0x00780048, 0x0005e046, 0x0009c6c5, 0x00780088, 0x0009e086, 0x0071c8c6, 0x000dc6c5, 0x007800c8, 0x000de0c6, 0x00100007, 0x00141047, 0x007458c6, 0x0019d105, 0x00780188, 0x0019e186, 0x001c3007, 0x00202047, 0x002481c5, 0x00780248, 0x0025e246, 0x007488c6, 0x0029d1c5, 0x00780288, 0x0029e286, 0x006c9247, 0x0030a287, 0x00346907, 0x00645107, 0x003c5345, 0x007803c8, 0x003de3c6, 0x0068f187, 0x0070c607, 0x010004c9, 0x004e14c6, 0xe5800809, 0x0079b643, 0x0079e482, 0x00659783, 0x006db783, 0x0079c683, 0x0079e482, 0x0069a783, 0x0071c783, 0x00740640, 0x00780680, 0x0001e787, 0x00040007, 0x00041047, 0x00081787, 0x000c2007, 0x001030c7, 0x00144087, 0x00700940, 0x00185147, 0x00721706, 0x01000709, 0x00186187, 0xfe000809, 0x001c5187, 0x00700980, 0x002071c7, 0x00721706, 0x01000709, 0x00208207, 0xfe000809, 0x00247207, 0x007009c0, 0x00289247, 0x00721706, 0x01000709, 0x0028a287, 0xfe000809, 0x002c9287, 0x00700980, 0x0030b2c7, 0x00721706, 0x01000709, 0x0030c307, 0xfe000809, 0x00347307, 0x00700a00, 0x0038d347, 0x00721706, 0x01000709, 0x0038e387, 0xfe000809, 0x003cd387, 0x00700a40, 0x0040f3c7, 0x00721706, 0x01000709, 0x00410407, 0xfe000809, 0x0044f407, 0x00700a00, 0x00491447, 0x00721706, 
0x01000709, 0x00492487, 0xfe000809, 0x004cd487, 0x00700940, 0x005134c7, 0x00721706, 0x01000709, 0x00514507, 0xfe000809, 0x00543507, 0x007d5747, 0x0000000a }; static const uint32_t program_gcm[20] = {0x0010100d, 0x0094100d, 0x0118100d, 0x019c100d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x008e008f, 0x0112008f, 0x0396008f, 0x00083083, 0x00105103, 0x00084083, 0x00001083, 0x0000000a }; -static const uint32_t program_aes[21] = {0x00000052, 0x00800052, 0x01000052, 0x01800052, 0x0000000a }; + +static const uint32_t program_aes[58] = {0x0001f003, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x0000000a }; static const uint32_t* programs[4] = { program_ec25519, program_gcm, program_aes, NULL }; -static const uint32_t program_len[4] = { 134, 20, 5, 0 }; +static const uint32_t program_len[4] = { 134, 20, 58, 0 }; static uint32_t program_offset[4]; /* @@ -242,6 +251,15 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) } power_off(sc); + + sc->active_sessions = 0; + sc->mapped_sessions = 0; + + if (!dma_init(sc)) { + // ouch + sc->active_sessions = 0xFFFFFFFF; + sc->mapped_sessions = 0xFFFFFFFF; + } } #define CONFIG_CSR_DATA_WIDTH 32 @@ -275,9 +293,31 @@ sbusfpga_curve25519engine_attach(device_t parent, device_t self, void *aux) #define REG_BASE(reg) (base + (reg * 32)) #define 
SUBREG_ADDR(reg, off) (REG_BASE(reg) + (off)*4) +#include +//cprng_strong32() +struct sbusfpga_curve25519engine_session { + uint32_t session; + uint32_t cookie; +}; + #define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) #define SBUSFPGA_EC25519_CHECKGCM _IOW(0, 1, struct sbusfpga_curve25519engine_montgomeryjob) -#define SBUSFPGA_EC25519_CHECKAES _IOW(0, 2, struct sbusfpga_curve25519engine_montgomeryjob) +#define SBUSFPGA_EC25519_CHECKAES _IOW(0, 2, struct sbusfpga_curve25519engine_aesjob) + +#define SBUSFPGA_EC25519_OPENSESSION _IOR(1, 0, struct sbusfpga_curve25519engine_session) +#define SBUSFPGA_EC25519_CLOSESESSION _IOR(1, 1, struct sbusfpga_curve25519engine_session) + +static int get_session(struct sbusfpga_curve25519engine_softc *sc) { + int i; + /* don't use 0, we use it for testing */ + for (i = 1 ; (i < MAX_ACTIVE_SESSION) && (i < MAX_SESSION) ; i++) { + if (((sc->active_sessions & (1<mapped_sessions & (1<active_sessions |= (1<sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->affine_u[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->data[i]); } - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(1,i), job->scalar[i]); + for (reg = 31 ; reg > 16 ; reg--) { + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i]); + } } err = start_job(sc); @@ -377,6 +419,29 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } } break; + case SBUSFPGA_EC25519_OPENSESSION:{ + struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data; + int s = get_session(sc); + if (s < 0) + return EBUSY; + ses->session = s; + sc->sessions_cookies[s] = cprng_strong32(); + ses->cookie = sc->sessions_cookies[s]; + } + break; + case SBUSFPGA_EC25519_CLOSESESSION:{ + struct sbusfpga_curve25519engine_session* ses = (struct 
sbusfpga_curve25519engine_session*)data; + if ((ses->session >= MAX_ACTIVE_SESSION) || (ses->session >= MAX_SESSION)) + return EINVAL; + if (sc->sessions_cookies[ses->session] != ses->cookie) + return EINVAL; + if ((sc->mapped_sessions & (1 << ses->session)) != 0) + return EBUSY; + sc->sessions_cookies[ses->session] = 0; + sc->active_sessions &= ~(1 << ses->session); + } + break; + default: err = EINVAL; break; @@ -544,10 +609,73 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf /* job->x0_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(26,i)); */ /* job->x1_u[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(27,i)); */ /* job->x1_w[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(28,i)); */ - job->scalar[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(31,i)); + job->scalar[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i)); /* delay(1); */ } aprint_normal_dev(sc->sc_dev, "READ - Curve25519Engine 19 low 32 bits: 0x%08x\n", bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(19,0))); return 0; } + + +static int +dma_init(struct sbusfpga_curve25519engine_softc *sc) { + + /* Allocate a dmamap */ + if (bus_dmamap_create(sc->sc_dmatag, SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ, 1, SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &sc->sc_dmamap) != 0) { + aprint_error_dev(sc->sc_dev, "DMA map create failed\n"); + return 0; + } else { + aprint_normal_dev(sc->sc_dev, "dmamap: %lu %lu %d (%p)\n", sc->sc_dmamap->dm_maxsegsz, sc->sc_dmamap->dm_mapsize, sc->sc_dmamap->dm_nsegs, sc->sc_dmatag->_dmamap_load); + } + + if (bus_dmamem_alloc(sc->sc_dmatag, SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ, 64, 64, &sc->sc_segs, 1, &sc->sc_rsegs, BUS_DMA_NOWAIT | BUS_DMA_STREAMING)) { + aprint_error_dev(sc->sc_dev, "cannot allocate DVMA memory"); + bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); + 
return 0; + } + + if (bus_dmamem_map(sc->sc_dmatag, &sc->sc_segs, 1, SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ, &sc->sc_dma_kva, BUS_DMA_NOWAIT)) { + aprint_error_dev(sc->sc_dev, "cannot allocate DVMA address"); + bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); + bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); + return 0; + } + + if (bus_dmamap_load(sc->sc_dmatag, sc->sc_dmamap, sc->sc_dma_kva, SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ, /* kernel space */ NULL, + BUS_DMA_NOWAIT | BUS_DMA_STREAMING | BUS_DMA_WRITE)) { + aprint_error_dev(sc->sc_dev, "cannot load dma map"); + bus_dmamem_unmap(sc->sc_dmatag, &sc->sc_dma_kva, SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ); + bus_dmamem_free(sc->sc_dmatag, &sc->sc_segs, 1); + bus_dmamap_destroy(sc->sc_dmatag, sc->sc_dmamap); + return 0; + } + + aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernel address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); + + return 1; +} + +paddr_t sbusfpga_curve25519engine_mmap(dev_t dev, off_t offset, int prot) { + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + paddr_t addr = -1; + int ses = offset / 4096; + + if (offset % 4096) + return -1; + if (prot & PROT_EXEC) + return -1; + if (sc->mapped_sessions & (1 << ses)) + return -1; + if ((sc->active_sessions & (1 << ses)) == 0) + return -1; + + addr = bus_dmamem_mmap(sc->sc_dmatag, &sc->sc_segs, 1, offset, prot, BUS_DMA_NOWAIT); + + device_printf(sc->sc_dev, "mapped page %d\n", ses); + + if (addr != -1) + sc->mapped_sessions |= (1 << ses); + + return addr; +} diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h index 1bda49a..df352ca 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.h @@ -29,6 +29,11 @@ #ifndef _SBUSFPGA_CURVE25519ENGINE_H_ #define _SBUSFPGA_CURVE25519ENGINE_H_ +#define 
MAX_SESSION 32 // HW limit +#define MAX_ACTIVE_SESSION 8 // SW-imposed limit +// Single 4KiB pages per session +#define SBUSFPGA_CURVE25519ENGINE_VAL_DMA_MAX_SZ (MAX_ACTIVE_SESSION*4*1024) + struct sbusfpga_curve25519engine_softc { device_t sc_dev; /* us as a device */ u_int sc_rev; /* revision */ @@ -42,8 +47,16 @@ struct sbusfpga_curve25519engine_softc { int sc_bufsiz_curve25519engine; /* Size of buffer */ int sc_bufsiz_microcode; /* Size of buffer */ int sc_bufsiz_regfile; /* Size of buffer */ - bus_dma_tag_t sc_dmatag; int initialized; + uint32_t active_sessions; + uint32_t mapped_sessions; + uint32_t sessions_cookies[MAX_ACTIVE_SESSION]; + /* DMA kernel structures */ + bus_dma_tag_t sc_dmatag; + bus_dmamap_t sc_dmamap; + bus_dma_segment_t sc_segs; + int sc_rsegs; + void * sc_dma_kva; }; #endif /* _SBUSFPGA_CURVE25519ENGINE_H_ */ diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index 42f8023..212aa0a 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -30,7 +30,7 @@ opcodes = { # mnemonic : [bit coding, docstring] "GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], # "GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block"], # - "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block" ], + "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ], "MAX" : [19, "Maximum opcode number (for bounds checking)"], } @@ -1568,52 +1568,100 @@ class ExecAES(ExecUnit, AutoDoc): 0x2: [ aes_in[2].eq(self.a[144:152]), aes_in[3].eq(self.a[176:184]), aes_in[0].eq(self.a[208:216]), aes_in[1].eq(self.a[240:248]) ], 0x3: [ aes_in[1].eq(self.a[152:160]), aes_in[2].eq(self.a[184:192]), aes_in[3].eq(self.a[216:224]), 
aes_in[0].eq(self.a[248:256]) ], }), - Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], - aes_out[1][ 0:16], aes_out[1][ 8:24], - aes_out[2][ 0:16], aes_out[2][ 8:24], - aes_out[3][ 0:16], aes_out[3][ 8:24])), - ], - 0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], - aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], - aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], - aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), - ], - 0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], - aes_out[1][ 8:24], aes_out[1][ 0:16], - aes_out[2][ 8:24], aes_out[2][ 0:16], - aes_out[3][ 8:24], aes_out[3][ 0:16])), - ], - 0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], - aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], - aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], - aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), - ], + Case(self.instruction.immediate[2:3], { + 0: Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], + aes_out[1][ 0:16], aes_out[1][ 8:24], + aes_out[2][ 0:16], aes_out[2][ 8:24], + aes_out[3][ 0:16], aes_out[3][ 8:24])), + ], + 0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], + aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], + aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], + aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), + ], + 0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], + aes_out[1][ 8:24], aes_out[1][ 0:16], + aes_out[2][ 8:24], aes_out[2][ 0:16], + aes_out[3][ 8:24], aes_out[3][ 0:16])), + ], + 0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], + aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], + aes_out[2][ 8:16], 
aes_out[2][ 8:24], aes_out[2][ 0: 8], + aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), + ], + }), + 1: Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], Signal(24, reset = 0), + aes_out[1][ 8:16], Signal(24, reset = 0), + aes_out[2][ 8:16], Signal(24, reset = 0), + aes_out[3][ 8:16], Signal(24, reset = 0))), + ], + 0x1: [ NextValue(aes_buf[0:128], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), + ], + 0x2: [ NextValue(aes_buf[0:128], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), + ], + 0x3: [ NextValue(aes_buf[0:128], Cat(Signal(24, reset = 0), aes_out[0][ 8:16], + Signal(24, reset = 0), aes_out[1][ 8:16], + Signal(24, reset = 0), aes_out[2][ 8:16], + Signal(24, reset = 0), aes_out[3][ 8:16])), + ], + }), }), NextValue(lanec, 1), ], 1: [ self.q_valid.eq(1), - Case(self.instruction.immediate[0:2], { - 0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 0:16], aes_out[0][ 8:24], - aes_out[1][ 0:16], aes_out[1][ 8:24], - aes_out[2][ 0:16], aes_out[2][ 8:24], - aes_out[3][ 0:16], aes_out[3][ 8:24])), - ], - 0x1: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], - aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], - aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], - aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), - ], - 0x2: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:24], aes_out[0][ 0:16], - aes_out[1][ 8:24], aes_out[1][ 0:16], - aes_out[2][ 8:24], aes_out[2][ 0:16], - aes_out[3][ 8:24], aes_out[3][ 
0:16])), - ], - 0x3: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], - aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], - aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], - aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), - ], + Case(self.instruction.immediate[2:3], { + 0: Case(self.instruction.immediate[0:2], { + 0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 0:16], aes_out[0][ 8:24], + aes_out[1][ 0:16], aes_out[1][ 8:24], + aes_out[2][ 0:16], aes_out[2][ 8:24], + aes_out[3][ 0:16], aes_out[3][ 8:24])), + ], + 0x1: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], + aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], + aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], + aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), + ], + 0x2: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:24], aes_out[0][ 0:16], + aes_out[1][ 8:24], aes_out[1][ 0:16], + aes_out[2][ 8:24], aes_out[2][ 0:16], + aes_out[3][ 8:24], aes_out[3][ 0:16])), + ], + 0x3: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], + aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], + aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], + aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), + ], + }), + 1: Case(self.instruction.immediate[0:2], { + 0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], Signal(24, reset = 0), + aes_out[1][ 8:16], Signal(24, reset = 0), + aes_out[2][ 8:16], Signal(24, reset = 0), + aes_out[3][ 8:16], Signal(24, reset = 0))), + ], + 0x1: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), + ], + 0x2: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(16, reset 
= 0), aes_out[0][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), + ], + 0x3: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(24, reset = 0), aes_out[0][ 8:16], + Signal(24, reset = 0), aes_out[1][ 8:16], + Signal(24, reset = 0), aes_out[2][ 8:16], + Signal(24, reset = 0), aes_out[3][ 8:16])), + ], + }), }), NextValue(lanec, 0), NextState("IDLE") diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index f27ef93..813a0d1 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -615,13 +615,80 @@ fn main() -> std::io::Result<()> { fin );let gcmcode = assemble_engine25519!( start: - // X in %1 - // KEY in %0 + // X in %0 + // KEY in %31-%17 (backward) // one full round demo - aesesmi %0, %1, %0, #0 + xor %0, %0, %31 + + aesesmi %1, %0, %30, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %29, #0 aesesmi %0, %1, %0, #1 aesesmi %0, %1, %0, #2 aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %28, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %27, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %26, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %25, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %24, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %23, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %22, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %21, #0 + 
aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %20, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %19, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %18, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesi %0, %1, %17, #0 + aesesi %0, %1, %0, #1 + aesesi %0, %1, %0, #2 + aesesi %0, %1, %0, #3 fin ); From 110db38eb2fa725da621948534d12dc57ad098bf Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Thu, 26 Aug 2021 10:27:55 -0400 Subject: [PATCH 67/78] more AES/GCM --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 47 +++++++- sbus-to-ztex-gateware-migen/engine.py | 13 +- .../engine_code/engine_code.rs | 114 +++++++++++++++++- 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index ebdd54b..4c647ba 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -118,7 +118,9 @@ int sbusfpga_curve25519engine_close(dev_t dev, int flags, int mode, struct lwp *l) { struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); - power_off(sc); + + if (sc->active_sessions == 0) + power_off(sc); return (0); } @@ -145,8 +147,10 @@ static const uint32_t program_gcm[20] = {0x0010100d, 0x0094100d, 0x0118100d, 0x0 static const uint32_t program_aes[58] = {0x0001f003, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 
0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x0000000a }; -static const uint32_t* programs[4] = { program_ec25519, program_gcm, program_aes, NULL }; -static const uint32_t program_len[4] = { 134, 20, 58, 0 }; +static const uint32_t program_gcm_ad[70] = {0x00400800, 0x00080840, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x03000089, 0x003c0000, 0x01400411, 0x0042b405, 0x01400411, 0x00080800, 0xe0000809, 0x00380000, 0x01bc03d1, 0x003cf3d1, 0x0000000a }; + +static const uint32_t* programs[5] = { program_ec25519, program_gcm, program_aes, program_gcm_ad, NULL }; +static const uint32_t program_len[5] = { 134, 20, 58, 70, 0 }; static uint32_t program_offset[4]; /* @@ -303,6 +307,7 @@ struct sbusfpga_curve25519engine_session { #define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) #define SBUSFPGA_EC25519_CHECKGCM _IOW(0, 1, struct sbusfpga_curve25519engine_montgomeryjob) #define SBUSFPGA_EC25519_CHECKAES _IOW(0, 2, struct sbusfpga_curve25519engine_aesjob) +#define SBUSFPGA_EC25519_GCMAD _IOW(0, 3, struct sbusfpga_curve25519engine_aesjob) #define 
SBUSFPGA_EC25519_OPENSESSION _IOR(1, 0, struct sbusfpga_curve25519engine_session) #define SBUSFPGA_EC25519_CLOSESESSION _IOR(1, 1, struct sbusfpga_curve25519engine_session) @@ -397,7 +402,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } for (reg = 31 ; reg > 16 ; reg--) { for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i+8*(31-reg)]); } } @@ -419,6 +424,40 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } } break; + case SBUSFPGA_EC25519_GCMAD: { + const uint32_t base = 0; + struct sbusfpga_curve25519engine_aesjob* job = (struct sbusfpga_curve25519engine_aesjob*)data; + int reg, i; + + curve25519engine_mpstart_write(sc, program_offset[3]); /* GCM_AD */ + curve25519engine_mplen_write(sc, program_len[3]); /* GCM_AD */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->data[i]); + } + for (reg = 31 ; reg > 16 ; reg--) { + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i+8*(31-reg)]); + } + } + + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc); + /* if (err) */ + /* return err; */ + + for (reg = 0 ; reg < 32 ; reg++) { + uint32_t buf[8]; + for (i = 0 ; i < 8 ; i ++) { + buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i)); + } + device_printf(sc->sc_dev, "GCM_AD %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + } + } + break; case SBUSFPGA_EC25519_OPENSESSION:{ struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data; int s = get_session(sc); diff --git a/sbus-to-ztex-gateware-migen/engine.py 
b/sbus-to-ztex-gateware-migen/engine.py index 212aa0a..891fee6 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -13,7 +13,7 @@ opcode_bits = 6 # number of bits used to encode the opcode field opcodes = { # mnemonic : [bit coding, docstring] "UDF" : [-1, "Placeholder for undefined opcodes"], "PSA" : [0, "Wd $\gets$ Ra // pass A"], - "PSB" : [1, "Wd $\gets$ Rb // pass B"], + "PSB" : [1, "Wd $\gets$ Rb // pass B"], # Is that needed ??? "MSK" : [2, "Wd $\gets$ Replicate(Ra[0], 256) & Rb // for doing cswap()"], "XOR" : [3, "Wd $\gets$ Ra ^ Rb // bitwise XOR"], "NOT" : [4, "Wd $\gets$ ~Ra // binary invert"], @@ -29,7 +29,7 @@ opcodes = { # mnemonic : [bit coding, docstring] "GCM_SHLMI": [14, "Shift A left by imm, insert B MSB as dest LSB; reg-reg or reg-imm; per 128-bits block"], # make SHL redundant: SHL %rd, %ra == GCM_SHLMI %rd, %ra, #0, #1 "GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], # "GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific - "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block"], # + "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block ; imm != 0 -> BYTEREV*"], # "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ], "MAX" : [19, "Maximum opcode number (for bounds checking)"], } @@ -235,6 +235,7 @@ class Curve25519Const(Module, AutoDoc): 8: [50, "fifty", "The number 50 (for pow22501)"], 9: [100, "one hundred", "The number 100 (for pow22501)"], 10: [254, "two hundred fifty four", "The number 254 (iteration count)"], + 11: [0x00000001_00000000_00000000_00000000_00000001_00000000_00000000_00000000, "increment for GCM counter (LE)", "increment for GCM counter (LE)"], } self.adr = Signal(5) self.const = Signal(256) @@ -1524,7 
+1525,13 @@ class ExecGCMShifts(ExecUnit, AutoDoc): 0x7: self.q.eq(Cat(self.b[121:128], self.a[0:121], self.b[249:256], self.a[128:249])), }) ).Elif(self.instruction.opcode == opcodes["GCM_SWAP64"][0], - self.q.eq(Cat(self.b[64:128], self.a[0:64], self.b[192:256], self.a[128:192])) + # also gcm_brev* + Case(self.instruction.immediate[0:2], { + 0: self.q.eq(Cat(self.b[64:128], self.a[0:64], self.b[192:256], self.a[128:192])), + 1: self.q.eq(Cat(self.a[8:16], self.a[0:8], self.a[24:32], self.a[16:24], self.a[40:48], self.a[32:40], self.a[56:64], self.a[48:56], self.a[72:80], self.a[64:72], self.a[88:96], self.a[80:88], self.a[104:112], self.a[96:104], self.a[120:128], self.a[112:120], self.a[136:144], self.a[128:136], self.a[152:160], self.a[144:152], self.a[168:176], self.a[160:168], self.a[184:192], self.a[176:184], self.a[200:208], self.a[192:200], self.a[216:224], self.a[208:216], self.a[232:240], self.a[224:232], self.a[248:256], self.a[240:248])), + 2: self.q.eq(Cat(self.a[24:32], self.a[16:24], self.a[8:16], self.a[0:8], self.a[56:64], self.a[48:56], self.a[40:48], self.a[32:40], self.a[88:96], self.a[80:88], self.a[72:80], self.a[64:72], self.a[120:128], self.a[112:120], self.a[104:112], self.a[96:104], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200], self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232])), + 3: self.q.eq(Cat(self.a[56:64], self.a[48:56], self.a[40:48], self.a[32:40], self.a[24:32], self.a[16:24], self.a[8:16], self.a[0:8], self.a[120:128], self.a[112:120], self.a[104:112], self.a[96:104], self.a[88:96], self.a[80:88], self.a[72:80], self.a[64:72], self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232], self.a[216:224], 
self.a[208:216], self.a[200:208], self.a[192:200])), + }) ) ] diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index 813a0d1..b563143 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -613,7 +613,8 @@ fn main() -> std::io::Result<()> { xor %0, %2, %1 // output in %0 fin - );let gcmcode = assemble_engine25519!( + ); + let aescode = assemble_engine25519!( start: // X in %0 // KEY in %31-%17 (backward) @@ -692,11 +693,118 @@ fn main() -> std::io::Result<()> { fin ); + let gcm_ad_code = assemble_engine25519!( + start: + // Input: rkeys in %31-%17 (backward) + // Transient: + // %0, %1, %2 are tmp + // init counter in %16 + // H will go in %15 + // T will go in %14 + psa %16, #0 + // use %2 as a flag + psa %2, #1 + genht: + xor %0, %16, %31 + + aesesmi %1, %0, %30, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %29, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %28, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %27, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %26, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %25, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %24, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %23, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %22, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %21, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %20, #0 + aesesmi %1, %0, %1, #1 + aesesmi 
%1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %19, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %18, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesi %0, %1, %17, #0 + aesesi %0, %1, %0, #1 + aesesi %0, %1, %0, #2 + aesesi %0, %1, %0, #3 + + // if the %2 flag is cleared, we've just computed T + brz afterht, %2 + // store H in %15 + psa %15, %0 + // increment counter; should we have a gcm_inc_be ? + // for now byterev + special constant + gcm_brev32 %16, %16 + add %16, %16, #11 + gcm_brev32 %16, %16 + // clear flag & go encrypt t + psa %2, #0 + brz genht, #0 + + afterht: + // store T in %14 + psa %14, %0 + + // fully byte-revert H (first byte-in-dword, then dword-in-128bit) + gcm_brev64 %15, %15 + gcm_swap64 %15, %15, %15 + + fin + ); let mut pos = 0; - while pos < gcmcode.len() { - println!("0x{:08x},", gcmcode[pos]); + while pos < gcm_ad_code.len() { + println!("0x{:08x},", gcm_ad_code[pos]); pos = pos + 1; } Ok(()) From 28ce3a7111b77e867fe183a66fe292ea1ed27a9e Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 02:07:37 -0400 Subject: [PATCH 68/78] prelim work for V1.2 --- sbus-to-ztex-gateware-migen/ztex213_sbus.py | 92 +++++++++++++++++++-- 1 file changed, 84 insertions(+), 8 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/ztex213_sbus.py b/sbus-to-ztex-gateware-migen/ztex213_sbus.py index 1d1f554..5ced7f6 100644 --- a/sbus-to-ztex-gateware-migen/ztex213_sbus.py +++ b/sbus-to-ztex-gateware-migen/ztex213_sbus.py @@ -19,6 +19,8 @@ from litex.build.openocd import OpenOCD # IOs ---------------------------------------------------------------------------------------------- +# FPGA daughterboard I/O + _io = [ ## 48 MHz clock reference ("clk48", 0, Pins("P15"), IOStandard("LVCMOS33")), @@ -52,7 +54,9 @@ _io = [ ), ] -_sbus_io = [ +# SBusFPGA I/O + +_sbus_io_v1_0 = [ ## leds on the SBus board ("user_led", 0, Pins("U8"), 
IOStandard("lvcmos33")), #LED0 ("user_led", 1, Pins("U7"), IOStandard("lvcmos33")), #LED1 @@ -89,7 +93,39 @@ _sbus_io = [ ), ] -_sbus_sbus = [ +_sbus_io_v1_2 = [ + ## leds on the SBus board + ## serial header for console + ("serial", 0, + Subsignal("tx", Pins("V9")), # FIXME: might be the other way round + Subsignal("rx", Pins("U9")), + IOStandard("LVCMOS33") + ), + ## sdcard connector + ("spisdcard", 0, + Subsignal("clk", Pins("R8")), + Subsignal("mosi", Pins("T5"), Misc("PULLUP")), + Subsignal("cs_n", Pins("V6"), Misc("PULLUP")), + Subsignal("miso", Pins("V5"), Misc("PULLUP")), + Misc("SLEW=FAST"), + IOStandard("LVCMOS33"), + ), + ("sdcard", 0, + Subsignal("data", Pins("V5 V4 V7 V6"), Misc("PULLUP")), + Subsignal("cmd", Pins("T5"), Misc("PULLUP")), + Subsignal("clk", Pins("R8")), + #Subsignal("cd", Pins("V6")), + Misc("SLEW=FAST"), + IOStandard("LVCMOS33"), + ), + ## USB + ("usb", 0, + Subsignal("dp", Pins("U8")), # Serial TX + Subsignal("dm", Pins("U7")), # Serial RX + IOStandard("LVCMOS33")) +] + +_sbus_sbus_v1_0 = [ ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), ("SBUS_3V3_BGs", 0, Pins("T6"), IOStandard("lvttl")), @@ -108,9 +144,33 @@ _sbus_sbus = [ ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), ] +_sbus_sbus_v1_2 = [ + ("SBUS_3V3_CLK", 0, Pins("D15"), IOStandard("lvttl")), + ("SBUS_3V3_ASs", 0, Pins("T4"), IOStandard("lvttl")), + ("SBUS_3V3_BGs", 0, Pins("R7"), IOStandard("lvttl")), # moved + ("SBUS_3V3_BRs", 0, Pins("R6"), IOStandard("lvttl")), + ("SBUS_3V3_ERRs", 0, Pins("D13"), IOStandard("lvttl")), # moved + ("SBUS_DATA_OE_LED", 0, Pins("U1"), IOStandard("lvttl")), + #("SBUS_DATA_OE_LED_2", 0, Pins("T3"), 
IOStandard("lvttl")), + ("SBUS_3V3_RSTs", 0, Pins("U2"), IOStandard("lvttl")), + ("SBUS_3V3_SELs", 0, Pins("K6"), IOStandard("lvttl")), + ("SBUS_3V3_INT1s", 0, Pins("R5"), IOStandard("lvttl")), # moved + ("SBUS_3V3_INT2s", 0, Pins("H15"), IOStandard("lvttl")), # added + ("SBUS_3V3_INT3s", 0, Pins("R3"), IOStandard("lvttl")), # added + ("SBUS_3V3_INT4s", 0, Pins("N5"), IOStandard("lvttl")), # added + ("SBUS_3V3_INT5s", 0, Pins("L5"), IOStandard("lvttl")), # added + ("SBUS_3V3_INT6s", 0, Pins("V2"), IOStandard("lvttl")), # added + #("SBUS_3V3_INT7s", 0, Pins("N5"), IOStandard("lvttl")), + ("SBUS_3V3_PPRD", 0, Pins("N6"), IOStandard("lvttl")), + ("SBUS_OE", 0, Pins("P5"), IOStandard("lvttl")), + ("SBUS_3V3_ACKs", 0, Pins("M6 L6 N4"), IOStandard("lvttl")), + ("SBUS_3V3_SIZ", 0, Pins("T6 U3 V1"), IOStandard("lvttl")), # 0 moved + ("SBUS_3V3_D", 0, Pins("J18 K16 J17 K15 K13 J15 J13 J14 H14 H17 G14 G17 G16 G18 H16 F18 F16 E18 F15 D18 E17 G13 D17 F13 F14 E16 E15 C17 C16 A18 B18 C15"), IOStandard("lvttl")), + ("SBUS_3V3_PA", 0, Pins("B16 B17 D14 C14 D12 A16 A15 B14 B13 B12 C12 A14 A13 B11 A11 M4 R2 M3 P2 M2 N2 K5 N1 L4 M1 L3 L1 K3"), IOStandard("lvttl")), +] # reusing the UART pins !!! 
-_usb_io = [ +_usb_io_v1_0 = [ ("usb", 0, Subsignal("dp", Pins("V9")), # Serial TX Subsignal("dm", Pins("U9")), # Serial RX @@ -119,7 +179,10 @@ _usb_io = [ # Connectors --------------------------------------------------------------------------------------- -_connectors = [ +_connectors_v1_0 = [ +] +_connectors_v1_2 = [ + ("P1", "T8 U6 P3 P4 T1 U4 R1 T3"), ] # Platform ----------------------------------------------------------------------------------------- @@ -128,7 +191,7 @@ class Platform(XilinxPlatform): default_clk_name = "clk48" default_clk_period = 1e9/48e6 - def __init__(self, variant="ztex2.13a"): + def __init__(self, variant="ztex2.13a", version="V1.0"): device = { "ztex2.13a": "xc7a35tcsg324-1", "ztex2.13b": "xc7a50tcsg324-1", #untested @@ -136,9 +199,22 @@ class Platform(XilinxPlatform): "ztex2.13c": "xc7a75tcsg324-2", #untested "ztex2.13d": "xc7a100tcsg324-2" #untested }[variant] - XilinxPlatform.__init__(self, device, _io, _connectors, toolchain="vivado") - self.add_extension(_sbus_io) - self.add_extension(_sbus_sbus) + sbus_io = { + "V1.0" : _sbus_io_v1_0, + "V1.2" : _sbus_io_v1_2, + }[version] + sbus_sbus = { + "V1.0" : _sbus_sbus_v1_0, + "V1.2" : _sbus_sbus_v1_2, + }[version] + connectors = { + "V1.0" : _connectors_v1_0, + "V1.2" : _connectors_v1_2, + }[version] + + XilinxPlatform.__init__(self, device, _io, connectors, toolchain="vivado") + self.add_extension(sbus_io) + self.add_extension(sbus_sbus) self.toolchain.bitstream_commands = \ ["set_property BITSTREAM.CONFIG.SPI_32BIT_ADDR No [current_design]", From 55298ec5b795e6ddba15bd51a8720664687034c5 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 02:10:26 -0400 Subject: [PATCH 69/78] prelim work for V1.2 --- .../sbus_to_fpga_soc.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index ab48314..ebe6ba2 100644 --- 
a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -140,6 +140,7 @@ class _CRG(Module): class SBusFPGA(SoCCore): def __init__(self, **kwargs): + self.version = "V1.0"; kwargs["cpu_type"] = "None" kwargs["integrated_sram_size"] = 0 @@ -148,9 +149,11 @@ class SBusFPGA(SoCCore): self.sys_clk_freq = sys_clk_freq = 100e6 ## 25e6 - self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a") + self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a", version = self.version) + + if (self.version == "V1.0"): + self.platform.add_extension(ztex213_sbus._usb_io_v1_0) - self.platform.add_extension(ztex213_sbus._usb_io) SoCCore.__init__(self, platform=platform, sys_clk_freq=sys_clk_freq, @@ -166,11 +169,12 @@ class SBusFPGA(SoCCore): # Anything at 0x10000000 is therefore unreachable directly # The position of the 'usb_fake_dma' is so it overlaps # the virtual address space used by NetBSD DMA allocators + # (themselves constrained by the SBus MMU capabilities) self.wb_mem_map = wb_mem_map = { "prom": 0x00000000, "csr" : 0x00040000, "usb_host": 0x00080000, - "usb_shared_mem": 0x00090000, + "usb_shared_mem": 0x00090000, # unused "curve25519engine": 0x000a0000, "main_ram": 0x80000000, "usb_fake_dma": 0xfc000000, @@ -179,10 +183,11 @@ class SBusFPGA(SoCCore): self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max - self.submodules.leds = LedChaser( - pads = platform.request("SBUS_DATA_OE_LED_2"), #platform.request("user_led", 7), - sys_clk_freq = sys_clk_freq) - self.add_csr("leds") + if (self.version == "V1.0"): + self.submodules.leds = LedChaser( + pads = platform.request("SBUS_DATA_OE_LED_2"), #platform.request("user_led", 7), + sys_clk_freq = sys_clk_freq) + self.add_csr("leds") self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) #self.comb += 
self.cpu.interrupt[16].eq(self.usb_host.interrupt) #fixme: need to deal with interrupts From e57cf9d9a880277ac23421d60fde247e73ecf859 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 05:52:11 -0400 Subject: [PATCH 70/78] preliminary work on AES256-GCM in the Engine --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 464 +++++++++++++++-- sbus-to-ztex-gateware-migen/engine.py | 484 +++++++++++++----- .../engine_code/engine_code.rs | 484 +++++++++++++++++- sbus-to-ztex-gateware-migen/netbsd_csr.h | 21 +- .../sbus_to_fpga_soc.py | 21 +- 5 files changed, 1259 insertions(+), 215 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 4c647ba..19155a0 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -43,6 +43,9 @@ __KERNEL_RCSID(0, "$NetBSD$"); #include #include +#include +#include +#include #include @@ -97,7 +100,7 @@ struct sbusfpga_curve25519engine_aesjob { static int init_programs(struct sbusfpga_curve25519engine_softc *sc); static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); static int start_job(struct sbusfpga_curve25519engine_softc *sc); -static int wait_job(struct sbusfpga_curve25519engine_softc *sc); +static int wait_job(struct sbusfpga_curve25519engine_softc *sc, uint32_t param); static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window); static int dma_init(struct sbusfpga_curve25519engine_softc *sc); @@ -107,7 +110,17 @@ static int power_off(struct sbusfpga_curve25519engine_softc *sc); int sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l) { - struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + int unit = 
minor(dev) & (MAX_SESSION - 1); + int driver = unit & ~(MAX_SESSION - 1); + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, driver); + + if (sc == NULL) + return ENODEV; + + if ((unit != 0) && ((sc->active_sessions & (1 << unit)) == 0)) { + return ENODEV; + } + /* first we need to turn the engine power on ... */ power_on(sc); @@ -117,7 +130,18 @@ sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l) int sbusfpga_curve25519engine_close(dev_t dev, int flags, int mode, struct lwp *l) { - struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + int unit = minor(dev) & (MAX_SESSION - 1); + int driver = unit & ~(MAX_SESSION - 1); + struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, driver); + + if (sc == NULL) + return ENODEV; + + if ((unit != 0) && (sc->active_sessions & (1 << unit))) { + device_printf(sc->sc_dev, "warning: close() on active session\n"); + sc->active_sessions &= ~(1 << unit); + sc->mapped_sessions &= ~(1 << unit); + } if (sc->active_sessions == 0) power_off(sc); @@ -147,11 +171,18 @@ static const uint32_t program_gcm[20] = {0x0010100d, 0x0094100d, 0x0118100d, 0x0 static const uint32_t program_aes[58] = {0x0001f003, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x0000000a }; -static const uint32_t 
program_gcm_ad[70] = {0x00400800, 0x00080840, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x03000089, 0x003c0000, 0x01400411, 0x0042b405, 0x01400411, 0x00080800, 0xe0000809, 0x00380000, 0x01bc03d1, 0x003cf3d1, 0x0000000a }; +static const uint32_t program_gcm_pfx[72] = {0x01400411, 0x00080840, 0x00040800, 0x0001f043, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x03800089, 0x003c0000, 0x01400411, 0x0042b405, 0x01400411, 0x00080800, 0x00040400, 0xdf800809, 0x00380000, 0x01bc03d1, 0x003cf3d1, 0x00340800 }; -static const uint32_t* programs[5] = { program_ec25519, program_gcm, program_aes, program_gcm_ad, NULL }; -static const uint32_t program_len[5] = { 134, 20, 58, 70, 0 }; -static uint32_t program_offset[4]; +static const 
uint32_t program_gcm_ad[29] = {0x0d800309, 0x000000d3, 0x01800011, 0x00000011, 0x0000d003, 0x000ec0c5, 0x0032d306, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x00800309, 0xf2800809, 0x0000000a }; + +static const uint32_t program_gcm_aes[92] = {0x2d000309, 0x01400411, 0x0042b405, 0x01400411, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03840052, 0x000000d3, 0x00001003, 0x00ac02d3, 0x01800011, 0x00000011, 0x0000d003, 0x000ec0c5, 0x002ec2c5, 0x0032d306, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x00800309, 0xd3000809, 0x0000000a }; + +static const uint32_t program_gcm_finish[113] = {0x2b000309, 0x01400411, 0x0042b405, 0x01400411, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 
0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03840052, 0x0004a054, 0x000000d3, 0x00001003, 0x00ac02d3, 0x01800011, 0x00000011, 0x0000d003, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x01a40251, 0x00249251, 0x0000d243, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x01b40351, 0x0034d351, 0x0020e343, 0x0000000a }; + +// second and third are for testing and shall be removed +static const uint32_t* programs[8] = { program_ec25519, program_gcm, program_aes, program_gcm_pfx, program_gcm_ad, program_gcm_aes, program_gcm_finish, NULL }; +static const uint32_t program_len[8] = { 134, 20, 58, 72, 29, 92, 113, 0 }; +static uint32_t program_offset[8]; /* * Attach all the sub-devices we can find @@ -303,18 +334,55 @@ struct sbusfpga_curve25519engine_session { uint32_t session; uint32_t cookie; }; +struct sbusfpga_curve25519engine_session_len { + uint32_t session; + uint32_t cookie; + uint32_t len; +}; +struct sbusfpga_curve25519engine_session_len_data { + uint32_t session; + uint32_t cookie; + uint32_t len; + uint32_t data[8]; + uint32_t keys[60]; +}; +struct sbusfpga_curve25519engine_session_len_final { + uint32_t session; + uint32_t cookie; + uint32_t len; + uint32_t data[8]; +}; + +#define CHECKSESSION(ses) \ + do { \ + if ((ses->session >= MAX_ACTIVE_SESSION) || (ses->session >= MAX_SESSION)) \ + return EINVAL; \ + if 
(sc->sessions_cookies[ses->session] == 0) \ + return EINVAL; \ + if (sc->sessions_cookies[ses->session] != ses->cookie) \ + return EINVAL; \ + if (ses->session != unit) \ + return EINVAL; \ + if ((sc->active_sessions & (1 << ses->session)) == 0) \ + return EINVAL; \ + } while (0) #define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob) #define SBUSFPGA_EC25519_CHECKGCM _IOW(0, 1, struct sbusfpga_curve25519engine_montgomeryjob) #define SBUSFPGA_EC25519_CHECKAES _IOW(0, 2, struct sbusfpga_curve25519engine_aesjob) -#define SBUSFPGA_EC25519_GCMAD _IOW(0, 3, struct sbusfpga_curve25519engine_aesjob) -#define SBUSFPGA_EC25519_OPENSESSION _IOR(1, 0, struct sbusfpga_curve25519engine_session) -#define SBUSFPGA_EC25519_CLOSESESSION _IOR(1, 1, struct sbusfpga_curve25519engine_session) +#define SBUSFPGA_EC25519_GETSESSION _IOR(1, 0, struct sbusfpga_curve25519engine_session) +#define SBUSFPGA_EC25519_OPENSESSION _IOW(1, 1, struct sbusfpga_curve25519engine_session) +#define SBUSFPGA_EC25519_CLOSESESSION _IOW(1, 2, struct sbusfpga_curve25519engine_session) +#define SBUSFPGA_EC25519_GCMPFX _IOW(1, 3, struct sbusfpga_curve25519engine_session_len_data) +#define SBUSFPGA_EC25519_GCMAD _IOW(1, 4, struct sbusfpga_curve25519engine_session_len) +#define SBUSFPGA_EC25519_GCMAES _IOW(1, 5, struct sbusfpga_curve25519engine_session_len) +#define SBUSFPGA_EC25519_GCMFINISH _IOWR(1, 6, struct sbusfpga_curve25519engine_session_len_final) static int get_session(struct sbusfpga_curve25519engine_softc *sc) { int i; /* don't use 0, we use it for testing */ + /* also minor 0 is used to request session, 1-7 to open/close/map using session # */ for (i = 1 ; (i < MAX_ACTIVE_SESSION) && (i < MAX_SESSION) ; i++) { if (((sc->active_sessions & (1<mapped_sessions & (1<active_sessions |= (1<initialized) { if (init_programs(sc)) { return ENXIO; @@ -339,6 +413,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } switch (cmd) { case 
SBUSFPGA_DO_MONTGOMERYJOB: { + if (unit != 0) + return ENOTTY; + struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; curve25519engine_mpstart_write(sc, program_offset[0]); /* EC25519 */ curve25519engine_mplen_write(sc, program_len[0]); /* EC25519 */ @@ -350,7 +427,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st if (err) return err; delay(1); - err = wait_job(sc); + err = wait_job(sc, 1); if (err) return err; err = read_outputs(sc, job, 0); @@ -359,6 +436,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } break; case SBUSFPGA_EC25519_CHECKGCM: { + if (unit != 0) + return ENOTTY; + const uint32_t base = 0; struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data; int reg, i; @@ -376,7 +456,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st if (err) return err; delay(1); - err = wait_job(sc); + err = wait_job(sc, 1); /* if (err) */ /* return err; */ @@ -391,6 +471,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } break; case SBUSFPGA_EC25519_CHECKAES: { + if (unit != 0) + return ENOTTY; + const uint32_t base = 0; struct sbusfpga_curve25519engine_aesjob* job = (struct sbusfpga_curve25519engine_aesjob*)data; int reg, i; @@ -410,7 +493,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st if (err) return err; delay(1); - err = wait_job(sc); + err = wait_job(sc, 1); /* if (err) */ /* return err; */ @@ -424,19 +507,50 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st } } break; - case SBUSFPGA_EC25519_GCMAD: { - const uint32_t base = 0; - struct sbusfpga_curve25519engine_aesjob* job = (struct sbusfpga_curve25519engine_aesjob*)data; - int reg, i; - curve25519engine_mpstart_write(sc, program_offset[3]); /* GCM_AD */ - curve25519engine_mplen_write(sc, 
program_len[3]); /* GCM_AD */ + case SBUSFPGA_EC25519_GCMPFX: { + if (unit == 0) + return ENOTTY; + + /* FIXME: need a lock!!! */ + + const uint32_t base = unit * 0x400; + struct sbusfpga_curve25519engine_session_len_data* job = (struct sbusfpga_curve25519engine_session_len_data*)data; + int reg, i; + void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) ); + //void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048); + + CHECKSESSION(job); + + if (job->len > 128) { + device_printf(sc->sc_dev, "job->len too big: %u", job->len); + return EINVAL; + } + + curve25519engine_mpstart_write(sc, program_offset[3]); /* GCM_PFX */ + curve25519engine_mplen_write(sc, program_len[3] + program_len[4]); /* GCM_PFX + GCM_AD */ + curve25519engine_window_write(sc, unit); /* to each session its own register file */ + + /* read_addr */ for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->data[i]); + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); + } + /* write_addr */ + /* for (i = 0 ; i < 8 ; i ++) { */ + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(4,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); */ + /* } */ + /* write_len */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i&3) == 0) ? 
((uint32_t)job->len) : 0); + } + /* data */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(16,i), job->data[i]); } for (reg = 31 ; reg > 16 ; reg--) { for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i+8*(31-reg)]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[(i&3)+4*(31-reg)]); } } @@ -444,10 +558,64 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st if (err) return err; delay(1); - err = wait_job(sc); - /* if (err) */ - /* return err; */ + err = wait_job(sc, job->len); + if (err) + return err; +#if 0 + for (reg = 0 ; reg < 32 ; reg++) { + uint32_t buf[8]; + for (i = 0 ; i < 8 ; i ++) { + buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i)); + } + device_printf(sc->sc_dev, "GCM_PFX %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + } +#endif + } + break; + + case SBUSFPGA_EC25519_GCMAD: { + if (unit == 0) + return ENOTTY; + + /* FIXME: need a lock!!! */ + + const uint32_t base = unit * 0x400; + struct sbusfpga_curve25519engine_session_len* job = (struct sbusfpga_curve25519engine_session_len*)data; + int i; + void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) ); + //void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048); + + CHECKSESSION(job); + + if (job->len > 128) + return EINVAL; + + curve25519engine_mpstart_write(sc, program_offset[4]); /* GCM_AES */ + curve25519engine_mplen_write(sc, program_len[4]); /* GCM_AES */ + curve25519engine_window_write(sc, unit); /* to each session its own register file */ + + /* read_addr */ + for (i = 0 ; i < 8 ; i ++) { + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? 
((uint32_t)rd_ptr) : 0); */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); + } + /* write_len */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0); + } + + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc, job->len); + if (err) + return err; + +#if 0 + int reg; for (reg = 0 ; reg < 32 ; reg++) { uint32_t buf[8]; for (i = 0 ; i < 8 ; i ++) { @@ -456,9 +624,153 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st device_printf(sc->sc_dev, "GCM_AD %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } +#endif } break; - case SBUSFPGA_EC25519_OPENSESSION:{ + + case SBUSFPGA_EC25519_GCMAES: { + if (unit == 0) + return ENOTTY; + + /* FIXME: need a lock!!! */ + + const uint32_t base = unit * 0x400; + struct sbusfpga_curve25519engine_session_len* job = (struct sbusfpga_curve25519engine_session_len*)data; + int i; + void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) ); + void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048); + + CHECKSESSION(job); + + if (job->len > 128) + return EINVAL; + + curve25519engine_mpstart_write(sc, program_offset[5]); /* GCM_AES */ + curve25519engine_mplen_write(sc, program_len[5]); /* GCM_AES */ + curve25519engine_window_write(sc, unit); /* to each session its own register file */ + + /* read_addr */ + for (i = 0 ; i < 8 ; i ++) { + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? 
((uint32_t)rd_ptr) : 0); + } + /* write_addr */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); + } + /* write_len */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0); + } + + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc, job->len); + if (err) + return err; +#if 0 + int reg; + for (reg = 0 ; reg < 32 ; reg++) { + uint32_t buf[8]; + for (i = 0 ; i < 8 ; i ++) { + buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i)); + } + device_printf(sc->sc_dev, "GCM_AES %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + } +#endif + } + break; + + + case SBUSFPGA_EC25519_GCMFINISH: { + if (unit == 0) + return ENOTTY; + + /* FIXME: need a lock!!! */ + + const uint32_t base = unit * 0x400; + struct sbusfpga_curve25519engine_session_len_final* job = (struct sbusfpga_curve25519engine_session_len_final*)data; + int i; + void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) ); + void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048); + + CHECKSESSION(job); + + if (job->len > 15) + return EINVAL; + + curve25519engine_mpstart_write(sc, program_offset[6]); /* GCM_FINISH */ + curve25519engine_mplen_write(sc, program_len[6]); /* GCM_FINISH */ + curve25519engine_window_write(sc, unit); /* to each session its own register file */ + + /* read_addr */ + for (i = 0 ; i < 8 ; i ++) { + /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? 
((uint32_t)rd_ptr) : 0); + } + /* write_addr */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); + } + /* write_len */ + for (i = 0 ; i < 8 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0); + } + /* final block */ + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i), job->data[i]); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i+4), job->data[i]); + } + /* create and generate MMASK */ + for (i = 0 ; i < 4 ; i ++) { + uint32_t mask; + int idx = i; + if (job->len <= (idx*4)) { + mask = 0; + } else if (job->len >= (idx+1)*4) { + mask = 0xFFFFFFFF; + } else { + mask = 0xFFFFFFFF >> (8*(4-(job->len%4))); + } + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,i), mask); + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,(i+4)), mask); + } + + + err = start_job(sc); + if (err) + return err; + delay(1); + err = wait_job(sc, job->len); + if (err) + return err; + + /* final accum */ + for (i = 0 ; i < 8 ; i ++) { + job->data[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(8,i)); + } + +#if 0 + int reg; + for (reg = 0 ; reg < 32 ; reg++) { + uint32_t buf[8]; + for (i = 0 ; i < 8 ; i ++) { + buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i)); + } + device_printf(sc->sc_dev, "GCM_FINISH %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + } +#endif + } + break; + + case SBUSFPGA_EC25519_GETSESSION:{ + if (unit != 0) + return ENOTTY; + struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data; int s = get_session(sc); if (s < 0) @@ -468,16 +780,29 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void 
*data, int flag, st ses->cookie = sc->sessions_cookies[s]; } break; - case SBUSFPGA_EC25519_CLOSESESSION:{ + case SBUSFPGA_EC25519_OPENSESSION:{ + if (unit == 0) + return ENOTTY; + struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data; - if ((ses->session >= MAX_ACTIVE_SESSION) || (ses->session >= MAX_SESSION)) - return EINVAL; - if (sc->sessions_cookies[ses->session] != ses->cookie) - return EINVAL; + CHECKSESSION(ses); if ((sc->mapped_sessions & (1 << ses->session)) != 0) - return EBUSY; + return EINVAL; + } + break; + case SBUSFPGA_EC25519_CLOSESESSION:{ + if (unit == 0) + return ENOTTY; + + struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data; + + CHECKSESSION(ses); + + /* if ((sc->mapped_sessions & (1 << ses->session)) != 0) */ + /* return EBUSY; */ sc->sessions_cookies[ses->session] = 0; sc->active_sessions &= ~(1 << ses->session); + sc->mapped_sessions &= ~(1 << ses->session); // FIXME } break; @@ -570,7 +895,7 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf int i; uint32_t status = curve25519engine_status_read(sc); int err = 0; - if (status & 1) { + if (status & (1<sc_dev, "WRITE - Curve25519Engine status: 0x%08x, still running?\n", status); return -ENXIO; } @@ -603,31 +928,49 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf static int start_job(struct sbusfpga_curve25519engine_softc *sc) { uint32_t status = curve25519engine_status_read(sc); - if (status & 1) { + if (status & (1<sc_dev, "START - Curve25519Engine status: 0x%08x, still running?\n", status); return -ENXIO; } curve25519engine_control_write(sc, 1); - aprint_normal_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc)); + //aprint_normal_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc)); return 0; } -static int wait_job(struct sbusfpga_curve25519engine_softc 
*sc) { +static int wait_job(struct sbusfpga_curve25519engine_softc *sc, uint32_t param) { uint32_t status = curve25519engine_status_read(sc); int count = 0; - while ((status & 1) && (count < 50)) { - aprint_normal_dev(sc->sc_dev, "WAIT - ongoing, Curve25519Engine status: 0x%08x [%d]\n", status, count); + int max_count = 50; + int del = 1; + const int max_del = 32; + static int max_del_seen = 1; + + while ((status & (1<sc_dev, "WAIT - ongoing, Curve25519Engine status: 0x%08x [%d] ls_status: 0x%08x\n", status, count, ls_status); count ++; - delay(1); + delay(del); + del = del < max_del ? 2*del : del; status = curve25519engine_status_read(sc); } + if (del > max_del_seen) { + max_del_seen = del; + aprint_normal_dev(sc->sc_dev, "WAIT - new max delay %d after %d count (param was %u)\n", max_del_seen, count, param); + } + //curve25519engine_control_write(sc, 0); - if (status & 1) { - aprint_error_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x, did not finish in time? [0x%08x]\n", status, curve25519engine_instruction_read(sc)); + if (status & (1<sc_dev, "WAIT - Curve25519Engine status: 0x%08x, did not finish in time? 
[inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc)); + return -ENXIO; + } else if (status & (1<sc_dev, "WAIT - Curve25519Engine status: 0x%08x, sigill [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc)); + return -ENXIO; + } else if (status & (1<sc_dev, "WAIT - Curve25519Engine status: 0x%08x, aborted [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc)); return -ENXIO; } else { - aprint_normal_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x [%d, 0x%08x]\n", status, count, curve25519engine_instruction_read(sc)); + //aprint_normal_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x [%d] ls_status: 0x%08x\n", status, count, curve25519engine_ls_status_read(sc)); } return 0; @@ -637,7 +980,7 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf const uint32_t base = window * 0x400; int i; uint32_t status = curve25519engine_status_read(sc); - if (status & 1) { + if (status & (1<sc_dev, "READ - Curve25519Engine status: 0x%08x, still running?\n", status); return -ENXIO; } @@ -690,31 +1033,42 @@ dma_init(struct sbusfpga_curve25519engine_softc *sc) { return 0; } - aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernel address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr); + aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernel address is %p, dvma address is 0x%08llx, seg %llx / %ld\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr, sc->sc_segs.ds_addr, sc->sc_segs.ds_len); return 1; } paddr_t sbusfpga_curve25519engine_mmap(dev_t dev, off_t offset, int prot) { - struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev)); + int unit = minor(dev) & (MAX_SESSION - 1); + int driver = unit & ~(MAX_SESSION - 1); + struct sbusfpga_curve25519engine_softc *sc = 
device_lookup_private(&sbusfpga_c29e_cd, driver); paddr_t addr = -1; - int ses = offset / 4096; + + device_printf(sc->sc_dev, "%s:%d: %lld %d for %d / %d\n", __PRETTY_FUNCTION__, __LINE__, offset, prot, driver, unit); - if (offset % 4096) + if (offset != 0) return -1; if (prot & PROT_EXEC) return -1; - if (sc->mapped_sessions & (1 << ses)) + /* if (sc->mapped_sessions & (1 << unit)) */ + /* return -1; */ + if ((sc->active_sessions & (1 << unit)) == 0) return -1; - if ((sc->active_sessions & (1 << ses)) == 0) + if (unit >= MAX_ACTIVE_SESSION) + return -1; + if (unit <= 0) return -1; - - addr = bus_dmamem_mmap(sc->sc_dmatag, &sc->sc_segs, 1, offset, prot, BUS_DMA_NOWAIT); - device_printf(sc->sc_dev, "mapped page %d\n", ses); + // addr = bus_dmamem_mmap(sc->sc_dmatag, sc->sc_dmamap->dm_segs, 1, (off_t)(4096*unit), prot, BUS_DMA_NOWAIT); + if (pmap_extract(pmap_kernel(), ((vaddr_t)sc->sc_dma_kva) + (unit * 4096), &addr)) { + + device_printf(sc->sc_dev, "mapped page %d to 0x%08lx [0x%08lx], kernel is %p\n", unit, addr, atop(addr), (void*)(((vaddr_t)sc->sc_dma_kva) + (unit * 4096))); - if (addr != -1) - sc->mapped_sessions |= (1 << ses); + ((uint32_t*)(((vaddr_t)sc->sc_dma_kva) + (unit * 4096)))[0] = 0xDEADBEEF; + sc->mapped_sessions |= (1 << unit); + + return addr; + } - return addr; + return -1; } diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index 891fee6..9e69d18 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -30,8 +30,10 @@ opcodes = { # mnemonic : [bit coding, docstring] "GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], # "GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block ; imm != 0 -> BYTEREV*"], # - "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as 
in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ], - "MAX" : [19, "Maximum opcode number (for bounds checking)"], + "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ], + "MEM" : [19, "MEM ; imm[0] == 0 for LOAD, imm[0] == 1 for STORE (beware, store copy the address in the output reg)" ], + "AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"], + "MAX" : [21, "Maximum opcode number (for bounds checking)"], } num_registers = 32 @@ -236,6 +238,8 @@ class Curve25519Const(Module, AutoDoc): 9: [100, "one hundred", "The number 100 (for pow22501)"], 10: [254, "two hundred fifty four", "The number 254 (iteration count)"], 11: [0x00000001_00000000_00000000_00000000_00000001_00000000_00000000_00000000, "increment for GCM counter (LE)", "increment for GCM counter (LE)"], + 12: [0x00000000_00000000_00000000_00000010_00000000_00000000_00000000_00000010, "sixteen (twice)", "The number 16 (for block-size address increment)"], + 13: [0x00000000_00000000_00000000_00000001_00000000_00000000_00000000_00000001, "decrement for GCM dual-loops (LE)", "decrement for GCM dual-loops"] } self.adr = Signal(5) self.const = Signal(256) @@ -316,7 +320,7 @@ Here is an example of how to swap the contents of `ra` and `rb` based on the val class ExecLogic(ExecUnit): def __init__(self, width=256): - ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "PSB", "XBT", "SHL"]) + ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "PSB", "XBT", "SHL", "AND"]) self.intro = ModuleDoc(title="Logic ExecUnit Subclass", body=f""" This execution unit implements bit-wise logic operations: XOR, NOT, and passthrough. @@ -327,6 +331,7 @@ passthrough. * PSB returns the value of B * SHL returns A << 1 * XBT returns the 255th bit of A, reported in the 0th bit of the result +* AND returns the result of A&B """) @@ -348,6 +353,8 @@ passthrough. 
self.q.eq(Cat(self.a[254], zeros)) ).Elif(self.instruction.opcode == opcodes["SHL"][0], self.q.eq(Cat(0, self.a[:255])), + ).Elif(self.instruction.opcode == opcodes["AND"][0], + self.q.eq(self.a & self.b), ), ] @@ -1442,7 +1449,7 @@ class ExecClmul(ExecUnit, AutoDoc): clmul64_out = Signal(64) clmul64h_out = Signal(64) nlane = width // 128 - clmul_buf = Signal((nlane-1) * 128) ## width must be a multiple of 128... + clmul_buf = Signal(nlane * 128) ## width must be a multiple of 128... lanec = Signal(log2_int(nlane, False)) assert(nlane == 2) ## fixme @@ -1460,30 +1467,37 @@ class ExecClmul(ExecUnit, AutoDoc): self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) seq.act("IDLE", If(self.start, + NextValue(lanec, 0), Case(self.instruction.immediate[0:2], { - 0x0: [ clmul64x_in1.eq(self.a[ 0: 64]), clmul64x_in2.eq(self.b[ 0: 64]) ], - 0x1: [ clmul64x_in1.eq(self.a[ 0: 64]), clmul64x_in2.eq(self.b[ 64:128]) ], - 0x2: [ clmul64x_in1.eq(self.a[ 64:128]), clmul64x_in2.eq(self.b[ 0: 64]) ], - 0x3: [ clmul64x_in1.eq(self.a[ 64:128]), clmul64x_in2.eq(self.b[ 64:128]) ], + 0x0: [ NextValue(clmul64x_in1, self.a[ 0: 64]), NextValue(clmul64x_in2, self.b[ 0: 64]) ], + 0x1: [ NextValue(clmul64x_in1, self.a[ 0: 64]), NextValue(clmul64x_in2, self.b[ 64:128]) ], + 0x2: [ NextValue(clmul64x_in1, self.a[ 64:128]), NextValue(clmul64x_in2, self.b[ 0: 64]) ], + 0x3: [ NextValue(clmul64x_in1, self.a[ 64:128]), NextValue(clmul64x_in2, self.b[ 64:128]) ], }), NextState("NEXT"))) seq.act("NEXT", + Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(clmul64x_in1, self.a[128:192]), NextValue(clmul64x_in2, self.b[128:192]) ], + 0x1: [ NextValue(clmul64x_in1, self.a[128:192]), NextValue(clmul64x_in2, self.b[192:256]) ], + 0x2: [ NextValue(clmul64x_in1, self.a[192:256]), NextValue(clmul64x_in2, self.b[128:192]) ], + 0x3: [ NextValue(clmul64x_in1, self.a[192:256]), NextValue(clmul64x_in2, self.b[192:256]) ], + }), + NextState("WRITE")) + seq.act("WRITE", 
Case(lanec, { 0: [ NextValue(clmul_buf[0:128], Cat(clmul64_out, clmul64h_out)), - Case(self.instruction.immediate[0:2], { - 0x0: [ clmul64x_in1.eq(self.a[128:192]), clmul64x_in2.eq(self.b[128:192]) ], - 0x1: [ clmul64x_in1.eq(self.a[128:192]), clmul64x_in2.eq(self.b[192:256]) ], - 0x2: [ clmul64x_in1.eq(self.a[192:256]), clmul64x_in2.eq(self.b[128:192]) ], - 0x3: [ clmul64x_in1.eq(self.a[192:256]), clmul64x_in2.eq(self.b[192:256]) ], - }), NextValue(lanec, 1), ], - 1: [ self.q_valid.eq(1), - self.q.eq(Cat(clmul_buf, clmul64_out, clmul64h_out)), - NextValue(lanec, 0), - NextState("IDLE") + 1: [ NextValue(clmul_buf[128:256], Cat(clmul64_out, clmul64h_out)), + NextState("OUT"), ], })) + seq.act("OUT", + self.q_valid.eq(1), + self.q.eq(clmul_buf), + NextState("IDLE"), + ); + class ExecGCMShifts(ExecUnit, AutoDoc): def __init__(self, width=256): @@ -1505,13 +1519,13 @@ class ExecGCMShifts(ExecUnit, AutoDoc): ).Elif(self.instruction.opcode == opcodes["GCM_SHRMI"][0], Case(self.instruction.immediate[0:3], { 0x0: self.q.eq(self.a), - 0x1: self.q.eq(Cat(self.a[1:128], self.b[0:1], self.a[129:256], self.b[0:1])), - 0x2: self.q.eq(Cat(self.a[2:128], self.b[0:2], self.a[130:256], self.b[0:2])), - 0x3: self.q.eq(Cat(self.a[3:128], self.b[0:3], self.a[131:256], self.b[0:3])), - 0x4: self.q.eq(Cat(self.a[4:128], self.b[0:4], self.a[132:256], self.b[0:4])), - 0x5: self.q.eq(Cat(self.a[5:128], self.b[0:5], self.a[133:256], self.b[0:5])), - 0x6: self.q.eq(Cat(self.a[6:128], self.b[0:6], self.a[134:256], self.b[0:6])), - 0x7: self.q.eq(Cat(self.a[7:128], self.b[0:7], self.a[135:256], self.b[0:7])), + 0x1: self.q.eq(Cat(self.a[1:128], self.b[0:1], self.a[129:256], self.b[128:129])), + 0x2: self.q.eq(Cat(self.a[2:128], self.b[0:2], self.a[130:256], self.b[128:130])), + 0x3: self.q.eq(Cat(self.a[3:128], self.b[0:3], self.a[131:256], self.b[128:131])), + 0x4: self.q.eq(Cat(self.a[4:128], self.b[0:4], self.a[132:256], self.b[128:132])), + 0x5: self.q.eq(Cat(self.a[5:128], self.b[0:5], 
self.a[133:256], self.b[128:133])), + 0x6: self.q.eq(Cat(self.a[6:128], self.b[0:6], self.a[134:256], self.b[128:134])), + 0x7: self.q.eq(Cat(self.a[7:128], self.b[0:7], self.a[135:256], self.b[128:135])), }) ).Elif(self.instruction.opcode == opcodes["GCM_SHLMI"][0], Case(self.instruction.immediate[0:3], { @@ -1525,12 +1539,33 @@ class ExecGCMShifts(ExecUnit, AutoDoc): 0x7: self.q.eq(Cat(self.b[121:128], self.a[0:121], self.b[249:256], self.a[128:249])), }) ).Elif(self.instruction.opcode == opcodes["GCM_SWAP64"][0], - # also gcm_brev* + # also gcm_brev*, gcm_swap32 Case(self.instruction.immediate[0:2], { - 0: self.q.eq(Cat(self.b[64:128], self.a[0:64], self.b[192:256], self.a[128:192])), - 1: self.q.eq(Cat(self.a[8:16], self.a[0:8], self.a[24:32], self.a[16:24], self.a[40:48], self.a[32:40], self.a[56:64], self.a[48:56], self.a[72:80], self.a[64:72], self.a[88:96], self.a[80:88], self.a[104:112], self.a[96:104], self.a[120:128], self.a[112:120], self.a[136:144], self.a[128:136], self.a[152:160], self.a[144:152], self.a[168:176], self.a[160:168], self.a[184:192], self.a[176:184], self.a[200:208], self.a[192:200], self.a[216:224], self.a[208:216], self.a[232:240], self.a[224:232], self.a[248:256], self.a[240:248])), - 2: self.q.eq(Cat(self.a[24:32], self.a[16:24], self.a[8:16], self.a[0:8], self.a[56:64], self.a[48:56], self.a[40:48], self.a[32:40], self.a[88:96], self.a[80:88], self.a[72:80], self.a[64:72], self.a[120:128], self.a[112:120], self.a[104:112], self.a[96:104], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200], self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232])), - 3: self.q.eq(Cat(self.a[56:64], self.a[48:56], self.a[40:48], self.a[32:40], self.a[24:32], self.a[16:24], self.a[8:16], self.a[0:8], self.a[120:128], self.a[112:120], self.a[104:112], self.a[96:104], self.a[88:96], 
self.a[80:88], self.a[72:80], self.a[64:72], self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200])), + # SWAP64 + 0: self.q.eq(Cat(self.b[ 64:128], self.a[ 0: 64], + self.b[192:256], self.a[128:192])), + # SWAP32 + 4: self.q.eq(Cat(self.b[ 32: 64], self.a[ 0: 32], self.b[ 96:128], self.a[ 64: 96], + self.b[160:192], self.a[128:160], self.b[224:256], self.a[192:224])), + # BREV16 + 1: self.q.eq(Cat(self.a[ 8: 16], self.a[ 0: 8], self.a[ 24: 32], self.a[ 16: 24], self.a[ 40: 48], self.a[ 32: 40], self.a[ 56: 64], self.a[ 48: 56], + self.a[ 72: 80], self.a[ 64: 72], self.a[ 88: 96], self.a[ 80: 88], self.a[104:112], self.a[ 96:104], self.a[120:128], self.a[112:120], + self.a[136:144], self.a[128:136], self.a[152:160], self.a[144:152], self.a[168:176], self.a[160:168], self.a[184:192], self.a[176:184], + self.a[200:208], self.a[192:200], self.a[216:224], self.a[208:216], self.a[232:240], self.a[224:232], self.a[248:256], self.a[240:248])), + # BREV32 + 2: self.q.eq(Cat(self.a[ 24: 32], self.a[ 16: 24], self.a[ 8: 16], self.a[ 0: 8], + self.a[ 56: 64], self.a[ 48: 56], self.a[ 40: 48], self.a[ 32: 40], + self.a[ 88: 96], self.a[ 80: 88], self.a[ 72: 80], self.a[ 64: 72], + self.a[120:128], self.a[112:120], self.a[104:112], self.a[ 96:104], + self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], + self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], + self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200], + self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232])), + # BREV64 + 3: self.q.eq(Cat(self.a[ 56: 64], self.a[ 48: 56], self.a[ 40: 48], self.a[ 32: 40], self.a[ 24: 32], self.a[ 16: 24], self.a[ 8: 16], self.a[ 0: 8], + self.a[120:128], self.a[112:120], self.a[104:112], self.a[ 96:104], self.a[ 88: 
96], self.a[ 80: 88], self.a[ 72: 80], self.a[ 64: 72], + self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], + self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200])), }) ) ] @@ -1543,7 +1578,7 @@ class ExecAES(ExecUnit, AutoDoc): assert(width == 256) # fixme nlane = width // 128 - aes_buf = Signal((nlane-1) * 128) ## width must be a multiple of 128... + aes_buf = Signal(nlane * 128) ## width must be a multiple of 128... lanec = Signal(log2_int(nlane, False)) assert(nlane == 2) ## fixme @@ -1560,120 +1595,278 @@ class ExecAES(ExecUnit, AutoDoc): self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) seq.act("IDLE", If(self.start, + NextValue(lanec, 0), Case(self.instruction.immediate[0:2], { - 0x0: [ aes_in[0].eq(self.a[ 0: 8]), aes_in[1].eq(self.a[ 32: 40]), aes_in[2].eq(self.a[ 64: 72]), aes_in[3].eq(self.a[ 96:104]) ], - 0x1: [ aes_in[3].eq(self.a[ 8: 16]), aes_in[0].eq(self.a[ 40: 48]), aes_in[1].eq(self.a[ 72: 80]), aes_in[2].eq(self.a[104:112]) ], - 0x2: [ aes_in[2].eq(self.a[ 16: 24]), aes_in[3].eq(self.a[ 48: 56]), aes_in[0].eq(self.a[ 80: 88]), aes_in[1].eq(self.a[112:120]) ], - 0x3: [ aes_in[1].eq(self.a[ 24: 32]), aes_in[2].eq(self.a[ 56: 64]), aes_in[3].eq(self.a[ 88: 96]), aes_in[0].eq(self.a[120:128]) ], + 0x0: [ NextValue(aes_in[0], self.a[ 0: 8]), NextValue(aes_in[1], self.a[ 32: 40]), NextValue(aes_in[2], self.a[ 64: 72]), NextValue(aes_in[3], self.a[ 96:104]) ], + 0x1: [ NextValue(aes_in[3], self.a[ 8: 16]), NextValue(aes_in[0], self.a[ 40: 48]), NextValue(aes_in[1], self.a[ 72: 80]), NextValue(aes_in[2], self.a[104:112]) ], + 0x2: [ NextValue(aes_in[2], self.a[ 16: 24]), NextValue(aes_in[3], self.a[ 48: 56]), NextValue(aes_in[0], self.a[ 80: 88]), NextValue(aes_in[1], self.a[112:120]) ], + 0x3: [ NextValue(aes_in[1], self.a[ 24: 32]), NextValue(aes_in[2], 
self.a[ 56: 64]), NextValue(aes_in[3], self.a[ 88: 96]), NextValue(aes_in[0], self.a[120:128]) ], }), NextState("NEXT"))) seq.act("NEXT", + Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_in[0], self.a[128:136]), NextValue(aes_in[1], self.a[160:168]), NextValue(aes_in[2], self.a[192:200]), NextValue(aes_in[3], self.a[224:232]) ], + 0x1: [ NextValue(aes_in[3], self.a[136:144]), NextValue(aes_in[0], self.a[168:176]), NextValue(aes_in[1], self.a[200:208]), NextValue(aes_in[2], self.a[232:240]) ], + 0x2: [ NextValue(aes_in[2], self.a[144:152]), NextValue(aes_in[3], self.a[176:184]), NextValue(aes_in[0], self.a[208:216]), NextValue(aes_in[1], self.a[240:248]) ], + 0x3: [ NextValue(aes_in[1], self.a[152:160]), NextValue(aes_in[2], self.a[184:192]), NextValue(aes_in[3], self.a[216:224]), NextValue(aes_in[0], self.a[248:256]) ], + }), + NextState("WRITE")) + seq.act("WRITE", Case(lanec, { - 0: [ Case(self.instruction.immediate[0:2], { - 0x0: [ aes_in[0].eq(self.a[128:136]), aes_in[1].eq(self.a[160:168]), aes_in[2].eq(self.a[192:200]), aes_in[3].eq(self.a[224:232]) ], - 0x1: [ aes_in[3].eq(self.a[136:144]), aes_in[0].eq(self.a[168:176]), aes_in[1].eq(self.a[200:208]), aes_in[2].eq(self.a[232:240]) ], - 0x2: [ aes_in[2].eq(self.a[144:152]), aes_in[3].eq(self.a[176:184]), aes_in[0].eq(self.a[208:216]), aes_in[1].eq(self.a[240:248]) ], - 0x3: [ aes_in[1].eq(self.a[152:160]), aes_in[2].eq(self.a[184:192]), aes_in[3].eq(self.a[216:224]), aes_in[0].eq(self.a[248:256]) ], - }), - Case(self.instruction.immediate[2:3], { - 0: Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], - aes_out[1][ 0:16], aes_out[1][ 8:24], - aes_out[2][ 0:16], aes_out[2][ 8:24], - aes_out[3][ 0:16], aes_out[3][ 8:24])), - ], - 0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], - aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], - aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 
8:16], - aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), - ], - 0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], - aes_out[1][ 8:24], aes_out[1][ 0:16], - aes_out[2][ 8:24], aes_out[2][ 0:16], - aes_out[3][ 8:24], aes_out[3][ 0:16])), - ], - 0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], - aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], - aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], - aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), - ], - }), - 1: Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], Signal(24, reset = 0), - aes_out[1][ 8:16], Signal(24, reset = 0), - aes_out[2][ 8:16], Signal(24, reset = 0), - aes_out[3][ 8:16], Signal(24, reset = 0))), - ], - 0x1: [ NextValue(aes_buf[0:128], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), - ], - 0x2: [ NextValue(aes_buf[0:128], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), - ], - 0x3: [ NextValue(aes_buf[0:128], Cat(Signal(24, reset = 0), aes_out[0][ 8:16], - Signal(24, reset = 0), aes_out[1][ 8:16], - Signal(24, reset = 0), aes_out[2][ 8:16], - Signal(24, reset = 0), aes_out[3][ 8:16])), - ], - }), - }), - NextValue(lanec, 1), - ], - 1: [ self.q_valid.eq(1), - Case(self.instruction.immediate[2:3], { - 0: Case(self.instruction.immediate[0:2], { - 0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 0:16], aes_out[0][ 8:24], - aes_out[1][ 0:16], aes_out[1][ 8:24], - aes_out[2][ 0:16], aes_out[2][ 8:24], - aes_out[3][ 0:16], 
aes_out[3][ 8:24])), - ], - 0x1: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], - aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], - aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], - aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), - ], - 0x2: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:24], aes_out[0][ 0:16], - aes_out[1][ 8:24], aes_out[1][ 0:16], - aes_out[2][ 8:24], aes_out[2][ 0:16], - aes_out[3][ 8:24], aes_out[3][ 0:16])), - ], - 0x3: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], - aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], - aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], - aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), - ], - }), - 1: Case(self.instruction.immediate[0:2], { - 0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], Signal(24, reset = 0), - aes_out[1][ 8:16], Signal(24, reset = 0), - aes_out[2][ 8:16], Signal(24, reset = 0), - aes_out[3][ 8:16], Signal(24, reset = 0))), - ], - 0x1: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), - ], - 0x2: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), - ], - 0x3: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(24, reset = 0), aes_out[0][ 8:16], - Signal(24, reset = 0), aes_out[1][ 8:16], - Signal(24, reset = 0), aes_out[2][ 8:16], - Signal(24, reset = 0), aes_out[3][ 8:16])), - ], - }), - }), - NextValue(lanec, 0), - NextState("IDLE") + 0: [ 
Case(self.instruction.immediate[2:3], { + 0: Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], + aes_out[1][ 0:16], aes_out[1][ 8:24], + aes_out[2][ 0:16], aes_out[2][ 8:24], + aes_out[3][ 0:16], aes_out[3][ 8:24])), + ], + 0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], + aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], + aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], + aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), + ], + 0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], + aes_out[1][ 8:24], aes_out[1][ 0:16], + aes_out[2][ 8:24], aes_out[2][ 0:16], + aes_out[3][ 8:24], aes_out[3][ 0:16])), + ], + 0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], + aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], + aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], + aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), + ], + }), + 1: Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], Signal(24, reset = 0), + aes_out[1][ 8:16], Signal(24, reset = 0), + aes_out[2][ 8:16], Signal(24, reset = 0), + aes_out[3][ 8:16], Signal(24, reset = 0))), + ], + 0x1: [ NextValue(aes_buf[0:128], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), + ], + 0x2: [ NextValue(aes_buf[0:128], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), + ], + 0x3: [ NextValue(aes_buf[0:128], Cat(Signal(24, reset = 
0), aes_out[0][ 8:16], + Signal(24, reset = 0), aes_out[1][ 8:16], + Signal(24, reset = 0), aes_out[2][ 8:16], + Signal(24, reset = 0), aes_out[3][ 8:16])), + ], + }), + }), + NextValue(lanec, 1)], + 1: [ Case(self.instruction.immediate[2:3], { + 0: Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], + aes_out[1][ 0:16], aes_out[1][ 8:24], + aes_out[2][ 0:16], aes_out[2][ 8:24], + aes_out[3][ 0:16], aes_out[3][ 8:24])), + ], + 0x1: [ NextValue(aes_buf[128:256], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], + aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], + aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], + aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), + ], + 0x2: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], + aes_out[1][ 8:24], aes_out[1][ 0:16], + aes_out[2][ 8:24], aes_out[2][ 0:16], + aes_out[3][ 8:24], aes_out[3][ 0:16])), + ], + 0x3: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], + aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], + aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], + aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), + ], + }), + 1: Case(self.instruction.immediate[0:2], { + 0x0: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:16], Signal(24, reset = 0), + aes_out[1][ 8:16], Signal(24, reset = 0), + aes_out[2][ 8:16], Signal(24, reset = 0), + aes_out[3][ 8:16], Signal(24, reset = 0))), + ], + 0x1: [ NextValue(aes_buf[128:256], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), + Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), + ], + 0x2: [ NextValue(aes_buf[128:256], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[1][ 8:16], 
Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), + Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), + ], + 0x3: [ NextValue(aes_buf[128:256], Cat(Signal(24, reset = 0), aes_out[0][ 8:16], + Signal(24, reset = 0), aes_out[1][ 8:16], + Signal(24, reset = 0), aes_out[2][ 8:16], + Signal(24, reset = 0), aes_out[3][ 8:16])), + ], + }), + }), + NextState("OUT") ], })) + seq.act("OUT", + self.q_valid.eq(1), + self.q.eq(self.b ^ aes_buf), + NextState("IDLE")) + +class ExecLS(ExecUnit, AutoDoc): + def __init__(self, width=256, interface=None): + ExecUnit.__init__(self, width, ["MEM"]) + + self.notes = ModuleDoc(title=f"Load/Store ExecUnit Subclass", body=f""" + """) + + self.sync.eng_clk += [ # pipeline the instruction + self.instruction_out.eq(self.instruction_in), + ] + + assert(width == 256) # fixme + assert(len(interface.sel) == 16) # 128 bits Wishbone + + start_pipe = Signal() + self.sync.mul_clk += start_pipe.eq(self.start) # break critical path of instruction decode -> SETUP_A state muxes + self.submodules.lsseq = lsseq = ClockDomainsRenamer("mul_clk")(FSM(reset_state="IDLE")) + cpar = Signal() # to keep track of the odd-ness of our cycle, so we can align 2 mul_clk cycles of output on 1 eng_clk cycle + lbuf = Signal(width) + timeout = Signal(11) + #tries = Signal() + self.has_failure = Signal(2) + self.has_timeout = Signal(2) + + self.sync.mul_clk += If(timeout > 0, timeout.eq(timeout - 1)) + + lsseq.act("IDLE", + If(start_pipe, + #NextValue(lbuf, 0xF00FF00F_0FF00FF0_F00FF00F_0FF00FF0_F00FF00F_0FF00FF0_F00FF00F_0FF00FF0), + NextValue(cpar, 0), + NextValue(self.has_timeout, 0), + NextValue(self.has_failure, 0), + NextValue(interface.cyc, 1), + NextValue(interface.stb, 1), + NextValue(interface.sel, 2**len(interface.sel)-1), + NextValue(interface.adr, self.a[4:32]), + NextValue(interface.we, self.instruction.immediate[0]), + NextValue(timeout, 2047), + If(self.instruction.immediate[0], # do we need those tests or 
could we always update dat_w/dat_r ? + NextValue(interface.dat_w, self.b[0:128])), + NextState("MEMl") # MEMl + ) + ) + lsseq.act("MEMl", + NextValue(cpar, cpar ^ 1), + If(interface.ack, + If(~self.instruction.immediate[0], + NextValue(lbuf[0:128], interface.dat_r)), + NextValue(interface.cyc, 0), + NextValue(interface.stb, 0), + NextState("MEMl2") + ).Elif(interface.err, + NextValue(self.has_failure[0], 1), + NextValue(interface.cyc, 0), + NextValue(interface.stb, 0), + NextState("ERR"), + ).Elif(timeout == 0, + NextValue(self.has_timeout[0], 1), + NextValue(interface.cyc, 0), + NextValue(interface.stb, 0), + NextState("ERR"), + )) + lsseq.act("MEMl2", + NextValue(cpar, cpar ^ 1), + If(~interface.ack, + NextValue(interface.cyc, 1), + NextValue(interface.stb, 1), + NextValue(interface.sel, 2**len(interface.sel)-1), + NextValue(interface.adr, self.a[132:160]), + NextValue(interface.we, self.instruction.immediate[0]), + NextValue(timeout, 2047), + If(self.instruction.immediate[0], + NextValue(interface.dat_w, self.b[128:256])), + NextState("MEMh") + )) + lsseq.act("MEMh", + NextValue(cpar, cpar ^ 1), + If(interface.ack, + If(~self.instruction.immediate[0], + NextValue(lbuf[128:256], interface.dat_r)), + NextValue(interface.cyc, 0), + NextValue(interface.stb, 0), + NextState("MEMh2") + ).Elif(interface.err, + NextValue(self.has_failure[1], 1), + NextValue(interface.cyc, 0), + NextValue(interface.stb, 0), + NextState("ERR"), + ).Elif(timeout == 0, + NextValue(self.has_timeout[1], 1), + NextValue(interface.cyc, 0), + NextValue(interface.stb, 0), + NextState("ERR"), + )) + lsseq.act("MEMh2", + NextValue(cpar, cpar ^ 1), + If(~interface.ack, + #NextValue(tries, 0), + If(cpar, ## checkme + NextState("MEM_ODD") + ).Else( + NextState("MEM_EVEN1") + ) + )) + lsseq.act("MEM_ODD", # clock alignement cycle + NextState("MEM_EVEN1")) + lsseq.act("MEM_EVEN1", + NextState("MEM_EVEN2")) + lsseq.act("MEM_EVEN2", + NextValue(cpar, 0), + NextValue(self.has_failure, 0), + 
NextValue(self.has_timeout, 0), + NextState("IDLE")) + lsseq.act("ERR", + #If(~tries, # second attempt + # NextValue(cpar, 0), + # NextValue(tries, 1), + # NextState("IDLE") + #).Else(NextValue(tries, 0), # no third attempt, give up + If(cpar, ## checkme + NextState("MEM_ODD") + ).Else( + NextState("MEM_EVEN1") + ) + #) + ) + self.sync.mul_clk += [ + If(lsseq.ongoing("MEM_EVEN1") | lsseq.ongoing("MEM_EVEN2"), + self.q_valid.eq(1), + If(~self.instruction.immediate[0], + self.q.eq(lbuf), + ).Else( + # self.q.eq(Cat((self.a[0:32] + 16)[0:32], self.a[32:128], + # (self.a[128:160] + 16)[0:32], self.a[160:256])), + self.q.eq(self.a), + ), + ).Else( + self.q_valid.eq(0), + ) + ] + + self.state = Signal(32) + self.sync.mul_clk += self.state[0].eq(lsseq.ongoing("IDLE")) + self.sync.mul_clk += self.state[1].eq(lsseq.ongoing("MEMl")) + self.sync.mul_clk += self.state[2].eq(lsseq.ongoing("MEMl2")) + self.sync.mul_clk += self.state[3].eq(lsseq.ongoing("MEMh")) + self.sync.mul_clk += self.state[4].eq(lsseq.ongoing("MEMh2")) + self.sync.mul_clk += self.state[5].eq(lsseq.ongoing("MEM_ODD")) + self.sync.mul_clk += self.state[6].eq(lsseq.ongoing("MEM_EVEN1")) + self.sync.mul_clk += self.state[7].eq(lsseq.ongoing("MEM_EVEN2")) + self.sync.mul_clk += self.state[8].eq(lsseq.ongoing("MEM_ERR")) + self.sync.mul_clk += self.state[28:30].eq((self.state[28:30] & Replicate(~start_pipe, 2)) | self.has_timeout) + self.sync.mul_clk += self.state[30:32].eq((self.state[30:32] & Replicate(~start_pipe, 2)) | self.has_failure) class Engine(Module, AutoCSR, AutoDoc): @@ -1764,6 +1957,7 @@ Here are the currently implemented opcodes for The Engine: instruction = Record(instruction_layout) # current instruction to execute illegal_opcode = Signal() + abort = Signal(); ### register file rf_depth_raw = 512 @@ -1824,6 +2018,7 @@ Here are the currently implemented opcodes for The Engine: CSRField("mpc", size=log2_int(microcode_depth), description="Current location of the microcode program counter. 
Mostly for debug."), CSRField("pause_gnt", size=1, description="When set, the engine execution has been paused, and the RF & microcode ROM can be read out for suspend/resume"), CSRField("sigill", size=1, description="Illegal Instruction"), + CSRField("abort", size=1, description="Abort from failure"), CSRField("finished", size=1, description="Finished"), ]) pause_gnt = Signal() @@ -1834,6 +2029,7 @@ Here are the currently implemented opcodes for The Engine: self.status.fields.pause_gnt.eq(pause_gnt), self.status.fields.mpc.eq(mpc), self.status.fields.sigill.eq(illegal_opcode), + self.status.fields.abort.eq(abort), self.status.fields.finished.eq(((~running & running_r) | self.status.fields.finished) & (~(running & ~running_r))), ] @@ -1874,6 +2070,8 @@ Here are the currently implemented opcodes for The Engine: self.instruction.status.eq(micro_runport.dat_r) ] + self.ls_status = CSRStatus(32, description="Status of the L/S unit") + ### wishbone bus interface: decode the two address spaces and dispatch accordingly self.bus = bus = wishbone.Interface() wdata = Signal(32) @@ -2120,7 +2318,10 @@ Here are the currently implemented opcodes for The Engine: NextValue(running, 0), ) ).Else( - If(mpc < mpc_stop, + If(abort, + NextState("IDLE"), + NextValue(running, 0), + ).Elif(mpc < mpc_stop, NextState("FETCH"), NextValue(mpc, mpc + 1), ).Else( @@ -2136,6 +2337,7 @@ Here are the currently implemented opcodes for The Engine: ) ) + self.busls = wishbone.Interface(data_width = 128, adr_width = 28) exec_units = { "exec_mask" : ExecMask(width=rf_width_raw), "exec_logic" : ExecLogic(width=rf_width_raw), @@ -2145,6 +2347,7 @@ Here are the currently implemented opcodes for The Engine: "exec_clmul" : ExecClmul(width=rf_width_raw), "exec_gcmshifts" : ExecGCMShifts(width=rf_width_raw), "exec_aes" : ExecAES(width=rf_width_raw), + "exec_ls" : ExecLS(width=rf_width_raw,interface=self.busls) } index = 0 for name, unit in exec_units.items(): @@ -2190,6 +2393,9 @@ Here are the currently 
implemented opcodes for The Engine: self.comb += [ rf_write.eq(done), ] + + self.sync += abort.eq((abort & ~engine_go) | (self.exec_ls.has_failure[0] | self.exec_ls.has_failure[1] | self.exec_ls.has_timeout[0] | self.exec_ls.has_timeout[1])) + self.comb += self.ls_status.status.eq(self.exec_ls.state) ##### TIMING CONSTRAINTS -- you want these. Trust me. diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index b563143..3f6cbd1 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -693,19 +693,25 @@ fn main() -> std::io::Result<()> { fin ); - let gcm_ad_code = assemble_engine25519!( + let gcm_pfx_code = assemble_engine25519!( start: - // Input: rkeys in %31-%17 (backward) + // Input: rkeys in %31-%17 (backward, LE) + // pub in %16 (0-11, 12-15 are ctr so 0, LE) + // RD_PTR in %3 + // ADLEN in %12 (in 16-byte-blocks) // Transient: // %0, %1, %2 are tmp - // init counter in %16 - // H will go in %15 - // T will go in %14 - psa %16, #0 + // Output: + // all inputs preserved + // H will go in %15 (byte-reverted) + // T will go in %14 + // accum (0) will go in %13 + gcm_brev32 %16, %16 // use %2 as a flag psa %2, #1 + psa %1, #0 genht: - xor %0, %16, %31 + xor %0, %1, %31 aesesmi %1, %0, %30, #0 aesesmi %1, %0, %1, #1 @@ -788,23 +794,479 @@ fn main() -> std::io::Result<()> { gcm_brev32 %16, %16 // clear flag & go encrypt t psa %2, #0 + psa %1, %16 brz genht, #0 afterht: // store T in %14 psa %14, %0 - // fully byte-revert H (first byte-in-dword, then dword-in-128bit) + // fully byte-revert H (first byte-in-dword, then dword-in-64bit) gcm_brev64 %15, %15 gcm_swap64 %15, %15, %15 + + psa %13, #0 - fin + // no fin; we fall directly into the AD code + //fin ); + let gcm_ad_code = assemble_engine25519!( + // Input: rkeys in %31-%17 (backward, LE) + // pub in %16 (0-11, 12-15 are ctr so 0, LE) + // RD_PTR in %3 + // ADLEN in %12 
(in 16-byte-blocks) + // H in %15 (byte-reverted) + // T in %14 + // accum in %13 + // Transient: + // %0, %1, %4, %5, %6, %7 are tmp + // Output: + // all inputs preserved except ADLEN (%12) & RD_PTR (%3) + // Updated accum is in %13 + + // if no ad, finish + brz done, %12 + // do one block, repeat + do_ad: load %0, %3 + gcm_brev64 %0, %0 + gcm_swap64 %0, %0, %0 + + xor %0, %0, %13 + add %3, %3, #12 // #12 is 16 in both 128 bits halves + // #13 is 1 in both 128 bits halves + sub %12, %12, #13 + + // // poly mult accum = ((accum^ad) * H) + // C + clmul %4, %0, %15, #0 + // E + clmul %5, %0, %15, #1 + // F + clmul %6, %0, %15, #2 + // D + clmul %7, %0, %15, #3 + // E ^ F + xor %6, %5, %6 + // put low64 of E^F in high64 + gcm_swap64 %5, %6, #0 + // put high64 of E^F in low64 + gcm_swap64 %6, #0, %6 + // D xor low + xor %7, %7, %6 + // C xor high + xor %4, %4, %5 + + // // reduction + // X1:X0 in %4 + // X3:X2 in %7 + // shift everybody by 1 to the left + // high shifting in 1 bit from low + gcm_shlmi %1, %7, %4, #1 + // low + gcm_shlmi %0, %4, #0, #1 + // post-shift + // X1:X0 in %0 + // X3:X2 in %1 + // compute D + gcm_cmpd %2, %0 + // compute E, F, G + gcm_shrmi %6, %2, #0, #1 + gcm_shrmi %4, %2, #0, #2 + gcm_shrmi %5, %2, #0, #7 + // XOR everybody + xor %2, %2, %6 + xor %4, %4, %5 + xor %2, %2, %4 + xor %13, %2, %1 + + brz done, %12 + brz do_ad, #0 + + done: + fin + ); + let gcm_aes_code = assemble_engine25519!( + // pub in %16 (0-11, 12-15 are ctr so 0, LE) + // RD_PTR in %3 + // WR_PTR in %11 + // MLEN in %12 (in *complete* 16-byte-blocks) + // H in %15 (byte-reverted) + // T in %14 + // accum in %13 + // Transient: + // %0, %1, %4, %5, %6, %7 are tmp + // Output: + // all inputs preserved except RD_PTR (%3), WR_PTR (%11), MLEN (%12) + // accum is in %13 + + // if no msg, finish + brz done, %12 + // do one block, repeat + do_msg: + // increment counter + gcm_brev32 %16, %16 + add %16, %16, #11 + gcm_brev32 %16, %16 + + xor %0, %16, %31 + + aesesmi %1, %0, %30, 
#0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %29, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %28, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %27, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %26, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %25, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %24, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %23, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %22, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %21, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %20, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %19, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %18, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesi %0, %1, %17, #0 + aesesi %0, %1, %0, #1 + aesesi %0, %1, %0, #2 + aesesi %1, %1, %0, #3 + + //gcm_brev64 %1, %0 + //gcm_swap64 %1, %1, %1 + + load %0, %3 + xor %0, %0, %1 + store %11, %11, %0 + + gcm_brev64 %0, %0 + gcm_swap64 %0, %0, %0 + + xor %0, %0, %13 + add %3, %3, #12 // #12 is 16 in both 128 bits halves + add %11, %11, #12 // #12 is 16 in both 128 bits halves + // #13 is 1 in both 128 bits halves + sub %12, %12, #13 + + // // poly mult accum = ((accum^ad) * H) + // C + clmul %4, %0, %15, #0 + // E + clmul %5, %0, %15, #1 + // F + clmul %6, %0, %15, #2 + // D + clmul %7, %0, %15, #3 + // E ^ F + xor %6, %5, %6 + // put low64 of E^F in high64 + gcm_swap64 %5, 
%6, #0 + // put high64 of E^F in low64 + gcm_swap64 %6, #0, %6 + // D xor low + xor %7, %7, %6 + // C xor high + xor %4, %4, %5 + + // // reduction + // X1:X0 in %4 + // X3:X2 in %7 + // shift everybody by 1 to the left + // high shifting in 1 bit from low + gcm_shlmi %1, %7, %4, #1 + // low + gcm_shlmi %0, %4, #0, #1 + // post-shift + // X1:X0 in %0 + // X3:X2 in %1 + // compute D + gcm_cmpd %2, %0 + // compute E, F, G + gcm_shrmi %6, %2, #0, #1 + gcm_shrmi %4, %2, #0, #2 + gcm_shrmi %5, %2, #0, #7 + // XOR everybody + xor %2, %2, %6 + xor %4, %4, %5 + xor %2, %2, %4 + xor %13, %2, %1 + + brz done, %12 + brz do_msg, #0 + done: + fin + + ); + let gcm_finish_code = assemble_engine25519!( + // pub in %16 (0-11, 12-15 are ctr so 0, LE) + // RD_PTR in %3 + // WR_PTR in %11 + // MLEN in %12 (do one *partial* 16-byte-blocks, so 0 or non-zero) + // MMASK in %10 (could be computed from MLEN%16 but we don't have an instruction for it yet) + // finalblock in %9 (could be computed but we'd need to know the exact value of adlen) + // H in %15 (byte-reverted) + // T in %14 + // accum in %13 + // Transient: + // %0, %1, %4, %5, %6, %7 are tmp + // Output: + // all inputs preserved except RD_PTR (%3), WR_PTR (%11), MLEN (%12) + // accum is in %13 + // accum ^ T is in %8 + brz last, %12 + + finish_mlen: + // increment counter + gcm_brev32 %16, %16 + add %16, %16, #11 + gcm_brev32 %16, %16 + + xor %0, %16, %31 + + aesesmi %1, %0, %30, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %29, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %28, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %27, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %26, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %25, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, 
%1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %24, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %23, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %22, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %21, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %20, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesmi %0, %1, %19, #0 + aesesmi %0, %1, %0, #1 + aesesmi %0, %1, %0, #2 + aesesmi %0, %1, %0, #3 + + aesesmi %1, %0, %18, #0 + aesesmi %1, %0, %1, #1 + aesesmi %1, %0, %1, #2 + aesesmi %1, %0, %1, #3 + + aesesi %0, %1, %17, #0 + aesesi %0, %1, %0, #1 + aesesi %0, %1, %0, #2 + aesesi %1, %1, %0, #3 + + //gcm_brev64 %1, %0 + //gcm_swap64 %1, %1, %1 + + and %1, %1, %10 + load %0, %3 + xor %0, %0, %1 + + store %11, %11, %0 + + gcm_brev64 %0, %0 + gcm_swap64 %0, %0, %0 + + xor %0, %0, %13 + //add %3, %3, #12 // #12 is 16 in both 128 bits halves + //add %11, %11, #12 // #12 is 16 in both 128 bits halves + // #13 is 1 in both 128 bits halves + //sub %12, %12, #13 + + // // poly mult accum = ((accum^ad) * H) + // C + clmul %4, %0, %15, #0 + // E + clmul %5, %0, %15, #1 + // F + clmul %6, %0, %15, #2 + // D + clmul %7, %0, %15, #3 + // E ^ F + xor %6, %5, %6 + // put low64 of E^F in high64 + gcm_swap64 %5, %6, #0 + // put high64 of E^F in low64 + gcm_swap64 %6, #0, %6 + // D xor low + xor %7, %7, %6 + // C xor high + xor %4, %4, %5 + + // // reduction + // X1:X0 in %4 + // X3:X2 in %7 + // shift everybody by 1 to the left + // high shifting in 1 bit from low + gcm_shlmi %1, %7, %4, #1 + // low + gcm_shlmi %0, %4, #0, #1 + // post-shift + // X1:X0 in %0 + // X3:X2 in %1 + // compute D + gcm_cmpd %2, %0 + // compute E, F, G + gcm_shrmi %6, %2, #0, #1 + gcm_shrmi %4, %2, #0, #2 + gcm_shrmi %5, %2, #0, #7 + // XOR everybody + xor 
%2, %2, %6 + xor %4, %4, %5 + xor %2, %2, %4 + xor %13, %2, %1 + last: + // addmul of finalblock + + gcm_brev64 %9, %9 + gcm_swap64 %9, %9, %9 + xor %0, %9, %13 + //add %3, %3, #12 // #12 is 16 in both 128 bits halves + //add %11, %11, #12 // #12 is 16 in both 128 bits halves + // #13 is 1 in both 128 bits halves + //sub %12, %12, #13 + + // // poly mult accum = ((accum^ad) * H) + // C + clmul %4, %0, %15, #0 + // E + clmul %5, %0, %15, #1 + // F + clmul %6, %0, %15, #2 + // D + clmul %7, %0, %15, #3 + // E ^ F + xor %6, %5, %6 + // put low64 of E^F in high64 + gcm_swap64 %5, %6, #0 + // put high64 of E^F in low64 + gcm_swap64 %6, #0, %6 + // D xor low + xor %7, %7, %6 + // C xor high + xor %4, %4, %5 + + // // reduction + // X1:X0 in %4 + // X3:X2 in %7 + // shift everybody by 1 to the left + // high shifting in 1 bit from low + gcm_shlmi %1, %7, %4, #1 + // low + gcm_shlmi %0, %4, #0, #1 + // post-shift + // X1:X0 in %0 + // X3:X2 in %1 + // compute D + gcm_cmpd %2, %0 + // compute E, F, G + gcm_shrmi %6, %2, #0, #1 + gcm_shrmi %4, %2, #0, #2 + gcm_shrmi %5, %2, #0, #7 + // XOR everybody + xor %2, %2, %6 + xor %4, %4, %5 + xor %2, %2, %4 + xor %13, %2, %1 + + gcm_brev64 %13, %13 + gcm_swap64 %13, %13, %13 + + xor %8, %13, %14 + + fin + ); let mut pos = 0; - while pos < gcm_ad_code.len() { - println!("0x{:08x},", gcm_ad_code[pos]); + while pos < gcm_finish_code.len() { + println!("0x{:08x},", gcm_finish_code[pos]); pos = pos + 1; } Ok(()) diff --git a/sbus-to-ztex-gateware-migen/netbsd_csr.h b/sbus-to-ztex-gateware-migen/netbsd_csr.h index 277fb33..01b3798 100644 --- a/sbus-to-ztex-gateware-migen/netbsd_csr.h +++ b/sbus-to-ztex-gateware-migen/netbsd_csr.h @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-22 07:40:46 +// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-09-03 09:40:05 
//-------------------------------------------------------------------------------- #ifndef __GENERATED_CSR_H #define __GENERATED_CSR_H @@ -237,11 +237,21 @@ static inline uint32_t curve25519engine_status_sigill_read(struct sbusfpga_curve uint32_t word = curve25519engine_status_read(sc); return curve25519engine_status_sigill_extract(sc, word); } -#define CSR_CURVE25519ENGINE_STATUS_FINISHED_OFFSET 13 +#define CSR_CURVE25519ENGINE_STATUS_ABORT_OFFSET 13 +#define CSR_CURVE25519ENGINE_STATUS_ABORT_SIZE 1 +static inline uint32_t curve25519engine_status_abort_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { + uint32_t mask = ((1 << 1)-1); + return ( (oldword >> 13) & mask ); +} +static inline uint32_t curve25519engine_status_abort_read(struct sbusfpga_curve25519engine_softc *sc) { + uint32_t word = curve25519engine_status_read(sc); + return curve25519engine_status_abort_extract(sc, word); +} +#define CSR_CURVE25519ENGINE_STATUS_FINISHED_OFFSET 14 #define CSR_CURVE25519ENGINE_STATUS_FINISHED_SIZE 1 static inline uint32_t curve25519engine_status_finished_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) { uint32_t mask = ((1 << 1)-1); - return ( (oldword >> 13) & mask ); + return ( (oldword >> 14) & mask ); } static inline uint32_t curve25519engine_status_finished_read(struct sbusfpga_curve25519engine_softc *sc) { uint32_t word = curve25519engine_status_read(sc); @@ -439,6 +449,11 @@ static inline uint32_t curve25519engine_instruction_immediate_read(struct sbusfp uint32_t word = curve25519engine_instruction_read(sc); return curve25519engine_instruction_immediate_extract(sc, word); } +#define CSR_CURVE25519ENGINE_LS_STATUS_ADDR (CSR_CURVE25519ENGINE_BASE + 0x2cL) +#define CSR_CURVE25519ENGINE_LS_STATUS_SIZE 1 +static inline uint32_t curve25519engine_ls_status_read(struct sbusfpga_curve25519engine_softc *sc) { + return bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x2cL); +} #endif // CSR_CURVE25519ENGINE_BASE /* 
ddrphy */ diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index ebe6ba2..b574d74 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -139,9 +139,9 @@ class _CRG(Module): self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay) class SBusFPGA(SoCCore): - def __init__(self, **kwargs): - self.version = "V1.0"; - + def __init__(self, version, **kwargs): + print(f"Building SBusFPGA for board version {version}") + kwargs["cpu_type"] = "None" kwargs["integrated_sram_size"] = 0 kwargs["with_uart"] = False @@ -149,9 +149,9 @@ class SBusFPGA(SoCCore): self.sys_clk_freq = sys_clk_freq = 100e6 ## 25e6 - self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a", version = self.version) + self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a", version = version) - if (self.version == "V1.0"): + if (version == "V1.0"): self.platform.add_extension(ztex213_sbus._usb_io_v1_0) SoCCore.__init__(self, @@ -183,7 +183,7 @@ class SBusFPGA(SoCCore): self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max - if (self.version == "V1.0"): + if (version == "V1.0"): self.submodules.leds = LedChaser( pads = platform.request("SBUS_DATA_OE_LED_2"), #platform.request("user_led", 7), sys_clk_freq = sys_clk_freq) @@ -299,6 +299,7 @@ class SBusFPGA(SoCCore): #self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100") #self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", 
None), size=0x20000, cached=False)) + self.bus.add_master(name="curve25519engineLS", master=self.curve25519engine.busls) #self.submodules.curve25519_on_sync = BusSynchronizer(width = 1, idomain = "clk100", odomain = "sys") #self.comb += self.curve25519_on_sync.i.eq(self.curve25519engine.power.fields.on) #self.comb += self.crg.curve25519_on.eq(self.curve25519_on_sync.o) @@ -307,17 +308,20 @@ class SBusFPGA(SoCCore): def main(): parser = argparse.ArgumentParser(description="SbusFPGA") parser.add_argument("--build", action="store_true", help="Build bitstream") + parser.add_argument("--version", default="V1.0", help="SBusFPGA board version (default V1.0)") builder_args(parser) vivado_build_args(parser) args = parser.parse_args() - soc = SBusFPGA(**soc_core_argdict(args)) + soc = SBusFPGA(**soc_core_argdict(args), + version=args.version) #soc.add_uart(name="uart", baudrate=115200, fifo_depth=16) builder = Builder(soc, **builder_argdict(args)) builder.build(**vivado_build_argdict(args), run=args.build) # Generate modified CSR registers definitions/access functions to netbsd_csr.h. + # should be split per-device (and without base) to still work if we have identical devices in different configurations on multiple boards csr_contents = sbus_to_fpga_export.get_csr_header( regions = soc.csr_regions, constants = soc.constants, @@ -325,6 +329,9 @@ def main(): write_to_file(os.path.join("netbsd_csr.h"), csr_contents) # tells the prom where to find what + # just one, as that is board-specific + # BEWARE! then need to run 'forth_to_migen_rom.sh' *and* regenerate the bitstream with the proper PROM built-in! + # (there's surely a better way...) 
csr_forth_contents = sbus_to_fpga_export.get_csr_forth_header( csr_regions = soc.csr_regions, mem_regions = soc.mem_regions, From e820d105da8bfca854d5e2e93c01a0e400f61de3 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 06:25:27 -0400 Subject: [PATCH 71/78] enough delay for longer-timed AES/GCM --- .../9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 19155a0..2f71c72 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -941,10 +941,11 @@ static int start_job(struct sbusfpga_curve25519engine_softc *sc) { static int wait_job(struct sbusfpga_curve25519engine_softc *sc, uint32_t param) { uint32_t status = curve25519engine_status_read(sc); int count = 0; - int max_count = 50; + int max_count = 250; int del = 1; const int max_del = 32; static int max_del_seen = 1; + static int max_cnt_seen = 0; while ((status & (1<sc_dev, "WAIT - new max delay %d after %d count (param was %u)\n", max_del_seen, count, param); } + if (count > max_cnt_seen) { + max_cnt_seen = count; + aprint_normal_dev(sc->sc_dev, "WAIT - new max count %d with %d delay (param was %u)\n", max_cnt_seen, del, param); + + } //curve25519engine_control_write(sc, 0); if (status & (1< Date: Sat, 4 Sep 2021 09:04:40 -0400 Subject: [PATCH 72/78] prelim for boot-time support --- NetBSD/9.0/etc/disktab.sbusfpga | 4 ++++ NetBSD/9.0/etc/rc.d/sbusfpga_sdram | 13 +++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 NetBSD/9.0/etc/disktab.sbusfpga create mode 100644 NetBSD/9.0/etc/rc.d/sbusfpga_sdram diff --git a/NetBSD/9.0/etc/disktab.sbusfpga b/NetBSD/9.0/etc/disktab.sbusfpga new file mode 100644 index 0000000..31dd600 --- /dev/null +++ b/NetBSD/9.0/etc/disktab.sbusfpga @@ -0,0 +1,4 @@ 
+sbusfpga256|SBusFPGA with 256 MiB SDRAM: \ + :ns#2:nt#4:nc#65536:se#512: \ + :oa#0:pa#524288:ta=4.2BSD: \ + :oc#0:pc#524288: diff --git a/NetBSD/9.0/etc/rc.d/sbusfpga_sdram b/NetBSD/9.0/etc/rc.d/sbusfpga_sdram new file mode 100644 index 0000000..fe60532 --- /dev/null +++ b/NetBSD/9.0/etc/rc.d/sbusfpga_sdram @@ -0,0 +1,13 @@ +#!/bin/sh +# +# $NetBSD$ +# + +# PROVIDE: SBUSFPGA_SDRAM + +if test -b /dev/sbusfpga_sdram0; then + for DEVICE in /dev/sbusfpga_sdram[0-9]; do + test -b ${DEVICE} && disklabel -w ${DEVICE} sbusfpga256 || return + test -b ${DEVICE}a && newfs ${DEVICE}a || return + done +fi From e352323ae3f86f3d21b101dd8aeeb7f6a201efe0 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 09:04:58 -0400 Subject: [PATCH 73/78] display all codes in engine_code --- .../engine_code/engine_code.rs | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index 3f6cbd1..5dff469 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -1265,9 +1265,43 @@ fn main() -> std::io::Result<()> { let mut pos = 0; - while pos < gcm_finish_code.len() { - println!("0x{:08x},", gcm_finish_code[pos]); + + pos = 0; + println!("PFX:"); + while pos < gcm_pfx_code.len() { + print!("0x{:08x},", gcm_pfx_code[pos]); pos = pos + 1; } + println!(""); + println!("-> {}", gcm_pfx_code.len()); + + pos = 0; + println!("AD:"); + while pos < gcm_ad_code.len() { + print!("0x{:08x},", gcm_ad_code[pos]); + pos = pos + 1; + } + println!(""); + println!("-> {}", gcm_ad_code.len()); + + pos = 0; + println!("AES:"); + while pos < gcm_aes_code.len() { + print!("0x{:08x},", gcm_aes_code[pos]); + pos = pos + 1; + } + println!(""); + println!("-> {}", gcm_aes_code.len()); + + pos = 0; + println!("FINISH:"); + while pos < gcm_finish_code.len() { + print!("0x{:08x},", 
gcm_finish_code[pos]); + pos = pos + 1; + } + println!(""); + println!("-> {}", gcm_finish_code.len()); + + Ok(()) } From 984abbc656a364dd4b2f6c2ed512df4a66a08c46 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 09:05:13 -0400 Subject: [PATCH 74/78] USB configurability --- .../sbus_to_fpga_soc.py | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index b574d74..7467382 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -32,7 +32,7 @@ import sbus_to_fpga_export; # CRG ---------------------------------------------------------------------------------------------- class _CRG(Module): - def __init__(self, platform, sys_clk_freq): + def __init__(self, platform, sys_clk_freq, usb=True): self.clock_domains.cd_sys = ClockDomain() # 100 MHz PLL, reset'ed by SBus (via pll), SoC/Wishbone main clock self.clock_domains.cd_sys4x = ClockDomain(reset_less=True) self.clock_domains.cd_sys4x_dqs = ClockDomain(reset_less=True) @@ -41,7 +41,8 @@ class _CRG(Module): self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset, we don't want the delay after a warm reset) self.clock_domains.cd_sbus = ClockDomain() # 16.67-25 MHz SBus, reset'ed by SBus, native SBus clock domain # self.clock_domains.cd_por = ClockDomain() # 48 MHz native, reset'ed by SBus, power-on-reset timer - self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller + if (usb): + self.clock_domains.cd_usb = ClockDomain() # 48 MHZ PLL, reset'ed by SBus (via pll), for USB controller self.clock_domains.cd_clk50 = ClockDomain() # 50 MHz (gated) for curve25519engine -> eng_clk #self.clock_domains.cd_clk100 = ClockDomain() # 100 MHz for curve25519engine -> sys_clk self.clock_domains.cd_clk100_gated = 
ClockDomain() # 100 MHz (gated) for curve25519engine -> mul_clk @@ -121,14 +122,15 @@ class _CRG(Module): # self.comb += pll.reset.eq(~por_done | ~rst_sbus) # USB - self.submodules.usb_pll = usb_pll = S7MMCM(speedgrade=-1) - #usb_pll.register_clkin(clk48, 48e6) - usb_pll.register_clkin(self.clk48_bufg, 48e6) - usb_pll.create_clkout(self.cd_usb, 48e6, margin = 0) - platform.add_platform_command("create_generated_clock -name usbclk [get_pins {{MMCME2_ADV_2/CLKOUT0}}]") - self.comb += usb_pll.reset.eq(~rst_sbus) # | ~por_done - platform.add_false_path_constraints(self.cd_sys.clk, self.cd_usb.clk) - + if (usb): + self.submodules.usb_pll = usb_pll = S7MMCM(speedgrade=-1) + #usb_pll.register_clkin(clk48, 48e6) + usb_pll.register_clkin(self.clk48_bufg, 48e6) + usb_pll.create_clkout(self.cd_usb, 48e6, margin = 0) + platform.add_platform_command("create_generated_clock -name usbclk [get_pins {{MMCME2_ADV_2/CLKOUT0}}]") + self.comb += usb_pll.reset.eq(~rst_sbus) # | ~por_done + platform.add_false_path_constraints(self.cd_sys.clk, self.cd_usb.clk) + self.submodules.pll_idelay = pll_idelay = S7MMCM(speedgrade=-1) #pll_idelay.register_clkin(clk48, 48e6) pll_idelay.register_clkin(self.clk48_bufg, 48e6) @@ -139,7 +141,7 @@ class _CRG(Module): self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay) class SBusFPGA(SoCCore): - def __init__(self, version, **kwargs): + def __init__(self, version, usb, **kwargs): print(f"Building SBusFPGA for board version {version}") kwargs["cpu_type"] = "None" @@ -180,7 +182,7 @@ class SBusFPGA(SoCCore): "usb_fake_dma": 0xfc000000, } self.mem_map.update(wb_mem_map) - self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq) + self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq, usb=usb) self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max if (version == "V1.0"): @@ -188,18 +190,18 @@ class SBusFPGA(SoCCore): pads = 
platform.request("SBUS_DATA_OE_LED_2"), #platform.request("user_led", 7), sys_clk_freq = sys_clk_freq) self.add_csr("leds") - - self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) - #self.comb += self.cpu.interrupt[16].eq(self.usb_host.interrupt) #fixme: need to deal with interrupts - # self.add_ram(name="usb_shared_mem", origin=self.mem_map["usb_shared_mem"], size=2**16) - - pad_SBUS_3V3_INT1s = platform.request("SBUS_3V3_INT1s") - SBUS_3V3_INT1s_o = Signal(reset=1) - # the 74LVC2G07 takes care of the Z state: 1 -> Z on the bus, 0 -> 0 on the bus (asserted interrupt) - self.comb += pad_SBUS_3V3_INT1s.eq(SBUS_3V3_INT1s_o) - self.comb += SBUS_3V3_INT1s_o.eq(~self.usb_host.interrupt) ## - + if (usb): + self.add_usb_host(pads=platform.request("usb"), usb_clk_freq=48e6) + if (version == "V1.0"): + pad_usb_interrupt = platform.request("SBUS_3V3_INT1s") ## only one usable + elif (version == "V1.2"): + pad_usb_interrupt = platform.request("SBUS_3V3_INT3s") ## can be 1-6, beware others + sig_usb_interrupt = Signal(reset=1) + # the 74LVC2G07 takes care of the Z state: 1 -> Z on the bus, 0 -> 0 on the bus (asserted interrupt) + self.comb += pad_usb_interrupt.eq(sig_usb_interrupt) + self.comb += sig_usb_interrupt.eq(~self.usb_host.interrupt) ## + #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") #SBUS_DATA_OE_LED_o = Signal() @@ -284,7 +286,9 @@ class SBusFPGA(SoCCore): self.submodules.sbus_bus_stat = SBusFPGABusStat(sbus_bus = self.sbus_bus) self.bus.add_master(name="SBusBridgeToWishbone", master=wishbone_master_sys) - self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) + + if (usb): + self.bus.add_slave(name="usb_fake_dma", slave=self.wishbone_slave_sys, region=SoCRegion(origin=self.mem_map.get("usb_fake_dma", None), size=0x03ffffff, cached=False)) #self.bus.add_master(name="mem_read_master", 
master=self.exchange_with_mem.wishbone_r_slave) #self.bus.add_master(name="mem_write_master", master=self.exchange_with_mem.wishbone_w_slave) @@ -295,6 +299,7 @@ class SBusFPGA(SoCCore): # beware the naming, as 'clk50' 'sysclk' 'clk200' are used in the original platform constraints # the local engine.py was slightly modified to have configurable names, so we can have 'clk50', 'clk100', 'clk200' # Beware that Engine implicitely runs in 'sys' by default, need to rename that one as well + # Actually renaming 'sys' doesn't work - unless we can CDC the CSRs as well self.submodules.curve25519engine = ClockDomainsRenamer({"eng_clk":"clk50", "rf_clk":"clk200", "mul_clk":"clk100_gated"})(Engine(platform=platform,prefix=self.mem_map.get("curve25519engine", None))) # , "sys":"clk100" #self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100") #self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False)) @@ -309,12 +314,14 @@ def main(): parser = argparse.ArgumentParser(description="SbusFPGA") parser.add_argument("--build", action="store_true", help="Build bitstream") parser.add_argument("--version", default="V1.0", help="SBusFPGA board version (default V1.0)") + parser.add_argument("--usb", action="store_true", help="add a USB OHCI controller") builder_args(parser) vivado_build_args(parser) args = parser.parse_args() soc = SBusFPGA(**soc_core_argdict(args), - version=args.version) + version=args.version, + usb=args.usb) #soc.add_uart(name="uart", baudrate=115200, fifo_depth=16) builder = Builder(soc, **builder_argdict(args)) From 0b22dd98c3055c2687bfec50880531cbac3bfc9f Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Sep 2021 13:17:03 -0400 Subject: [PATCH 75/78] 4 AES inst -> just 1 --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 12 +- 
sbus-to-ztex-gateware-migen/engine.py | 265 +++++++++------- .../engine_code/engine_code.rs | 297 ++++-------------- 3 files changed, 224 insertions(+), 350 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 2f71c72..675b974 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -169,19 +169,19 @@ static const uint32_t program_ec25519[134] = {0x00640840, 0x00680800, 0x006c0600 static const uint32_t program_gcm[20] = {0x0010100d, 0x0094100d, 0x0118100d, 0x019c100d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x008e008f, 0x0112008f, 0x0396008f, 0x00083083, 0x00105103, 0x00084083, 0x00001083, 0x0000000a }; -static const uint32_t program_aes[58] = {0x0001f003, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x0000000a }; +static const uint32_t program_aes[16] = {0x0001f003,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00811052,0x0000000a }; -static const uint32_t program_gcm_pfx[72] = {0x01400411, 0x00080840, 0x00040800, 0x0001f043, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 
0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x03800089, 0x003c0000, 0x01400411, 0x0042b405, 0x01400411, 0x00080800, 0x00040400, 0xdf800809, 0x00380000, 0x01bc03d1, 0x003cf3d1, 0x00340800 }; +static const uint32_t program_gcm_pfx[30] = {0x01400411,0x00080840,0x00040800,0x0001f043,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00811052,0x03800089,0x003c0000,0x01400411,0x0042b405,0x01400411,0x00080800,0x00040400,0xf4800809,0x00380000,0x01bc03d1,0x003cf3d1,0x00340800 }; -static const uint32_t program_gcm_ad[29] = {0x0d800309, 0x000000d3, 0x01800011, 0x00000011, 0x0000d003, 0x000ec0c5, 0x0032d306, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x00800309, 0xf2800809, 0x0000000a }; +static const uint32_t program_gcm_ad[29] = {0x0d800309,0x000000d3,0x01800011,0x00000011,0x0000d003,0x000ec0c5,0x0032d306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xf2800809,0x0000000a }; -static const uint32_t program_gcm_aes[92] = {0x2d000309, 0x01400411, 0x0042b405, 0x01400411, 0x0001f403, 0x0005e012, 
0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03840052, 0x000000d3, 0x00001003, 0x00ac02d3, 0x01800011, 0x00000011, 0x0000d003, 0x000ec0c5, 0x002ec2c5, 0x0032d306, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x00800309, 0xd3000809, 0x0000000a }; +static const uint32_t program_gcm_aes[50] = {0x18000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x000ec0c5,0x002ec2c5,0x0032d306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xe8000809,0x0000000a }; -static const uint32_t program_gcm_finish[113] = {0x2b000309, 0x01400411, 0x0042b405, 0x01400411, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 
0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03840052, 0x0004a054, 0x000000d3, 0x00001003, 0x00ac02d3, 0x01800011, 0x00000011, 0x0000d003, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x01a40251, 0x00249251, 0x0000d243, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x01b40351, 0x0034d351, 0x0020e343, 0x0000000a }; +static const uint32_t program_gcm_finish[71] = {0x16000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x0004a054,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x01a40251,0x00249251,0x0000d243,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x01b40351,0x0034d351,0x0020e343,0x0000000a }; // second and third are for testing and shall be removed static const uint32_t* programs[8] = { program_ec25519, 
program_gcm, program_aes, program_gcm_pfx, program_gcm_ad, program_gcm_aes, program_gcm_finish, NULL }; -static const uint32_t program_len[8] = { 134, 20, 58, 72, 29, 92, 113, 0 }; +static const uint32_t program_len[8] = { 134, 20, 16, 30, 29, 50, 71, 0 }; static uint32_t program_offset[8]; /* diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index 9e69d18..f107325 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -30,7 +30,7 @@ opcodes = { # mnemonic : [bit coding, docstring] "GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], # "GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block ; imm != 0 -> BYTEREV*"], # - "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ], + "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0] is 1 for aesesi (shared opcode)" ], "MEM" : [19, "MEM ; imm[0] == 0 for LOAD, imm[0] == 1 for STORE (beware, store copy the address in the output reg)" ], "AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"], "MAX" : [21, "Maximum opcode number (for bounds checking)"], @@ -1579,7 +1579,6 @@ class ExecAES(ExecUnit, AutoDoc): assert(width == 256) # fixme nlane = width // 128 aes_buf = Signal(nlane * 128) ## width must be a multiple of 128... 
- lanec = Signal(log2_int(nlane, False)) assert(nlane == 2) ## fixme aes_in = Array(Signal(8) for a in range(4)) @@ -1595,123 +1594,157 @@ class ExecAES(ExecUnit, AutoDoc): self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) seq.act("IDLE", If(self.start, - NextValue(lanec, 0), - Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_in[0], self.a[ 0: 8]), NextValue(aes_in[1], self.a[ 32: 40]), NextValue(aes_in[2], self.a[ 64: 72]), NextValue(aes_in[3], self.a[ 96:104]) ], - 0x1: [ NextValue(aes_in[3], self.a[ 8: 16]), NextValue(aes_in[0], self.a[ 40: 48]), NextValue(aes_in[1], self.a[ 72: 80]), NextValue(aes_in[2], self.a[104:112]) ], - 0x2: [ NextValue(aes_in[2], self.a[ 16: 24]), NextValue(aes_in[3], self.a[ 48: 56]), NextValue(aes_in[0], self.a[ 80: 88]), NextValue(aes_in[1], self.a[112:120]) ], - 0x3: [ NextValue(aes_in[1], self.a[ 24: 32]), NextValue(aes_in[2], self.a[ 56: 64]), NextValue(aes_in[3], self.a[ 88: 96]), NextValue(aes_in[0], self.a[120:128]) ], - }), - NextState("NEXT"))) - seq.act("NEXT", - Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_in[0], self.a[128:136]), NextValue(aes_in[1], self.a[160:168]), NextValue(aes_in[2], self.a[192:200]), NextValue(aes_in[3], self.a[224:232]) ], - 0x1: [ NextValue(aes_in[3], self.a[136:144]), NextValue(aes_in[0], self.a[168:176]), NextValue(aes_in[1], self.a[200:208]), NextValue(aes_in[2], self.a[232:240]) ], - 0x2: [ NextValue(aes_in[2], self.a[144:152]), NextValue(aes_in[3], self.a[176:184]), NextValue(aes_in[0], self.a[208:216]), NextValue(aes_in[1], self.a[240:248]) ], - 0x3: [ NextValue(aes_in[1], self.a[152:160]), NextValue(aes_in[2], self.a[184:192]), NextValue(aes_in[3], self.a[216:224]), NextValue(aes_in[0], self.a[248:256]) ], + # put the first byte in the lookup tables (LANE1) + # [ NextValue(aes_in[i], self.a[32*i:32*i+8]) for i in range(0, 4) ], + NextValue(aes_in[0], self.a[ 0: 8]), + NextValue(aes_in[1], self.a[ 32: 40]), + 
NextValue(aes_in[2], self.a[ 64: 72]), + NextValue(aes_in[3], self.a[ 96:104]), + NextState("LANE2_1"))) + seq.act("LANE2_1", + # put the first byte in the lookup tables (LANE2) + NextValue(aes_in[0], self.a[128:136]), + NextValue(aes_in[1], self.a[160:168]), + NextValue(aes_in[2], self.a[192:200]), + NextValue(aes_in[3], self.a[224:232]), + NextState("LANE1_2")) + seq.act("LANE1_2", + # store the xor'ed result for LANE1, byte 1 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[ 0: 32], self.b[ 0: 32] ^ Cat(aes_out[0][ 0:16], aes_out[0][ 8:24])), + NextValue(aes_buf[ 32: 64], self.b[ 32: 64] ^ Cat(aes_out[1][ 0:16], aes_out[1][ 8:24])), + NextValue(aes_buf[ 64: 96], self.b[ 64: 96] ^ Cat(aes_out[2][ 0:16], aes_out[2][ 8:24])), + NextValue(aes_buf[ 96:128], self.b[ 96:128] ^ Cat(aes_out[3][ 0:16], aes_out[3][ 8:24]))], + 1:[ NextValue(aes_buf[ 0: 32], self.b[ 0: 32] ^ Cat(aes_out[0][ 8:16], Signal(24, reset = 0))), + NextValue(aes_buf[ 32: 64], self.b[ 32: 64] ^ Cat(aes_out[1][ 8:16], Signal(24, reset = 0))), + NextValue(aes_buf[ 64: 96], self.b[ 64: 96] ^ Cat(aes_out[2][ 8:16], Signal(24, reset = 0))), + NextValue(aes_buf[ 96:128], self.b[ 96:128] ^ Cat(aes_out[3][ 8:16], Signal(24, reset = 0)))], }), - NextState("WRITE")) - seq.act("WRITE", - Case(lanec, { - 0: [ Case(self.instruction.immediate[2:3], { - 0: Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], - aes_out[1][ 0:16], aes_out[1][ 8:24], - aes_out[2][ 0:16], aes_out[2][ 8:24], - aes_out[3][ 0:16], aes_out[3][ 8:24])), - ], - 0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], - aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], - aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16], - aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), - ], - 0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], - aes_out[1][ 8:24], aes_out[1][ 0:16], - 
aes_out[2][ 8:24], aes_out[2][ 0:16], - aes_out[3][ 8:24], aes_out[3][ 0:16])), - ], - 0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], - aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], - aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], - aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), - ], - }), - 1: Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], Signal(24, reset = 0), - aes_out[1][ 8:16], Signal(24, reset = 0), - aes_out[2][ 8:16], Signal(24, reset = 0), - aes_out[3][ 8:16], Signal(24, reset = 0))), - ], - 0x1: [ NextValue(aes_buf[0:128], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), - ], - 0x2: [ NextValue(aes_buf[0:128], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), - ], - 0x3: [ NextValue(aes_buf[0:128], Cat(Signal(24, reset = 0), aes_out[0][ 8:16], - Signal(24, reset = 0), aes_out[1][ 8:16], - Signal(24, reset = 0), aes_out[2][ 8:16], - Signal(24, reset = 0), aes_out[3][ 8:16])), - ], - }), - }), - NextValue(lanec, 1)], - 1: [ Case(self.instruction.immediate[2:3], { - 0: Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24], - aes_out[1][ 0:16], aes_out[1][ 8:24], - aes_out[2][ 0:16], aes_out[2][ 8:24], - aes_out[3][ 0:16], aes_out[3][ 8:24])), - ], - 0x1: [ NextValue(aes_buf[128:256], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16], - aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16], - aes_out[2][16:24], aes_out[2][ 
0:16], aes_out[2][ 8:16], - aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])), - ], - 0x2: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16], - aes_out[1][ 8:24], aes_out[1][ 0:16], - aes_out[2][ 8:24], aes_out[2][ 0:16], - aes_out[3][ 8:24], aes_out[3][ 0:16])), - ], - 0x3: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8], - aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8], - aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8], - aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])), - ], - }), - 1: Case(self.instruction.immediate[0:2], { - 0x0: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:16], Signal(24, reset = 0), - aes_out[1][ 8:16], Signal(24, reset = 0), - aes_out[2][ 8:16], Signal(24, reset = 0), - aes_out[3][ 8:16], Signal(24, reset = 0))), - ], - 0x1: [ NextValue(aes_buf[128:256], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0), - Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))), - ], - 0x2: [ NextValue(aes_buf[128:256], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0), - Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))), - ], - 0x3: [ NextValue(aes_buf[128:256], Cat(Signal(24, reset = 0), aes_out[0][ 8:16], - Signal(24, reset = 0), aes_out[1][ 8:16], - Signal(24, reset = 0), aes_out[2][ 8:16], - Signal(24, reset = 0), aes_out[3][ 8:16])), - ], - }), - }), - NextState("OUT") - ], - })) + # put the second byte in the lookup tables (LANE1) + NextValue(aes_in[3], self.a[ 8: 16]), + NextValue(aes_in[0], self.a[ 40: 48]), + NextValue(aes_in[1], self.a[ 72: 80]), + NextValue(aes_in[2], self.a[104:112]), + NextState("LANE2_2")) + seq.act("LANE2_2", + 
# store the xor'ed result for LANE2, byte 1 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[128:160], self.b[128:160] ^ Cat(aes_out[0][ 0:16], aes_out[0][ 8:24])), + NextValue(aes_buf[160:192], self.b[160:192] ^ Cat(aes_out[1][ 0:16], aes_out[1][ 8:24])), + NextValue(aes_buf[192:224], self.b[192:224] ^ Cat(aes_out[2][ 0:16], aes_out[2][ 8:24])), + NextValue(aes_buf[224:256], self.b[224:256] ^ Cat(aes_out[3][ 0:16], aes_out[3][ 8:24]))], + 1:[ NextValue(aes_buf[128:160], self.b[128:160] ^ Cat(aes_out[0][ 8:16], Signal(24, reset = 0))), + NextValue(aes_buf[160:192], self.b[160:192] ^ Cat(aes_out[1][ 8:16], Signal(24, reset = 0))), + NextValue(aes_buf[192:224], self.b[192:224] ^ Cat(aes_out[2][ 8:16], Signal(24, reset = 0))), + NextValue(aes_buf[224:256], self.b[224:256] ^ Cat(aes_out[3][ 8:16], Signal(24, reset = 0)))], + }), + # put the second byte in the lookup tables (LANE2) + NextValue(aes_in[3], self.a[136:144]), + NextValue(aes_in[0], self.a[168:176]), + NextValue(aes_in[1], self.a[200:208]), + NextValue(aes_in[2], self.a[232:240]), + NextState("LANE1_3")) + seq.act("LANE1_3", + # store the xor'ed result for LANE1, byte 2 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[ 0: 32], aes_buf[ 0: 32] ^ Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16])), + NextValue(aes_buf[ 32: 64], aes_buf[ 32: 64] ^ Cat(aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16])), + NextValue(aes_buf[ 64: 96], aes_buf[ 64: 96] ^ Cat(aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16])), + NextValue(aes_buf[ 96:128], aes_buf[ 96:128] ^ Cat(aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16]))], + 1:[ NextValue(aes_buf[ 0: 32], aes_buf[ 0: 32] ^ Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0))), + NextValue(aes_buf[ 32: 64], aes_buf[ 32: 64] ^ Cat(Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0))), + NextValue(aes_buf[ 64: 96], aes_buf[ 64: 96] ^ Cat(Signal(8, reset = 0), 
aes_out[2][ 8:16], Signal(16, reset = 0))), + NextValue(aes_buf[ 96:128], aes_buf[ 96:128] ^ Cat(Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0)))], + }), + # put the third byte in the lookup tables (LANE1) + NextValue(aes_in[2], self.a[ 16: 24]), + NextValue(aes_in[3], self.a[ 48: 56]), + NextValue(aes_in[0], self.a[ 80: 88]), + NextValue(aes_in[1], self.a[112:120]), + NextState("LANE2_3")) + seq.act("LANE2_3", + # store the xor'ed result for LANE2, byte 2 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[128:160], aes_buf[128:160] ^ Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16])), + NextValue(aes_buf[160:192], aes_buf[160:192] ^ Cat(aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16])), + NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16])), + NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16]))], + 1:[ NextValue(aes_buf[128:160], aes_buf[128:160] ^ Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0))), + NextValue(aes_buf[160:192], aes_buf[160:192] ^ Cat(Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0))), + NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0))), + NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0)))], + }), + # put the third byte in the lookup tables (LANE2) + NextValue(aes_in[2], self.a[144:152]), + NextValue(aes_in[3], self.a[176:184]), + NextValue(aes_in[0], self.a[208:216]), + NextValue(aes_in[1], self.a[240:248]), + NextState("LANE1_4")) + seq.act("LANE1_4", + # store the xor'ed result for LANE1, byte 3 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[ 0: 32], aes_buf[ 0: 32] ^ Cat(aes_out[0][ 8:24], aes_out[0][ 0:16])), + NextValue(aes_buf[ 32: 64], aes_buf[ 32: 64] ^ Cat(aes_out[1][ 8:24], 
aes_out[1][ 0:16])), + NextValue(aes_buf[ 64: 96], aes_buf[ 64: 96] ^ Cat(aes_out[2][ 8:24], aes_out[2][ 0:16])), + NextValue(aes_buf[ 96:128], aes_buf[ 96:128] ^ Cat(aes_out[3][ 8:24], aes_out[3][ 0:16]))], + 1:[ NextValue(aes_buf[ 0: 32], aes_buf[ 0: 32] ^ Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0))), + NextValue(aes_buf[ 32: 64], aes_buf[ 32: 64] ^ Cat(Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0))), + NextValue(aes_buf[ 64: 96], aes_buf[ 64: 96] ^ Cat(Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0))), + NextValue(aes_buf[ 96:128], aes_buf[ 96:128] ^ Cat(Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0)))], + }), + # put the fourth byte in the lookup tables (LANE1) + NextValue(aes_in[1], self.a[ 24: 32]), + NextValue(aes_in[2], self.a[ 56: 64]), + NextValue(aes_in[3], self.a[ 88: 96]), + NextValue(aes_in[0], self.a[120:128]), + NextState("LANE2_4")) + seq.act("LANE2_4", + # store the xor'ed result for LANE2, byte 3 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[128:160], aes_buf[128:160] ^ Cat(aes_out[0][ 8:24], aes_out[0][ 0:16])), + NextValue(aes_buf[160:192], aes_buf[160:192] ^ Cat(aes_out[1][ 8:24], aes_out[1][ 0:16])), + NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(aes_out[2][ 8:24], aes_out[2][ 0:16])), + NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(aes_out[3][ 8:24], aes_out[3][ 0:16]))], + 1:[ NextValue(aes_buf[128:160], aes_buf[128:160] ^ Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0))), + NextValue(aes_buf[160:192], aes_buf[160:192] ^ Cat(Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0))), + NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0))), + NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0)))], + }), + # put the fourth byte in the lookup tables (LANE2) + NextValue(aes_in[1], 
self.a[152:160]), + NextValue(aes_in[2], self.a[184:192]), + NextValue(aes_in[3], self.a[216:224]), + NextValue(aes_in[0], self.a[248:256]), + NextState("LANE1_F")) + seq.act("LANE1_F", + # store the xor'ed result for LANE1, byte 4 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[ 0: 32], aes_buf[ 0: 32] ^ Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8])), + NextValue(aes_buf[ 32: 64], aes_buf[ 32: 64] ^ Cat(aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8])), + NextValue(aes_buf[ 64: 96], aes_buf[ 64: 96] ^ Cat(aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8])), + NextValue(aes_buf[ 96:128], aes_buf[ 96:128] ^ Cat(aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8]))], + 1:[ NextValue(aes_buf[ 0: 32], aes_buf[ 0: 32] ^ Cat(Signal(24, reset = 0), aes_out[0][ 8:16])), + NextValue(aes_buf[ 32: 64], aes_buf[ 32: 64] ^ Cat(Signal(24, reset = 0), aes_out[1][ 8:16])), + NextValue(aes_buf[ 64: 96], aes_buf[ 64: 96] ^ Cat(Signal(24, reset = 0), aes_out[2][ 8:16])), + NextValue(aes_buf[ 96:128], aes_buf[ 96:128] ^ Cat(Signal(24, reset = 0), aes_out[3][ 8:16]))], + }), + NextState("LANE2_F")) + seq.act("LANE2_F", + # store the xor'ed result for LANE2, byte 4 in aes_buf + Case(self.instruction.immediate[0:1], { + 0:[ NextValue(aes_buf[128:160], aes_buf[128:160] ^ Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8])), + NextValue(aes_buf[160:192], aes_buf[160:192] ^ Cat(aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8])), + NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8])), + NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8]))], + 1:[ NextValue(aes_buf[128:160], aes_buf[128:160] ^ Cat(Signal(24, reset = 0), aes_out[0][ 8:16])), + NextValue(aes_buf[160:192], aes_buf[160:192] ^ Cat(Signal(24, reset = 0), aes_out[1][ 8:16])), + NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(Signal(24, reset = 
0), aes_out[2][ 8:16])), + NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(Signal(24, reset = 0), aes_out[3][ 8:16]))], + }), + NextState("OUT")) seq.act("OUT", self.q_valid.eq(1), - self.q.eq(self.b ^ aes_buf), + self.q.eq(aes_buf), NextState("IDLE")) class ExecLS(ExecUnit, AutoDoc): diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index 5dff469..79c9da0 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -621,75 +621,33 @@ fn main() -> std::io::Result<()> { // one full round demo xor %0, %0, %31 - aesesmi %1, %0, %30, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %30 - aesesmi %0, %1, %29, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %29 - aesesmi %1, %0, %28, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %28 - aesesmi %0, %1, %27, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %27 - aesesmi %1, %0, %26, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %26 - aesesmi %0, %1, %25, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %25 - aesesmi %1, %0, %24, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %24 - aesesmi %0, %1, %23, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %23 - aesesmi %1, %0, %22, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %22 - aesesmi %0, %1, %21, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %21 - aesesmi %1, %0, %20, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + 
aesesmi %1, %0, %20 - aesesmi %0, %1, %19, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %19 - aesesmi %1, %0, %18, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %18 - aesesi %0, %1, %17, #0 - aesesi %0, %1, %0, #1 - aesesi %0, %1, %0, #2 - aesesi %0, %1, %0, #3 + aesesi %0, %1, %17 fin ); @@ -713,75 +671,33 @@ fn main() -> std::io::Result<()> { genht: xor %0, %1, %31 - aesesmi %1, %0, %30, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %30 - aesesmi %0, %1, %29, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %29 - aesesmi %1, %0, %28, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %28 - aesesmi %0, %1, %27, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %27 - aesesmi %1, %0, %26, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %26 - aesesmi %0, %1, %25, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %25 - aesesmi %1, %0, %24, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %24 - aesesmi %0, %1, %23, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %23 - aesesmi %1, %0, %22, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %22 - aesesmi %0, %1, %21, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %21 - aesesmi %1, %0, %20, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %20 - aesesmi %0, %1, %19, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %19 - aesesmi %1, %0, %18, #0 - aesesmi %1, %0, %1, #1 - 
aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %18 - aesesi %0, %1, %17, #0 - aesesi %0, %1, %0, #1 - aesesi %0, %1, %0, #2 - aesesi %0, %1, %0, #3 + aesesi %0, %1, %17 // if the %2 flag is cleared, we've just computed T brz afterht, %2 @@ -910,75 +826,33 @@ fn main() -> std::io::Result<()> { xor %0, %16, %31 - aesesmi %1, %0, %30, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %30 - aesesmi %0, %1, %29, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %29 - aesesmi %1, %0, %28, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %28 - aesesmi %0, %1, %27, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %27 - aesesmi %1, %0, %26, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %26 - aesesmi %0, %1, %25, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %25 - aesesmi %1, %0, %24, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %24 - aesesmi %0, %1, %23, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %23 - aesesmi %1, %0, %22, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %22 - aesesmi %0, %1, %21, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %21 - aesesmi %1, %0, %20, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %20 - aesesmi %0, %1, %19, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %19 - aesesmi %1, %0, %18, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %18 - aesesi %0, %1, %17, #0 - aesesi %0, %1, %0, #1 - aesesi %0, %1, %0, 
#2 - aesesi %1, %1, %0, #3 + aesesi %1, %1, %17 //gcm_brev64 %1, %0 //gcm_swap64 %1, %1, %1 @@ -1071,75 +945,33 @@ fn main() -> std::io::Result<()> { xor %0, %16, %31 - aesesmi %1, %0, %30, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %30 - aesesmi %0, %1, %29, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %29 - aesesmi %1, %0, %28, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %28 - aesesmi %0, %1, %27, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %27 - aesesmi %1, %0, %26, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %26 - aesesmi %0, %1, %25, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %25 - aesesmi %1, %0, %24, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %24 - aesesmi %0, %1, %23, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %23 - aesesmi %1, %0, %22, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %22 - aesesmi %0, %1, %21, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %21 - aesesmi %1, %0, %20, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %20 - aesesmi %0, %1, %19, #0 - aesesmi %0, %1, %0, #1 - aesesmi %0, %1, %0, #2 - aesesmi %0, %1, %0, #3 + aesesmi %0, %1, %19 - aesesmi %1, %0, %18, #0 - aesesmi %1, %0, %1, #1 - aesesmi %1, %0, %1, #2 - aesesmi %1, %0, %1, #3 + aesesmi %1, %0, %18 - aesesi %0, %1, %17, #0 - aesesi %0, %1, %0, #1 - aesesi %0, %1, %0, #2 - aesesi %1, %1, %0, #3 + aesesi %1, %1, %17 //gcm_brev64 %1, %0 //gcm_swap64 %1, %1, %1 @@ -1267,7 +1099,16 @@ fn main() -> std::io::Result<()> { let mut pos = 
0; pos = 0; - println!("PFX:"); + println!("test AES:"); + while pos < aescode.len() { + print!("0x{:08x},", aescode[pos]); + pos = pos + 1; + } + println!(""); + println!("-> {}", aescode.len()); + + pos = 0; + println!("GCM PFX:"); while pos < gcm_pfx_code.len() { print!("0x{:08x},", gcm_pfx_code[pos]); pos = pos + 1; @@ -1276,7 +1117,7 @@ fn main() -> std::io::Result<()> { println!("-> {}", gcm_pfx_code.len()); pos = 0; - println!("AD:"); + println!("GCM AD:"); while pos < gcm_ad_code.len() { print!("0x{:08x},", gcm_ad_code[pos]); pos = pos + 1; @@ -1285,7 +1126,7 @@ fn main() -> std::io::Result<()> { println!("-> {}", gcm_ad_code.len()); pos = 0; - println!("AES:"); + println!("GCM AES:"); while pos < gcm_aes_code.len() { print!("0x{:08x},", gcm_aes_code[pos]); pos = pos + 1; @@ -1294,7 +1135,7 @@ fn main() -> std::io::Result<()> { println!("-> {}", gcm_aes_code.len()); pos = 0; - println!("FINISH:"); + println!("GCM FINISH:"); while pos < gcm_finish_code.len() { print!("0x{:08x},", gcm_finish_code[pos]); pos = pos + 1; From 6954e719b768e1904b410d373c70226fb316b1ab Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 5 Sep 2021 04:41:33 -0400 Subject: [PATCH 76/78] fix errors --- .../usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index 675b974..acdeaef 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -897,7 +897,7 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf int err = 0; if (status & (1<sc_dev, "WRITE - Curve25519Engine status: 0x%08x, still running?\n", status); - return -ENXIO; + return ENXIO; } for (i = 0 ; i < 8 ; i ++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(24,i), job->affine_u[i]); @@ -930,7 
+930,7 @@ static int start_job(struct sbusfpga_curve25519engine_softc *sc) { uint32_t status = curve25519engine_status_read(sc); if (status & (1<sc_dev, "START - Curve25519Engine status: 0x%08x, still running?\n", status); - return -ENXIO; + return ENXIO; } curve25519engine_control_write(sc, 1); //aprint_normal_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc)); @@ -968,13 +968,13 @@ static int wait_job(struct sbusfpga_curve25519engine_softc *sc, uint32_t param) //curve25519engine_control_write(sc, 0); if (status & (1<sc_dev, "WAIT - Curve25519Engine status: 0x%08x, did not finish in time? [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc)); - return -ENXIO; + return ENXIO; } else if (status & (1<sc_dev, "WAIT - Curve25519Engine status: 0x%08x, sigill [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc)); - return -ENXIO; + return ENXIO; } else if (status & (1<sc_dev, "WAIT - Curve25519Engine status: 0x%08x, aborted [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc)); - return -ENXIO; + return ENXIO; } else { //aprint_normal_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x [%d] ls_status: 0x%08x\n", status, count, curve25519engine_ls_status_read(sc)); } @@ -988,7 +988,7 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf uint32_t status = curve25519engine_status_read(sc); if (status & (1<sc_dev, "READ - Curve25519Engine status: 0x%08x, still running?\n", status); - return -ENXIO; + return ENXIO; } for (i = 0 ; i < 8 ; i ++) { From 13f7dc48d2ab600c662f5bc353544bf8d864906e Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 5 Sep 2021 04:42:42 -0400 Subject: [PATCH 77/78] Put AES in the mul_clk domain --- sbus-to-ztex-gateware-migen/engine.py | 24 ++++++++++++++++++------ 1 file 
changed, 18 insertions(+), 6 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index f107325..cacf51c 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -1584,16 +1584,19 @@ class ExecAES(ExecUnit, AutoDoc): aes_in = Array(Signal(8) for a in range(4)) aes_out = Array(Signal(24) for a in range(4)) for i in range(4): - self.sync.eng_clk += Case(aes_in[i], { 0x00: aes_out[i].eq(0xa563c6), 0x01: aes_out[i].eq(0x847cf8), 0x02: aes_out[i].eq(0x9977ee), 0x03: aes_out[i].eq(0x8d7bf6), 0x04: aes_out[i].eq(0x0df2ff), 0x05: aes_out[i].eq(0xbd6bd6), 0x06: aes_out[i].eq(0xb16fde), 0x07: aes_out[i].eq(0x54c591), 0x08: aes_out[i].eq(0x503060), 0x09: aes_out[i].eq(0x030102), 0x0a: aes_out[i].eq(0xa967ce), 0x0b: aes_out[i].eq(0x7d2b56), 0x0c: aes_out[i].eq(0x19fee7), 0x0d: aes_out[i].eq(0x62d7b5), 0x0e: aes_out[i].eq(0xe6ab4d), 0x0f: aes_out[i].eq(0x9a76ec), 0x10: aes_out[i].eq(0x45ca8f), 0x11: aes_out[i].eq(0x9d821f), 0x12: aes_out[i].eq(0x40c989), 0x13: aes_out[i].eq(0x877dfa), 0x14: aes_out[i].eq(0x15faef), 0x15: aes_out[i].eq(0xeb59b2), 0x16: aes_out[i].eq(0xc9478e), 0x17: aes_out[i].eq(0x0bf0fb), 0x18: aes_out[i].eq(0xecad41), 0x19: aes_out[i].eq(0x67d4b3), 0x1a: aes_out[i].eq(0xfda25f), 0x1b: aes_out[i].eq(0xeaaf45), 0x1c: aes_out[i].eq(0xbf9c23), 0x1d: aes_out[i].eq(0xf7a453), 0x1e: aes_out[i].eq(0x9672e4), 0x1f: aes_out[i].eq(0x5bc09b), 0x20: aes_out[i].eq(0xc2b775), 0x21: aes_out[i].eq(0x1cfde1), 0x22: aes_out[i].eq(0xae933d), 0x23: aes_out[i].eq(0x6a264c), 0x24: aes_out[i].eq(0x5a366c), 0x25: aes_out[i].eq(0x413f7e), 0x26: aes_out[i].eq(0x02f7f5), 0x27: aes_out[i].eq(0x4fcc83), 0x28: aes_out[i].eq(0x5c3468), 0x29: aes_out[i].eq(0xf4a551), 0x2a: aes_out[i].eq(0x34e5d1), 0x2b: aes_out[i].eq(0x08f1f9), 0x2c: aes_out[i].eq(0x9371e2), 0x2d: aes_out[i].eq(0x73d8ab), 0x2e: aes_out[i].eq(0x533162), 0x2f: aes_out[i].eq(0x3f152a), 0x30: aes_out[i].eq(0x0c0408), 0x31: 
aes_out[i].eq(0x52c795), 0x32: aes_out[i].eq(0x652346), 0x33: aes_out[i].eq(0x5ec39d), 0x34: aes_out[i].eq(0x281830), 0x35: aes_out[i].eq(0xa19637), 0x36: aes_out[i].eq(0x0f050a), 0x37: aes_out[i].eq(0xb59a2f), 0x38: aes_out[i].eq(0x09070e), 0x39: aes_out[i].eq(0x361224), 0x3a: aes_out[i].eq(0x9b801b), 0x3b: aes_out[i].eq(0x3de2df), 0x3c: aes_out[i].eq(0x26ebcd), 0x3d: aes_out[i].eq(0x69274e), 0x3e: aes_out[i].eq(0xcdb27f), 0x3f: aes_out[i].eq(0x9f75ea), 0x40: aes_out[i].eq(0x1b0912), 0x41: aes_out[i].eq(0x9e831d), 0x42: aes_out[i].eq(0x742c58), 0x43: aes_out[i].eq(0x2e1a34), 0x44: aes_out[i].eq(0x2d1b36), 0x45: aes_out[i].eq(0xb26edc), 0x46: aes_out[i].eq(0xee5ab4), 0x47: aes_out[i].eq(0xfba05b), 0x48: aes_out[i].eq(0xf652a4), 0x49: aes_out[i].eq(0x4d3b76), 0x4a: aes_out[i].eq(0x61d6b7), 0x4b: aes_out[i].eq(0xceb37d), 0x4c: aes_out[i].eq(0x7b2952), 0x4d: aes_out[i].eq(0x3ee3dd), 0x4e: aes_out[i].eq(0x712f5e), 0x4f: aes_out[i].eq(0x978413), 0x50: aes_out[i].eq(0xf553a6), 0x51: aes_out[i].eq(0x68d1b9), 0x52: aes_out[i].eq(0x000000), 0x53: aes_out[i].eq(0x2cedc1), 0x54: aes_out[i].eq(0x602040), 0x55: aes_out[i].eq(0x1ffce3), 0x56: aes_out[i].eq(0xc8b179), 0x57: aes_out[i].eq(0xed5bb6), 0x58: aes_out[i].eq(0xbe6ad4), 0x59: aes_out[i].eq(0x46cb8d), 0x5a: aes_out[i].eq(0xd9be67), 0x5b: aes_out[i].eq(0x4b3972), 0x5c: aes_out[i].eq(0xde4a94), 0x5d: aes_out[i].eq(0xd44c98), 0x5e: aes_out[i].eq(0xe858b0), 0x5f: aes_out[i].eq(0x4acf85), 0x60: aes_out[i].eq(0x6bd0bb), 0x61: aes_out[i].eq(0x2aefc5), 0x62: aes_out[i].eq(0xe5aa4f), 0x63: aes_out[i].eq(0x16fbed), 0x64: aes_out[i].eq(0xc54386), 0x65: aes_out[i].eq(0xd74d9a), 0x66: aes_out[i].eq(0x553366), 0x67: aes_out[i].eq(0x948511), 0x68: aes_out[i].eq(0xcf458a), 0x69: aes_out[i].eq(0x10f9e9), 0x6a: aes_out[i].eq(0x060204), 0x6b: aes_out[i].eq(0x817ffe), 0x6c: aes_out[i].eq(0xf050a0), 0x6d: aes_out[i].eq(0x443c78), 0x6e: aes_out[i].eq(0xba9f25), 0x6f: aes_out[i].eq(0xe3a84b), 0x70: aes_out[i].eq(0xf351a2), 0x71: 
aes_out[i].eq(0xfea35d), 0x72: aes_out[i].eq(0xc04080), 0x73: aes_out[i].eq(0x8a8f05), 0x74: aes_out[i].eq(0xad923f), 0x75: aes_out[i].eq(0xbc9d21), 0x76: aes_out[i].eq(0x483870), 0x77: aes_out[i].eq(0x04f5f1), 0x78: aes_out[i].eq(0xdfbc63), 0x79: aes_out[i].eq(0xc1b677), 0x7a: aes_out[i].eq(0x75daaf), 0x7b: aes_out[i].eq(0x632142), 0x7c: aes_out[i].eq(0x301020), 0x7d: aes_out[i].eq(0x1affe5), 0x7e: aes_out[i].eq(0x0ef3fd), 0x7f: aes_out[i].eq(0x6dd2bf), 0x80: aes_out[i].eq(0x4ccd81), 0x81: aes_out[i].eq(0x140c18), 0x82: aes_out[i].eq(0x351326), 0x83: aes_out[i].eq(0x2fecc3), 0x84: aes_out[i].eq(0xe15fbe), 0x85: aes_out[i].eq(0xa29735), 0x86: aes_out[i].eq(0xcc4488), 0x87: aes_out[i].eq(0x39172e), 0x88: aes_out[i].eq(0x57c493), 0x89: aes_out[i].eq(0xf2a755), 0x8a: aes_out[i].eq(0x827efc), 0x8b: aes_out[i].eq(0x473d7a), 0x8c: aes_out[i].eq(0xac64c8), 0x8d: aes_out[i].eq(0xe75dba), 0x8e: aes_out[i].eq(0x2b1932), 0x8f: aes_out[i].eq(0x9573e6), 0x90: aes_out[i].eq(0xa060c0), 0x91: aes_out[i].eq(0x988119), 0x92: aes_out[i].eq(0xd14f9e), 0x93: aes_out[i].eq(0x7fdca3), 0x94: aes_out[i].eq(0x662244), 0x95: aes_out[i].eq(0x7e2a54), 0x96: aes_out[i].eq(0xab903b), 0x97: aes_out[i].eq(0x83880b), 0x98: aes_out[i].eq(0xca468c), 0x99: aes_out[i].eq(0x29eec7), 0x9a: aes_out[i].eq(0xd3b86b), 0x9b: aes_out[i].eq(0x3c1428), 0x9c: aes_out[i].eq(0x79dea7), 0x9d: aes_out[i].eq(0xe25ebc), 0x9e: aes_out[i].eq(0x1d0b16), 0x9f: aes_out[i].eq(0x76dbad), 0xa0: aes_out[i].eq(0x3be0db), 0xa1: aes_out[i].eq(0x563264), 0xa2: aes_out[i].eq(0x4e3a74), 0xa3: aes_out[i].eq(0x1e0a14), 0xa4: aes_out[i].eq(0xdb4992), 0xa5: aes_out[i].eq(0x0a060c), 0xa6: aes_out[i].eq(0x6c2448), 0xa7: aes_out[i].eq(0xe45cb8), 0xa8: aes_out[i].eq(0x5dc29f), 0xa9: aes_out[i].eq(0x6ed3bd), 0xaa: aes_out[i].eq(0xefac43), 0xab: aes_out[i].eq(0xa662c4), 0xac: aes_out[i].eq(0xa89139), 0xad: aes_out[i].eq(0xa49531), 0xae: aes_out[i].eq(0x37e4d3), 0xaf: aes_out[i].eq(0x8b79f2), 0xb0: aes_out[i].eq(0x32e7d5), 0xb1: 
aes_out[i].eq(0x43c88b), 0xb2: aes_out[i].eq(0x59376e), 0xb3: aes_out[i].eq(0xb76dda), 0xb4: aes_out[i].eq(0x8c8d01), 0xb5: aes_out[i].eq(0x64d5b1), 0xb6: aes_out[i].eq(0xd24e9c), 0xb7: aes_out[i].eq(0xe0a949), 0xb8: aes_out[i].eq(0xb46cd8), 0xb9: aes_out[i].eq(0xfa56ac), 0xba: aes_out[i].eq(0x07f4f3), 0xbb: aes_out[i].eq(0x25eacf), 0xbc: aes_out[i].eq(0xaf65ca), 0xbd: aes_out[i].eq(0x8e7af4), 0xbe: aes_out[i].eq(0xe9ae47), 0xbf: aes_out[i].eq(0x180810), 0xc0: aes_out[i].eq(0xd5ba6f), 0xc1: aes_out[i].eq(0x8878f0), 0xc2: aes_out[i].eq(0x6f254a), 0xc3: aes_out[i].eq(0x722e5c), 0xc4: aes_out[i].eq(0x241c38), 0xc5: aes_out[i].eq(0xf1a657), 0xc6: aes_out[i].eq(0xc7b473), 0xc7: aes_out[i].eq(0x51c697), 0xc8: aes_out[i].eq(0x23e8cb), 0xc9: aes_out[i].eq(0x7cdda1), 0xca: aes_out[i].eq(0x9c74e8), 0xcb: aes_out[i].eq(0x211f3e), 0xcc: aes_out[i].eq(0xdd4b96), 0xcd: aes_out[i].eq(0xdcbd61), 0xce: aes_out[i].eq(0x868b0d), 0xcf: aes_out[i].eq(0x858a0f), 0xd0: aes_out[i].eq(0x9070e0), 0xd1: aes_out[i].eq(0x423e7c), 0xd2: aes_out[i].eq(0xc4b571), 0xd3: aes_out[i].eq(0xaa66cc), 0xd4: aes_out[i].eq(0xd84890), 0xd5: aes_out[i].eq(0x050306), 0xd6: aes_out[i].eq(0x01f6f7), 0xd7: aes_out[i].eq(0x120e1c), 0xd8: aes_out[i].eq(0xa361c2), 0xd9: aes_out[i].eq(0x5f356a), 0xda: aes_out[i].eq(0xf957ae), 0xdb: aes_out[i].eq(0xd0b969), 0xdc: aes_out[i].eq(0x918617), 0xdd: aes_out[i].eq(0x58c199), 0xde: aes_out[i].eq(0x271d3a), 0xdf: aes_out[i].eq(0xb99e27), 0xe0: aes_out[i].eq(0x38e1d9), 0xe1: aes_out[i].eq(0x13f8eb), 0xe2: aes_out[i].eq(0xb3982b), 0xe3: aes_out[i].eq(0x331122), 0xe4: aes_out[i].eq(0xbb69d2), 0xe5: aes_out[i].eq(0x70d9a9), 0xe6: aes_out[i].eq(0x898e07), 0xe7: aes_out[i].eq(0xa79433), 0xe8: aes_out[i].eq(0xb69b2d), 0xe9: aes_out[i].eq(0x221e3c), 0xea: aes_out[i].eq(0x928715), 0xeb: aes_out[i].eq(0x20e9c9), 0xec: aes_out[i].eq(0x49ce87), 0xed: aes_out[i].eq(0xff55aa), 0xee: aes_out[i].eq(0x782850), 0xef: aes_out[i].eq(0x7adfa5), 0xf0: aes_out[i].eq(0x8f8c03), 0xf1: 
aes_out[i].eq(0xf8a159), 0xf2: aes_out[i].eq(0x808909), 0xf3: aes_out[i].eq(0x170d1a), 0xf4: aes_out[i].eq(0xdabf65), 0xf5: aes_out[i].eq(0x31e6d7), 0xf6: aes_out[i].eq(0xc64284), 0xf7: aes_out[i].eq(0xb868d0), 0xf8: aes_out[i].eq(0xc34182), 0xf9: aes_out[i].eq(0xb09929), 0xfa: aes_out[i].eq(0x772d5a), 0xfb: aes_out[i].eq(0x110f1e), 0xfc: aes_out[i].eq(0xcbb07b), 0xfd: aes_out[i].eq(0xfc54a8), 0xfe: aes_out[i].eq(0xd6bb6d), 0xff: aes_out[i].eq(0x3a162c) } ) + self.sync.mul_clk += Case(aes_in[i], { 0x00: aes_out[i].eq(0xa563c6), 0x01: aes_out[i].eq(0x847cf8), 0x02: aes_out[i].eq(0x9977ee), 0x03: aes_out[i].eq(0x8d7bf6), 0x04: aes_out[i].eq(0x0df2ff), 0x05: aes_out[i].eq(0xbd6bd6), 0x06: aes_out[i].eq(0xb16fde), 0x07: aes_out[i].eq(0x54c591), 0x08: aes_out[i].eq(0x503060), 0x09: aes_out[i].eq(0x030102), 0x0a: aes_out[i].eq(0xa967ce), 0x0b: aes_out[i].eq(0x7d2b56), 0x0c: aes_out[i].eq(0x19fee7), 0x0d: aes_out[i].eq(0x62d7b5), 0x0e: aes_out[i].eq(0xe6ab4d), 0x0f: aes_out[i].eq(0x9a76ec), 0x10: aes_out[i].eq(0x45ca8f), 0x11: aes_out[i].eq(0x9d821f), 0x12: aes_out[i].eq(0x40c989), 0x13: aes_out[i].eq(0x877dfa), 0x14: aes_out[i].eq(0x15faef), 0x15: aes_out[i].eq(0xeb59b2), 0x16: aes_out[i].eq(0xc9478e), 0x17: aes_out[i].eq(0x0bf0fb), 0x18: aes_out[i].eq(0xecad41), 0x19: aes_out[i].eq(0x67d4b3), 0x1a: aes_out[i].eq(0xfda25f), 0x1b: aes_out[i].eq(0xeaaf45), 0x1c: aes_out[i].eq(0xbf9c23), 0x1d: aes_out[i].eq(0xf7a453), 0x1e: aes_out[i].eq(0x9672e4), 0x1f: aes_out[i].eq(0x5bc09b), 0x20: aes_out[i].eq(0xc2b775), 0x21: aes_out[i].eq(0x1cfde1), 0x22: aes_out[i].eq(0xae933d), 0x23: aes_out[i].eq(0x6a264c), 0x24: aes_out[i].eq(0x5a366c), 0x25: aes_out[i].eq(0x413f7e), 0x26: aes_out[i].eq(0x02f7f5), 0x27: aes_out[i].eq(0x4fcc83), 0x28: aes_out[i].eq(0x5c3468), 0x29: aes_out[i].eq(0xf4a551), 0x2a: aes_out[i].eq(0x34e5d1), 0x2b: aes_out[i].eq(0x08f1f9), 0x2c: aes_out[i].eq(0x9371e2), 0x2d: aes_out[i].eq(0x73d8ab), 0x2e: aes_out[i].eq(0x533162), 0x2f: aes_out[i].eq(0x3f152a), 0x30: 
aes_out[i].eq(0x0c0408), 0x31: aes_out[i].eq(0x52c795), 0x32: aes_out[i].eq(0x652346), 0x33: aes_out[i].eq(0x5ec39d), 0x34: aes_out[i].eq(0x281830), 0x35: aes_out[i].eq(0xa19637), 0x36: aes_out[i].eq(0x0f050a), 0x37: aes_out[i].eq(0xb59a2f), 0x38: aes_out[i].eq(0x09070e), 0x39: aes_out[i].eq(0x361224), 0x3a: aes_out[i].eq(0x9b801b), 0x3b: aes_out[i].eq(0x3de2df), 0x3c: aes_out[i].eq(0x26ebcd), 0x3d: aes_out[i].eq(0x69274e), 0x3e: aes_out[i].eq(0xcdb27f), 0x3f: aes_out[i].eq(0x9f75ea), 0x40: aes_out[i].eq(0x1b0912), 0x41: aes_out[i].eq(0x9e831d), 0x42: aes_out[i].eq(0x742c58), 0x43: aes_out[i].eq(0x2e1a34), 0x44: aes_out[i].eq(0x2d1b36), 0x45: aes_out[i].eq(0xb26edc), 0x46: aes_out[i].eq(0xee5ab4), 0x47: aes_out[i].eq(0xfba05b), 0x48: aes_out[i].eq(0xf652a4), 0x49: aes_out[i].eq(0x4d3b76), 0x4a: aes_out[i].eq(0x61d6b7), 0x4b: aes_out[i].eq(0xceb37d), 0x4c: aes_out[i].eq(0x7b2952), 0x4d: aes_out[i].eq(0x3ee3dd), 0x4e: aes_out[i].eq(0x712f5e), 0x4f: aes_out[i].eq(0x978413), 0x50: aes_out[i].eq(0xf553a6), 0x51: aes_out[i].eq(0x68d1b9), 0x52: aes_out[i].eq(0x000000), 0x53: aes_out[i].eq(0x2cedc1), 0x54: aes_out[i].eq(0x602040), 0x55: aes_out[i].eq(0x1ffce3), 0x56: aes_out[i].eq(0xc8b179), 0x57: aes_out[i].eq(0xed5bb6), 0x58: aes_out[i].eq(0xbe6ad4), 0x59: aes_out[i].eq(0x46cb8d), 0x5a: aes_out[i].eq(0xd9be67), 0x5b: aes_out[i].eq(0x4b3972), 0x5c: aes_out[i].eq(0xde4a94), 0x5d: aes_out[i].eq(0xd44c98), 0x5e: aes_out[i].eq(0xe858b0), 0x5f: aes_out[i].eq(0x4acf85), 0x60: aes_out[i].eq(0x6bd0bb), 0x61: aes_out[i].eq(0x2aefc5), 0x62: aes_out[i].eq(0xe5aa4f), 0x63: aes_out[i].eq(0x16fbed), 0x64: aes_out[i].eq(0xc54386), 0x65: aes_out[i].eq(0xd74d9a), 0x66: aes_out[i].eq(0x553366), 0x67: aes_out[i].eq(0x948511), 0x68: aes_out[i].eq(0xcf458a), 0x69: aes_out[i].eq(0x10f9e9), 0x6a: aes_out[i].eq(0x060204), 0x6b: aes_out[i].eq(0x817ffe), 0x6c: aes_out[i].eq(0xf050a0), 0x6d: aes_out[i].eq(0x443c78), 0x6e: aes_out[i].eq(0xba9f25), 0x6f: aes_out[i].eq(0xe3a84b), 0x70: 
aes_out[i].eq(0xf351a2), 0x71: aes_out[i].eq(0xfea35d), 0x72: aes_out[i].eq(0xc04080), 0x73: aes_out[i].eq(0x8a8f05), 0x74: aes_out[i].eq(0xad923f), 0x75: aes_out[i].eq(0xbc9d21), 0x76: aes_out[i].eq(0x483870), 0x77: aes_out[i].eq(0x04f5f1), 0x78: aes_out[i].eq(0xdfbc63), 0x79: aes_out[i].eq(0xc1b677), 0x7a: aes_out[i].eq(0x75daaf), 0x7b: aes_out[i].eq(0x632142), 0x7c: aes_out[i].eq(0x301020), 0x7d: aes_out[i].eq(0x1affe5), 0x7e: aes_out[i].eq(0x0ef3fd), 0x7f: aes_out[i].eq(0x6dd2bf), 0x80: aes_out[i].eq(0x4ccd81), 0x81: aes_out[i].eq(0x140c18), 0x82: aes_out[i].eq(0x351326), 0x83: aes_out[i].eq(0x2fecc3), 0x84: aes_out[i].eq(0xe15fbe), 0x85: aes_out[i].eq(0xa29735), 0x86: aes_out[i].eq(0xcc4488), 0x87: aes_out[i].eq(0x39172e), 0x88: aes_out[i].eq(0x57c493), 0x89: aes_out[i].eq(0xf2a755), 0x8a: aes_out[i].eq(0x827efc), 0x8b: aes_out[i].eq(0x473d7a), 0x8c: aes_out[i].eq(0xac64c8), 0x8d: aes_out[i].eq(0xe75dba), 0x8e: aes_out[i].eq(0x2b1932), 0x8f: aes_out[i].eq(0x9573e6), 0x90: aes_out[i].eq(0xa060c0), 0x91: aes_out[i].eq(0x988119), 0x92: aes_out[i].eq(0xd14f9e), 0x93: aes_out[i].eq(0x7fdca3), 0x94: aes_out[i].eq(0x662244), 0x95: aes_out[i].eq(0x7e2a54), 0x96: aes_out[i].eq(0xab903b), 0x97: aes_out[i].eq(0x83880b), 0x98: aes_out[i].eq(0xca468c), 0x99: aes_out[i].eq(0x29eec7), 0x9a: aes_out[i].eq(0xd3b86b), 0x9b: aes_out[i].eq(0x3c1428), 0x9c: aes_out[i].eq(0x79dea7), 0x9d: aes_out[i].eq(0xe25ebc), 0x9e: aes_out[i].eq(0x1d0b16), 0x9f: aes_out[i].eq(0x76dbad), 0xa0: aes_out[i].eq(0x3be0db), 0xa1: aes_out[i].eq(0x563264), 0xa2: aes_out[i].eq(0x4e3a74), 0xa3: aes_out[i].eq(0x1e0a14), 0xa4: aes_out[i].eq(0xdb4992), 0xa5: aes_out[i].eq(0x0a060c), 0xa6: aes_out[i].eq(0x6c2448), 0xa7: aes_out[i].eq(0xe45cb8), 0xa8: aes_out[i].eq(0x5dc29f), 0xa9: aes_out[i].eq(0x6ed3bd), 0xaa: aes_out[i].eq(0xefac43), 0xab: aes_out[i].eq(0xa662c4), 0xac: aes_out[i].eq(0xa89139), 0xad: aes_out[i].eq(0xa49531), 0xae: aes_out[i].eq(0x37e4d3), 0xaf: aes_out[i].eq(0x8b79f2), 0xb0: 
aes_out[i].eq(0x32e7d5), 0xb1: aes_out[i].eq(0x43c88b), 0xb2: aes_out[i].eq(0x59376e), 0xb3: aes_out[i].eq(0xb76dda), 0xb4: aes_out[i].eq(0x8c8d01), 0xb5: aes_out[i].eq(0x64d5b1), 0xb6: aes_out[i].eq(0xd24e9c), 0xb7: aes_out[i].eq(0xe0a949), 0xb8: aes_out[i].eq(0xb46cd8), 0xb9: aes_out[i].eq(0xfa56ac), 0xba: aes_out[i].eq(0x07f4f3), 0xbb: aes_out[i].eq(0x25eacf), 0xbc: aes_out[i].eq(0xaf65ca), 0xbd: aes_out[i].eq(0x8e7af4), 0xbe: aes_out[i].eq(0xe9ae47), 0xbf: aes_out[i].eq(0x180810), 0xc0: aes_out[i].eq(0xd5ba6f), 0xc1: aes_out[i].eq(0x8878f0), 0xc2: aes_out[i].eq(0x6f254a), 0xc3: aes_out[i].eq(0x722e5c), 0xc4: aes_out[i].eq(0x241c38), 0xc5: aes_out[i].eq(0xf1a657), 0xc6: aes_out[i].eq(0xc7b473), 0xc7: aes_out[i].eq(0x51c697), 0xc8: aes_out[i].eq(0x23e8cb), 0xc9: aes_out[i].eq(0x7cdda1), 0xca: aes_out[i].eq(0x9c74e8), 0xcb: aes_out[i].eq(0x211f3e), 0xcc: aes_out[i].eq(0xdd4b96), 0xcd: aes_out[i].eq(0xdcbd61), 0xce: aes_out[i].eq(0x868b0d), 0xcf: aes_out[i].eq(0x858a0f), 0xd0: aes_out[i].eq(0x9070e0), 0xd1: aes_out[i].eq(0x423e7c), 0xd2: aes_out[i].eq(0xc4b571), 0xd3: aes_out[i].eq(0xaa66cc), 0xd4: aes_out[i].eq(0xd84890), 0xd5: aes_out[i].eq(0x050306), 0xd6: aes_out[i].eq(0x01f6f7), 0xd7: aes_out[i].eq(0x120e1c), 0xd8: aes_out[i].eq(0xa361c2), 0xd9: aes_out[i].eq(0x5f356a), 0xda: aes_out[i].eq(0xf957ae), 0xdb: aes_out[i].eq(0xd0b969), 0xdc: aes_out[i].eq(0x918617), 0xdd: aes_out[i].eq(0x58c199), 0xde: aes_out[i].eq(0x271d3a), 0xdf: aes_out[i].eq(0xb99e27), 0xe0: aes_out[i].eq(0x38e1d9), 0xe1: aes_out[i].eq(0x13f8eb), 0xe2: aes_out[i].eq(0xb3982b), 0xe3: aes_out[i].eq(0x331122), 0xe4: aes_out[i].eq(0xbb69d2), 0xe5: aes_out[i].eq(0x70d9a9), 0xe6: aes_out[i].eq(0x898e07), 0xe7: aes_out[i].eq(0xa79433), 0xe8: aes_out[i].eq(0xb69b2d), 0xe9: aes_out[i].eq(0x221e3c), 0xea: aes_out[i].eq(0x928715), 0xeb: aes_out[i].eq(0x20e9c9), 0xec: aes_out[i].eq(0x49ce87), 0xed: aes_out[i].eq(0xff55aa), 0xee: aes_out[i].eq(0x782850), 0xef: aes_out[i].eq(0x7adfa5), 0xf0: 
aes_out[i].eq(0x8f8c03), 0xf1: aes_out[i].eq(0xf8a159), 0xf2: aes_out[i].eq(0x808909), 0xf3: aes_out[i].eq(0x170d1a), 0xf4: aes_out[i].eq(0xdabf65), 0xf5: aes_out[i].eq(0x31e6d7), 0xf6: aes_out[i].eq(0xc64284), 0xf7: aes_out[i].eq(0xb868d0), 0xf8: aes_out[i].eq(0xc34182), 0xf9: aes_out[i].eq(0xb09929), 0xfa: aes_out[i].eq(0x772d5a), 0xfb: aes_out[i].eq(0x110f1e), 0xfc: aes_out[i].eq(0xcbb07b), 0xfd: aes_out[i].eq(0xfc54a8), 0xfe: aes_out[i].eq(0xd6bb6d), 0xff: aes_out[i].eq(0x3a162c) } ) self.sync.eng_clk += [ #self.q_valid.eq(self.start), self.instruction_out.eq(self.instruction_in), ] - self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) + start_pipe = Signal() + self.sync.mul_clk += start_pipe.eq(self.start) # break critical path of instruction decode -> SETUP_A state muxes + + self.submodules.seq = seq = ClockDomainsRenamer("mul_clk")(FSM(reset_state="IDLE")) seq.act("IDLE", - If(self.start, + If(start_pipe, # put the first byte in the lookup tables (LANE1) # [ NextValue(aes_in[i], self.a[32*i:32*i+8]) for i in range(0, 4) ], NextValue(aes_in[0], self.a[ 0: 8]), @@ -1741,11 +1744,20 @@ class ExecAES(ExecUnit, AutoDoc): NextValue(aes_buf[192:224], aes_buf[192:224] ^ Cat(Signal(24, reset = 0), aes_out[2][ 8:16])), NextValue(aes_buf[224:256], aes_buf[224:256] ^ Cat(Signal(24, reset = 0), aes_out[3][ 8:16]))], }), - NextState("OUT")) - seq.act("OUT", + NextState("AES_EVEN1")) + seq.act("AES_EVEN1", + NextState("AES_EVEN2")) + seq.act("AES_EVEN2", + NextState("IDLE")) + + self.sync.mul_clk += [ + If(seq.ongoing("AES_EVEN1") | seq.ongoing("AES_EVEN2"), self.q_valid.eq(1), self.q.eq(aes_buf), - NextState("IDLE")) + ).Else( + self.q_valid.eq(0), + ) + ] class ExecLS(ExecUnit, AutoDoc): def __init__(self, width=256, interface=None): From e710b6b2ffc1f3c3f49d1860a4632d37a57b0681 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 5 Sep 2021 09:56:14 -0400 Subject: [PATCH 78/78] option to disable upper lane in AES/GCM isntructions; 
disable them in the code --- .../sys/dev/sbus/sbusfpga_curve25519engine.c | 62 ++++++++----------- sbus-to-ztex-gateware-migen/engine.py | 53 +++++++++++----- .../engine_code/engine_code.rs | 28 ++++----- 3 files changed, 76 insertions(+), 67 deletions(-) diff --git a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c index acdeaef..1f623d6 100644 --- a/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c +++ b/NetBSD/9.0/usr/src/sys/dev/sbus/sbusfpga_curve25519engine.c @@ -173,9 +173,9 @@ static const uint32_t program_aes[16] = {0x0001f003,0x0005e012,0x0001d052,0x0005 static const uint32_t program_gcm_pfx[30] = {0x01400411,0x00080840,0x00040800,0x0001f043,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00811052,0x03800089,0x003c0000,0x01400411,0x0042b405,0x01400411,0x00080800,0x00040400,0xf4800809,0x00380000,0x01bc03d1,0x003cf3d1,0x00340800 }; -static const uint32_t program_gcm_ad[29] = {0x0d800309,0x000000d3,0x01800011,0x00000011,0x0000d003,0x000ec0c5,0x0032d306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xf2800809,0x0000000a }; +static const uint32_t program_gcm_ad[29] = {0x0d800309,0x000000d3,0x01800011,0x00000011,0x0000d003,0x000f00c5,0x00321306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xf2800809,0x0000000a }; -static const uint32_t program_gcm_aes[50] = 
{0x18000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x000ec0c5,0x002ec2c5,0x0032d306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xe8000809,0x0000000a }; +static const uint32_t program_gcm_aes[50] = {0x18000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x000f00c5,0x002f02c5,0x00321306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xe8000809,0x0000000a }; static const uint32_t program_gcm_finish[71] = {0x16000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x0004a054,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x01a40251,0x00249251,0x0000d243,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x01b40351,0x0034d351,0x0020e343,0x0000000a }; @@ -532,25 
+532,20 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st curve25519engine_window_write(sc, unit); /* to each session its own register file */ /* read_addr */ - for (i = 0 ; i < 8 ; i ++) { - /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0); } - /* write_addr */ - /* for (i = 0 ; i < 8 ; i ++) { */ - /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(4,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); */ - /* } */ /* write_len */ - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i&3) == 0) ? ((uint32_t)job->len) : 0); + for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? ((uint32_t)job->len) : 0); } /* data */ - for (i = 0 ; i < 8 ; i ++) { + for (i = 0 ; i < 4 ; i ++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(16,i), job->data[i]); } for (reg = 31 ; reg > 16 ; reg--) { - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[(i&3)+4*(31-reg)]); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i+4*(31-reg)]); } } @@ -597,13 +592,12 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st curve25519engine_window_write(sc, unit); /* to each session its own register file */ /* read_addr */ - for (i = 0 ; i < 8 ; i ++) { - /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? 
((uint32_t)rd_ptr) : 0); */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0); } /* write_len */ - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0); + for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? ((uint32_t)job->len) : 0); } err = start_job(sc); @@ -650,17 +644,16 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st curve25519engine_window_write(sc, unit); /* to each session its own register file */ /* read_addr */ - for (i = 0 ; i < 8 ; i ++) { - /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0); } /* write_addr */ - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), (i == 0) ? ((uint32_t)wr_ptr) : 0); } /* write_len */ - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0); + for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? 
((uint32_t)job->len) : 0); } err = start_job(sc); @@ -707,22 +700,20 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st curve25519engine_window_write(sc, unit); /* to each session its own register file */ /* read_addr */ - for (i = 0 ; i < 8 ; i ++) { - /* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */ - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0); } /* write_addr */ - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); + for (i = 0 ; i < 4 ; i ++) { + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), (i == 0) ? ((uint32_t)wr_ptr) : 0); } /* write_len */ - for (i = 0 ; i < 8 ; i ++) { - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0); + for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ + bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? 
((uint32_t)job->len) : 0); } /* final block */ for (i = 0 ; i < 4 ; i ++) { bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i), job->data[i]); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i+4), job->data[i]); } /* create and generate MMASK */ for (i = 0 ; i < 4 ; i ++) { @@ -736,7 +727,6 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st mask = 0xFFFFFFFF >> (8*(4-(job->len%4))); } bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,i), mask); - bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,(i+4)), mask); } @@ -749,7 +739,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st return err; /* final accum */ - for (i = 0 ; i < 8 ; i ++) { + for (i = 0 ; i < 4 ; i ++) { job->data[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(8,i)); } diff --git a/sbus-to-ztex-gateware-migen/engine.py b/sbus-to-ztex-gateware-migen/engine.py index cacf51c..d18d232 100644 --- a/sbus-to-ztex-gateware-migen/engine.py +++ b/sbus-to-ztex-gateware-migen/engine.py @@ -25,14 +25,17 @@ opcodes = { # mnemonic : [bit coding, docstring] "FIN" : [10, "halt execution and assert interrupt to host CPU that microcode execution is done"], "SHL" : [11, "Wd $\gets$ Ra << 1 // shift Ra left by one and store in Wd"], "XBT" : [12, "Wd[0] $\gets$ Ra[254] // extract the 255th bit of Ra and put it into the 0th bit of Wd"], + "AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"], + # for CLMUL, bit #31 indicates both lanes are needed; currently same speed "CLMUL": [13, "carry-less multiplication; reg-reg only; per 128-bits block"], # basically 256-bits form of vpclmulqdq "GCM_SHLMI": [14, "Shift A left by imm, insert B MSB as dest LSB; reg-reg or reg-imm; per 128-bits block"], # make SHL redundant: SHL %rd, %ra == GCM_SHLMI %rd, %ra, #0, #1 "GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits 
block"], # "GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific "GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block ; imm != 0 -> BYTEREV*"], # + # for AESESMI, bit #31 indicates both lanes are needed; currently same speed "AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0] is 1 for aesesi (shared opcode)" ], + # for MEM, bit #31 indicates both lanes are needed; b[31] == 0 faster as the second access is not done "MEM" : [19, "MEM ; imm[0] == 0 for LOAD, imm[0] == 1 for STORE (beware, store copy the address in the output reg)" ], - "AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"], "MAX" : [21, "Maximum opcode number (for bounds checking)"], } @@ -239,7 +242,10 @@ class Curve25519Const(Module, AutoDoc): 10: [254, "two hundred fifty four", "The number 254 (iteration count)"], 11: [0x00000001_00000000_00000000_00000000_00000001_00000000_00000000_00000000, "increment for GCM counter (LE)", "increment for GCM counter (LE)"], 12: [0x00000000_00000000_00000000_00000010_00000000_00000000_00000000_00000010, "sixteen (twice)", "The number 16 (for block-size address increment)"], - 13: [0x00000000_00000000_00000000_00000001_00000000_00000000_00000000_00000001, "decrement for GCM dual-loops (LE)", "decrement for GCM dual-loops"] + 13: [0x00000000_00000000_00000000_00000001_00000000_00000000_00000000_00000001, "decrement for GCM dual-loops (LE)", "decrement for GCM dual-loops"], + # 14 + # 15 + 16: [16, "sixteen", "The number 16"], } self.adr = Signal(5) self.const = Signal(256) @@ -1462,7 +1468,6 @@ class ExecClmul(ExecUnit, AutoDoc): #self.q_valid.eq(self.start), self.instruction_out.eq(self.instruction_in), ] - self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE")) seq.act("IDLE", @@ -1494,9 +1499,12 @@ class ExecClmul(ExecUnit, AutoDoc): })) seq.act("OUT", self.q_valid.eq(1), - self.q.eq(clmul_buf), - NextState("IDLE"), - ); + If(self.instruction.immediate[8:9], 
+ self.q.eq(clmul_buf), + ).Else( + self.q.eq(Cat(clmul_buf[0:128], Signal(128, reset = 0))) + ), + NextState("IDLE")); class ExecGCMShifts(ExecUnit, AutoDoc): @@ -1753,7 +1761,11 @@ class ExecAES(ExecUnit, AutoDoc): self.sync.mul_clk += [ If(seq.ongoing("AES_EVEN1") | seq.ongoing("AES_EVEN2"), self.q_valid.eq(1), - self.q.eq(aes_buf), + If(self.instruction.immediate[8:9], + self.q.eq(aes_buf), + ).Else( + self.q.eq(Cat(aes_buf[0:128], Signal(128, reset = 0))), + ) ).Else( self.q_valid.eq(0), ) @@ -1824,15 +1836,24 @@ class ExecLS(ExecUnit, AutoDoc): lsseq.act("MEMl2", NextValue(cpar, cpar ^ 1), If(~interface.ack, - NextValue(interface.cyc, 1), - NextValue(interface.stb, 1), - NextValue(interface.sel, 2**len(interface.sel)-1), - NextValue(interface.adr, self.a[132:160]), - NextValue(interface.we, self.instruction.immediate[0]), - NextValue(timeout, 2047), - If(self.instruction.immediate[0], - NextValue(interface.dat_w, self.b[128:256])), - NextState("MEMh") + If(self.instruction.immediate[8:9], + NextValue(interface.cyc, 1), + NextValue(interface.stb, 1), + NextValue(interface.sel, 2**len(interface.sel)-1), + NextValue(interface.adr, self.a[132:160]), + NextValue(interface.we, self.instruction.immediate[0]), + NextValue(timeout, 2047), + If(self.instruction.immediate[0], + NextValue(interface.dat_w, self.b[128:256])), + NextState("MEMh") + ).Else( + NextValue(lbuf[128:256], 0), + If(cpar, ## checkme + NextState("MEM_ODD") + ).Else( + NextState("MEM_EVEN1") + ) + ) )) lsseq.act("MEMh", NextValue(cpar, cpar ^ 1), diff --git a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs index 79c9da0..42974be 100644 --- a/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs +++ b/sbus-to-ztex-gateware-migen/engine_code/engine_code.rs @@ -748,9 +748,8 @@ fn main() -> std::io::Result<()> { gcm_swap64 %0, %0, %0 xor %0, %0, %13 - add %3, %3, #12 // #12 is 16 in both 128 bits halves - // #13 is 1 in both 128 bits halves 
- sub %12, %12, #13 + add %3, %3, #16 + sub %12, %12, #1 // // poly mult accum = ((accum^ad) * H) // C @@ -865,10 +864,10 @@ fn main() -> std::io::Result<()> { gcm_swap64 %0, %0, %0 xor %0, %0, %13 - add %3, %3, #12 // #12 is 16 in both 128 bits halves - add %11, %11, #12 // #12 is 16 in both 128 bits halves - // #13 is 1 in both 128 bits halves - sub %12, %12, #13 + add %3, %3, #16 + add %11, %11, #16 + + sub %12, %12, #1 // // poly mult accum = ((accum^ad) * H) // C @@ -986,10 +985,10 @@ fn main() -> std::io::Result<()> { gcm_swap64 %0, %0, %0 xor %0, %0, %13 - //add %3, %3, #12 // #12 is 16 in both 128 bits halves - //add %11, %11, #12 // #12 is 16 in both 128 bits halves - // #13 is 1 in both 128 bits halves - //sub %12, %12, #13 + //add %3, %3, #16 + //add %11, %11, #16 + + //sub %12, %12, #1 // // poly mult accum = ((accum^ad) * H) // C @@ -1039,10 +1038,9 @@ fn main() -> std::io::Result<()> { gcm_brev64 %9, %9 gcm_swap64 %9, %9, %9 xor %0, %9, %13 - //add %3, %3, #12 // #12 is 16 in both 128 bits halves - //add %11, %11, #12 // #12 is 16 in both 128 bits halves - // #13 is 1 in both 128 bits halves - //sub %12, %12, #13 + //add %3, %3, #16 + //add %11, %11, #16 + //sub %12, %12, #1 // // poly mult accum = ((accum^ad) * H) // C