1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-01-11 23:43:15 +00:00
antonblanchard.microwatt/dmi_dtm_xilinx.vhdl
Benjamin Herrenschmidt ee52fd4d80 Add a debug (DMI) bus and a JTAG interface to it on Xilinx FPGAs
This adds a simple bus that can be mastered from an external
system via JTAG, which will be used to hookup various debug
modules.

It's loosely based on the RiscV model (hence the DMI name).

The module currently only supports hooking up to a Xilinx BSCANE2
but it shouldn't be too hard to adapt it to support different TAPs
if necessary.

The JTAG protocol proper is not exactly the RiscV one at this point,
though I might still change it.

This comes with some sim variants of Xilinx BSCANE2 and BUFG and a
test bench.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2019-09-20 15:07:49 +10:00

277 lines
8.4 KiB
VHDL

-- Xilinx internal JTAG to DMI interface
--
-- DMI bus
--
-- req : ____/------------\_____
-- addr: xxxx< >xxxxx
-- dout: xxxx< >xxxxx
-- wr : xxxx< >xxxxx
-- din : xxxxxxxxxxxx< >xxx
-- ack : ____________/------\___
--
-- * addr/dout set along with req, can be latched on same cycle by slave
-- * ack & din remain up until req is dropped by master, the slave must
-- provide a stable output on din on reads during that time.
-- * req remains low at until at least one sysclk after ack seen down.
--
-- JTAG (tck) DMI (sys_clk)
--
-- * jtag_req = 1
-- (jtag_req_0) *
-- (jtag_req_1) -> * dmi_req = 1 >
-- *.../...
-- * dmi_ack = 1 <
-- * (dmi_ack_0)
-- * <- (dmi_ack_1)
-- * jtag_req = 0 (and latch dmi_din)
-- (jtag_req_0) *
-- (jtag_req_1) -> * dmi_req = 0 >
-- * dmi_ack = 0 <
-- * (dmi_ack_0)
-- * <- (dmi_ack_1)
--
-- jtag_req can go back to 1 when jtag_rsp_1 is 0
--
-- Questions/TODO:
-- - I use 2 flip fops for sync, is that enough ?
-- - I treat the jtag_reset as an async reset, is that necessary ?
-- - Dbl check reset situation since we have two different resets
-- each only resetting part of the logic...
-- - Look at optionally removing the synchronizer on the ack path,
-- assuming JTAG is always slow enough that ack will have been
-- stable long enough by the time CAPTURE comes in.
-- - We could avoid the latched request by not shifting while a
-- request is in progress (and force TDO to 1 to return a busy
-- status).
--
-- WARNING: This isn't the real DMI JTAG protocol (at least not yet).
-- a command while busy will be ignored. A response of "11"
-- means the previous command is still going, try again.
-- As such We don't implement the DMI "error" status, and
-- we don't implement DTMCS yet... This may still all change
-- but for now it's easier that way as the real DMI protocol
-- requires for a command to work properly that enough TCK
-- are sent while IDLE and I'm having trouble getting that
-- working with UrJtag and the Xilinx BSCAN2 for now.
library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;
library work;
use work.wishbone_types.all;
library unisim;
use unisim.vcomponents.all;
entity dmi_dtm is
generic(ABITS : INTEGER:=8;
DBITS : INTEGER:=32);
port(sys_clk : in std_ulogic;
sys_reset : in std_ulogic;
dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0);
dmi_din : in std_ulogic_vector(DBITS - 1 downto 0);
dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0);
dmi_req : out std_ulogic;
dmi_wr : out std_ulogic;
dmi_ack : in std_ulogic
-- dmi_err : in std_ulogic TODO: Add error response
);
end entity dmi_dtm;
architecture behaviour of dmi_dtm is
-- Signals coming out of the BSCANE2 block
signal jtag_reset : std_ulogic;
signal capture : std_ulogic;
signal update : std_ulogic;
signal drck : std_ulogic;
signal jtag_clk : std_ulogic;
signal sel : std_ulogic;
signal shift : std_ulogic;
signal tdi : std_ulogic;
signal tdo : std_ulogic;
signal tck : std_ulogic;
-- ** JTAG clock domain **
-- Shift register
signal shiftr : std_ulogic_vector(ABITS + DBITS + 1 downto 0);
-- Latched request
signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0);
-- A request is present
signal jtag_req : std_ulogic;
-- Synchronizer for jtag_rsp (sys clk -> jtag_clk)
signal dmi_ack_0 : std_ulogic;
signal dmi_ack_1 : std_ulogic;
-- ** sys clock domain **
-- Synchronizer for jtag_req (jtag clk -> sys clk)
signal jtag_req_0 : std_ulogic;
signal jtag_req_1 : std_ulogic;
-- ** combination signals
signal jtag_bsy : std_ulogic;
signal op_valid : std_ulogic;
signal rsp_op : std_ulogic_vector(1 downto 0);
-- ** Constants **
constant DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00";
constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01";
constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10";
constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00";
constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11";
begin
-- Implement the Xilinx bscan2 for series 7 devices (TODO: use PoC to
-- wrap this if compatibility is required with older devices).
bscan : BSCANE2
generic map (
JTAG_CHAIN => 2
)
port map (
CAPTURE => capture,
DRCK => drck,
RESET => jtag_reset,
RUNTEST => open,
SEL => sel,
SHIFT => shift,
TCK => tck,
TDI => tdi,
TMS => open,
UPDATE => update,
TDO => tdo
);
-- Some examples out there suggest buffering the clock so it's
-- treated as a proper clock net. This is probably needed when using
-- drck (the gated clock) but I'm using the real tck here to avoid
-- missing the update phase so maybe not...
--
clkbuf : BUFG
port map (
-- I => drck,
I => tck,
O => jtag_clk
);
-- dmi_req synchronization
dmi_req_sync : process(sys_clk)
begin
-- sys_reset is synchronous
if rising_edge(sys_clk) then
if (sys_reset = '1') then
jtag_req_0 <= '0';
jtag_req_1 <= '0';
else
jtag_req_0 <= jtag_req;
jtag_req_1 <= jtag_req_0;
end if;
end if;
end process;
dmi_req <= jtag_req_1;
-- dmi_ack synchronization
dmi_ack_sync: process(jtag_clk, jtag_reset)
begin
-- jtag_reset is async (see comments)
if jtag_reset = '1' then
dmi_ack_0 <= '0';
dmi_ack_1 <= '0';
elsif rising_edge(jtag_clk) then
dmi_ack_0 <= dmi_ack;
dmi_ack_1 <= dmi_ack_0;
end if;
end process;
-- jtag_bsy indicates whether we can start a new request, we can when
-- we aren't already processing one (jtag_req) and the synchronized ack
-- of the previous one is 0.
--
jtag_bsy <= jtag_req or dmi_ack_1;
-- decode request type in shift register
with shiftr(1 downto 0) select op_valid <=
'1' when DMI_REQ_RD,
'1' when DMI_REQ_WR,
'0' when others;
-- encode response op
rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK;
-- Some DMI out signals are directly driven from the request register
dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2);
dmi_dout <= request(DBITS + 1 downto 2);
dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0';
-- TDO is wired to shift register bit 0
tdo <= shiftr(0);
-- Main state machine. Handles shift registers, request latch and
-- jtag_req latch. Could be split into 3 processes but it's probably
-- not worthwhile.
--
shifter: process(jtag_clk, jtag_reset)
begin
if jtag_reset = '1' then
shiftr <= (others => '0');
request <= (others => '0');
jtag_req <= '0';
elsif rising_edge(jtag_clk) then
-- Handle jtag "commands" when sel is 1
if sel = '1' then
-- Shift state, rotate the register
if shift = '1' then
shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1);
end if;
-- Update state (trigger)
--
-- Latch the request if we aren't already processing one and
-- it has a valid command opcode.
--
if update = '1' and op_valid = '1' then
if jtag_bsy = '0' then
request <= shiftr;
jtag_req <= '1';
end if;
-- Set the shift register "op" to "busy". This will prevent
-- us from re-starting the command on the next update if
-- the command completes before that.
shiftr(1 downto 0) <= DMI_RSP_BSY;
end if;
-- Request completion.
--
-- Capture the response data for reads and clear request flag.
--
-- Note: We clear req (and thus dmi_req) here which relies on tck
-- ticking and sel set. This means we are stuck with dmi_req up if
-- the jtag interface stops. Slaves must be resilient to this.
--
if jtag_req = '1' and dmi_ack_1 = '1' then
jtag_req <= '0';
if request(1 downto 0) = DMI_REQ_RD then
request(DBITS + 1 downto 2) <= dmi_din;
end if;
end if;
-- Capture state, grab latch content with updated status
if capture = '1' then
shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op;
end if;
end if;
end if;
end process;
end architecture behaviour;