1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-01-11 23:43:15 +00:00
antonblanchard.microwatt/spi_rxtx.vhdl
Benjamin Herrenschmidt cc4dcb3597 spi: Add SPI Flash controller
This adds an SPI flash controller which supports direct
memory-mapped access to the flash along with a manual
mode to send commands.

The direct mode can be set via generic to default to single
wire or quad mode. The controller supports normal, dual and quad
accesses with configurable commands, clock divider, dummy clocks
etc...

The SPI clock can be an even divider of sys_clk starting at 2
(so max 50Mhz with our typical Arty designs).

A flash offset is carried via generics to syscon to tell SW about
which portion of the flash is reserved for the FPGA bitfile. There
is currently no plumbing to make the CPU reset past that address (TBD).

Note: Operating at 50Mhz has proven unreliable without adding some
delay to the sampling of the input data. I'm working in improving
this, in the meantime, I'm leaving the default set at 25 Mhz.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2020-06-13 00:01:48 +10:00

387 lines
12 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.wishbone_types.all;
entity spi_rxtx is
generic (
DATA_LINES : positive := 1; -- Number of data lines
-- 1=MISO/MOSI, otherwise 2 or 4
INPUT_DELAY : natural range 0 to 1 := 1 -- Delay latching of SPI input:
-- 0=no delay, 1=clk/2
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
--
-- Clock divider
-- SCK = CLK/((CLK_DIV+1)*2) : 0=CLK/2, 1=CLK/4, 2=CLK/6....
--
-- This need to be changed before a command.
-- XX TODO add handshake
clk_div_i : in natural range 0 to 255;
--
-- Command port (includes write data)
--
-- Valid & ready: command sampled when valid=1 and ready=1
cmd_valid_i : in std_ulogic;
cmd_ready_o : out std_ulogic;
-- Command modes:
-- 000 : Single bit read+write
-- 010 : Single bit read
-- 011 : Single bit write
-- 100 : Dual read
-- 101 : Dual write
-- 110 : Quad read
-- 111 : Quad write
cmd_mode_i : in std_ulogic_vector(2 downto 0);
-- # clocks-1 in a command (#bits-1)
cmd_clks_i : in std_ulogic_vector(2 downto 0);
-- Write data (sampled with command)
cmd_txd_i : in std_ulogic_vector(7 downto 0);
--
-- Read data port. Data valid when d_ack=1, no ready
-- signal, receiver must be ready
--
d_rxd_o : out std_ulogic_vector(7 downto 0);
d_ack_o : out std_ulogic := '0';
-- Set when all commands are done. Needed for callers to know when
-- to release CS#
bus_idle_o : out std_ulogic;
--
-- SPI port. These might need to go into special IOBUFs or STARTUPE2 on
-- Xilinx.
--
-- Data lines are organized as follow:
--
-- DATA_LINES = 1
--
-- sdat_o(0) is MOSI (master output slave input)
-- sdat_i(0) is MISO (master input slave output)
--
-- DATA_LINES > 1
--
-- sdat_o(0..n) are DQ(0..n)
-- sdat_i(0..n) are DQ(0..n)
--
-- as such, beware that:
--
-- sdat_o(0) is MOSI (master output slave input)
-- sdat_i(1) is MISO (master input slave output)
--
-- In order to leave dealing with the details of how to wire the tristate
-- and bidirectional pins to the system specific toplevel, we separate
-- the input and output signals, and provide a "sdat_oe" signal which
-- is the "output enable" of each line.
--
sck : out std_ulogic;
sdat_o : out std_ulogic_vector(DATA_LINES-1 downto 0);
sdat_oe : out std_ulogic_vector(DATA_LINES-1 downto 0);
sdat_i : in std_ulogic_vector(DATA_LINES-1 downto 0)
);
end entity spi_rxtx;
architecture rtl of spi_rxtx is
-- Internal clock signal. Output is gated by sck_en_int
signal sck_0 : std_ulogic;
signal sck_1 : std_ulogic;
-- Clock divider latch
signal clk_div : natural range 0 to 255;
-- 1 clk pulses indicating when to send and when to latch
--
-- Typically for CPOL=CPHA
-- sck_send is sck falling edge
-- sck_recv is sck rising edge
--
-- Those pulses are generated "ahead" of the corresponding
-- edge so then are "seen" at the rising sysclk edge matching
-- the corresponding sck edgeg.
signal sck_send : std_ulogic;
signal sck_recv : std_ulogic;
-- Command mode latch
signal cmd_mode : std_ulogic_vector(2 downto 0);
-- Output shift register (use fifo ?)
signal oreg : std_ulogic_vector(7 downto 0);
-- Input latch
signal dat_i_l : std_ulogic_vector(DATA_LINES-1 downto 0);
-- Data ack latch
signal dat_ack_l : std_ulogic;
-- Delayed recv signal for the read machine
signal sck_recv_d : std_ulogic := '0';
-- Input shift register (use fifo ?)
signal ireg : std_ulogic_vector(7 downto 0) := (others => '0');
-- Bit counter
signal bit_count : std_ulogic_vector(2 downto 0);
-- Next/start/stop command signals. Set when counter goes negative
signal next_cmd : std_ulogic;
signal start_cmd : std_ulogic;
signal end_cmd : std_ulogic;
function data_single(mode : std_ulogic_vector(2 downto 0)) return boolean is
begin
return mode(2) = '0';
end;
function data_dual(mode : std_ulogic_vector(2 downto 0)) return boolean is
begin
return mode(2 downto 1) = "10";
end;
function data_quad(mode : std_ulogic_vector(2 downto 0)) return boolean is
begin
return mode(2 downto 1) = "11";
end;
function data_write(mode : std_ulogic_vector(2 downto 0)) return boolean is
begin
return mode(0) = '1';
end;
type state_t is (STANDBY, DATA);
signal state : state_t := STANDBY;
begin
-- We don't support multiple data lines at this point
assert DATA_LINES = 1 or DATA_LINES = 2 or DATA_LINES = 4
report "Unsupported DATA_LINES configuration !" severity failure;
-- Clock generation
--
-- XX HARD WIRE CPOL=1 CPHA=1 for now
sck_gen: process(clk)
variable counter : integer range 0 to 255;
begin
if rising_edge(clk) then
if rst = '1' then
sck_0 <= '1';
sck_1 <= '1';
sck_send <= '0';
sck_recv <= '0';
clk_div <= 0;
elsif counter = clk_div then
counter := 0;
-- Latch new divider
clk_div <= clk_div_i;
-- Internal version of the clock
sck_0 <= not sck_0;
-- Generate send/receive pulses to run out state machine
sck_recv <= not sck_0;
sck_send <= sck_0;
else
counter := counter + 1;
sck_recv <= '0';
sck_send <= '0';
end if;
-- Delayed version of the clock to line up with
-- the up/down signals
--
-- XXX Figure out a better way
if (state = DATA and end_cmd = '0') or (next_cmd = '1' and cmd_valid_i = '1') then
sck_1 <= sck_0;
else
sck_1 <= '1';
end if;
end if;
end process;
-- SPI clock
sck <= sck_1;
-- Ready to start the next command. This is set on the clock down
-- after the counter goes negative.
-- Note: in addition to latching a new command, this will cause
-- the counter to be reloaded.
next_cmd <= '1' when sck_send = '1' and bit_count = "111" else '0';
-- We start a command when we have a valid request at that time.
start_cmd <= next_cmd and cmd_valid_i;
-- We end commands if we get start_cmd and there's nothing to
-- start. This sends up to standby holding CLK high
end_cmd <= next_cmd and not cmd_valid_i;
-- Generate cmd_ready. It will go up and down with sck, we could
-- gate it with cmd_valid to make it look cleaner but that would
-- add yet another combinational loop on the wishbone that I'm
-- to avoid.
cmd_ready_o <= next_cmd;
-- Generate bus_idle_o
bus_idle_o <= '1' when state = STANDBY else '0';
-- Main state machine. Also generates cmd and data ACKs
machine: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
state <= STANDBY;
cmd_mode <= "000";
else
-- First clk down of a new cycle. Latch a request if any
-- or get out.
if start_cmd = '1' then
state <= DATA;
cmd_mode <= cmd_mode_i;
elsif end_cmd = '1' then
state <= STANDBY;
end if;
end if;
end if;
end process;
-- Run the bit counter in DATA state. It will update on rising
-- SCK edges. It starts at d_clks on command latch
count_bit: process(clk)
begin
if rising_edge(clk) then
if start_cmd = '1' then
bit_count <= cmd_clks_i;
elsif state /= DATA then
bit_count <= (others => '1');
elsif sck_recv = '1' then
bit_count <= std_ulogic_vector(unsigned(bit_count) - 1);
end if;
end if;
end process;
-- Shift output data
shift_out: process(clk)
begin
if rising_edge(clk) then
-- Starting a command
if start_cmd = '1' then
oreg <= cmd_txd_i(7 downto 0);
elsif sck_send = '1' then
-- Get shift amount
if data_single(cmd_mode) then
oreg <= oreg(6 downto 0) & '0';
elsif data_dual(cmd_mode) then
oreg <= oreg(5 downto 0) & "00";
else
oreg <= oreg(3 downto 0) & "0000";
end if;
end if;
end if;
end process;
-- Data out
sdat_o(0) <= oreg(7);
dl2: if DATA_LINES > 1 generate
sdat_o(1) <= oreg(6);
end generate;
dl4: if DATA_LINES > 2 generate
sdat_o(2) <= oreg(5);
sdat_o(3) <= oreg(4);
end generate;
-- Data lines direction
dlines: process(all)
begin
for i in DATA_LINES-1 downto 0 loop
sdat_oe(i) <= '0';
if state = DATA then
-- In single mode, we always enable MOSI, otherwise
-- we control the output enable based on the direction
-- of transfer.
--
if i = 0 and (data_single(cmd_mode) or data_write(cmd_mode)) then
sdat_oe(i) <= '1';
end if;
if i = 1 and data_dual(cmd_mode) and data_write(cmd_mode) then
sdat_oe(i) <= '1';
end if;
if i > 0 and data_quad(cmd_mode) and data_write(cmd_mode) then
sdat_oe(i) <= '1';
end if;
end if;
end loop;
end process;
-- Latch input data no delay
input_delay_0: if INPUT_DELAY = 0 generate
process(clk)
begin
if rising_edge(clk) then
dat_i_l <= sdat_i;
end if;
end process;
end generate;
-- Latch input data half clock delay
input_delay_1: if INPUT_DELAY = 1 generate
process(clk)
begin
if falling_edge(clk) then
dat_i_l <= sdat_i;
end if;
end process;
end generate;
-- Shift input data
shift_in: process(clk)
begin
if rising_edge(clk) then
-- Delay the receive signal to match the input latch
if state = DATA then
sck_recv_d <= sck_recv;
else
sck_recv_d <= '0';
end if;
-- Generate read data acks
if bit_count = "000" and sck_recv = '1' then
dat_ack_l <= not cmd_mode(0);
else
dat_ack_l <= '0';
end if;
-- And delay them as well
d_ack_o <= dat_ack_l;
-- Shift register on delayed data & receive signal
if sck_recv_d = '1' then
if DATA_LINES = 1 then
ireg <= ireg(6 downto 0) & dat_i_l(0);
else
if data_dual(cmd_mode) then
ireg <= ireg(5 downto 0) & dat_i_l(1) & dat_i_l(0);
elsif data_quad(cmd_mode) then
ireg <= ireg(3 downto 0) & dat_i_l(3) & dat_i_l(2) & dat_i_l(1) & dat_i_l(0);
else
assert(data_single(cmd_mode));
ireg <= ireg(6 downto 0) & dat_i_l(1);
end if;
end if;
end if;
end if;
end process;
-- Data recieve register
d_rxd_o <= ireg;
end architecture;