1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-03-29 10:55:44 +00:00
Files
antonblanchard.microwatt/microwatt.core
Paul Mackerras 0fb207be60 fetch1: Implement a simple branch target cache
This implements a cache in fetch1, where each entry stores the address
of a simple branch instruction (b or bc) and the target of the branch.
When fetching sequentially, if the address being fetched matches the
cache entry, then fetching will be redirected to the branch target.
The cache has 1024 entries and is direct-mapped, i.e. indexed by bits
11..2 of the NIA.

The bus from execute1 now carries information about taken and
not-taken simple branches, which fetch1 uses to update the cache.
The cache entry is updated for both taken and not-taken branches, with
the valid bit being set if the branch was taken and cleared if the
branch was not taken.

If fetching is redirected to the branch target then that goes down the
pipe as a predicted-taken branch, and decode1 does not do any static
branch prediction.  If fetching is not redirected, then the next
instruction goes down the pipe as normal and decode1 does its static
branch prediction.

In order to make timing, the lookup of the cache is pipelined, so on
each cycle the cache entry for the current NIA + 8 is read.  This
means that after a redirect (from decode1 or execute1), only the third
and subsequent sequentially-fetched instructions will be able to be
predicted.

This improves the coremark value on the Arty A7-100 from about 180 to
about 190 (more than 5%).

The BTC is optional.  Builds for the Artix 7 35-T part have it off by
default because the extra ~1420 LUTs it takes mean that the design
doesn't fit on the Arty A7-35 board.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2021-01-18 22:32:54 +11:00

457 lines
11 KiB
Core

CAPI=2:
name : ::microwatt:0
filesets:
core:
files:
- decode_types.vhdl
- wishbone_types.vhdl
- common.vhdl
- fetch1.vhdl
- decode1.vhdl
- helpers.vhdl
- decode2.vhdl
- register_file.vhdl
- cr_file.vhdl
- crhelpers.vhdl
- ppc_fx_insns.vhdl
- sim_console.vhdl
- logical.vhdl
- countzero.vhdl
- gpr_hazard.vhdl
- cr_hazard.vhdl
- control.vhdl
- execute1.vhdl
- fpu.vhdl
- loadstore1.vhdl
- mmu.vhdl
- dcache.vhdl
- divider.vhdl
- rotator.vhdl
- writeback.vhdl
- insn_helpers.vhdl
- core.vhdl
- icache.vhdl
- plru.vhdl
- cache_ram.vhdl
- core_debug.vhdl
- utils.vhdl
file_type : vhdlSource-2008
soc:
files:
- wishbone_arbiter.vhdl
- wishbone_debug_master.vhdl
- wishbone_bram_wrapper.vhdl
- soc.vhdl
- xics.vhdl
- syscon.vhdl
- sync_fifo.vhdl
- spi_rxtx.vhdl
- spi_flash_ctrl.vhdl
file_type : vhdlSource-2008
fpga:
files:
- fpga/main_bram.vhdl
- fpga/soc_reset.vhdl
- fpga/pp_fifo.vhd
- fpga/pp_soc_uart.vhd
- fpga/pp_utilities.vhd
- fpga/firmware.hex : {copyto : firmware.hex, file_type : user}
file_type : vhdlSource-2008
xilinx_specific:
files:
- xilinx-mult.vhdl : {file_type : vhdlSource-2008}
- fpga/fpga-random.vhdl : {file_type : vhdlSource-2008}
- fpga/fpga-random.xdc : {file_type : xdc}
debug_xilinx:
files:
- dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008}
debug_dummy:
files:
- dmi_dtm_dummy.vhdl : {file_type : vhdlSource-2008}
nexys_a7:
files:
- fpga/nexys_a7.xdc : {file_type : xdc}
- fpga/clk_gen_plle2.vhd : {file_type : vhdlSource-2008}
- fpga/top-generic.vhdl : {file_type : vhdlSource-2008}
nexys_video:
files:
- fpga/nexys-video.xdc : {file_type : xdc}
- fpga/clk_gen_plle2.vhd : {file_type : vhdlSource-2008}
- fpga/top-nexys-video.vhdl : {file_type : vhdlSource-2008}
acorn_cle_215:
files:
- fpga/acorn-cle-215.xdc : {file_type : xdc}
- fpga/clk_gen_plle2.vhd : {file_type : vhdlSource-2008}
- fpga/top-acorn-cle-215.vhdl : {file_type : vhdlSource-2008}
genesys2:
files:
- fpga/genesys2.xdc : {file_type : xdc}
- fpga/clk_gen_plle2.vhd : {file_type : vhdlSource-2008}
- fpga/top-genesys2.vhdl : {file_type : vhdlSource-2008}
arty_a7:
files:
- fpga/arty_a7.xdc : {file_type : xdc}
- fpga/clk_gen_plle2.vhd : {file_type : vhdlSource-2008}
- fpga/top-arty.vhdl : {file_type : vhdlSource-2008}
cmod_a7-35:
files:
- fpga/cmod_a7-35.xdc : {file_type : xdc}
- fpga/clk_gen_mcmm.vhd : {file_type : vhdlSource-2008}
- fpga/top-generic.vhdl : {file_type : vhdlSource-2008}
litedram:
depend : [":microwatt:litedram"]
liteeth:
depend : [":microwatt:liteeth"]
uart16550:
depend : ["::uart16550"]
targets:
nexys_a7:
default_tool: vivado
filesets: [core, nexys_a7, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
- log_length=2048
- uart_is_16550
- has_fpu
- has_btc
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
acorn-cle-215-nodram:
default_tool: vivado
filesets: [core, acorn_cle_215, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
tools:
vivado: {part : xc7a200tsbg484-2}
toplevel : toplevel
genesys2-nodram:
default_tool: vivado
filesets: [core, genesys2, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- clk_frequency
- use_litedram=false
- no_bram=false
- disable_flatten_core
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550=false
tools:
vivado: {part : xc7k325tffg900-2}
toplevel : toplevel
acorn-cle-215:
default_tool: vivado
filesets: [core, acorn_cle_215, soc, fpga, debug_xilinx, litedram, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- use_litedram=true
- disable_flatten_core
- no_bram
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
generate: [litedram_acorn_cle_215]
tools:
vivado: {part : xc7a200tsbg484-2}
toplevel : toplevel
genesys2:
default_tool: vivado
filesets: [core, genesys2, soc, fpga, debug_xilinx, litedram, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- use_litedram=true
- disable_flatten_core
- no_bram
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550=false
generate: [litedram_genesys2]
tools:
vivado: {part : xc7k325tffg900-2}
toplevel : toplevel
nexys_video-nodram:
default_tool: vivado
filesets: [core, nexys_video, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
- has_fpu
- has_btc
tools:
vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel
nexys_video:
default_tool: vivado
filesets: [core, nexys_video, soc, fpga, debug_xilinx, litedram, uart16550, xilinx_specific]
parameters:
- memory_size
- ram_init_file
- use_litedram=true
- disable_flatten_core
- no_bram
- spi_flash_offset=10485760
- log_length=2048
- uart_is_16550
- has_fpu
- has_btc
generate: [litedram_nexys_video]
tools:
vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel
arty_a7-35-nodram:
default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
- spi_flash_offset=3145728
- log_length=512
- uart_is_16550
- has_uart1
- has_fpu=false
- has_btc=false
tools:
vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel
arty_a7-35:
default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, liteeth, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- use_litedram=true
- use_liteeth=true
- disable_flatten_core
- no_bram
- spi_flash_offset=3145728
- log_length=512
- uart_is_16550
- has_uart1
- has_fpu=false
- has_btc=false
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel
arty_a7-100-nodram:
default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
- spi_flash_offset=4194304
- log_length=2048
- uart_is_16550
- has_uart1
- has_fpu
- has_btc
tools:
vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel
arty_a7-100:
default_tool: vivado
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, liteeth, uart16550, xilinx_specific]
parameters:
- memory_size
- ram_init_file
- use_litedram=true
- use_liteeth=true
- disable_flatten_core
- no_bram
- spi_flash_offset=4194304
- log_length=2048
- uart_is_16550
- has_uart1
- has_fpu
- has_btc
generate: [litedram_arty, liteeth_arty]
tools:
vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel
cmod_a7-35:
default_tool: vivado
filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx, uart16550, xilinx_specific]
parameters :
- memory_size
- ram_init_file
- reset_low=false
- clk_input=12000000
- clk_frequency
- disable_flatten_core
- log_length=512
- uart_is_16550
- has_fpu=false
- has_btc=false
tools:
vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel
synth:
filesets: [core, soc, xilinx_specific]
tools:
vivado: {pnr : none}
toplevel: core
generate:
litedram_arty:
generator: litedram_gen
parameters: {board : arty}
liteeth_arty:
generator: liteeth_gen
parameters: {board : arty}
litedram_nexys_video:
generator: litedram_gen
parameters: {board : nexys-video}
litedram_acorn_cle_215:
generator: litedram_gen
parameters: {board : acorn-cle-215}
litedram_genesys2:
generator: litedram_gen
parameters: {board : genesys2}
parameters:
memory_size:
datatype : int
description : On-chip memory size (bytes). If no_bram is set, this is the size carved out for the DRAM payload
paramtype : generic
default : 16384
ram_init_file:
datatype : file
description : Initial on-chip RAM contents
paramtype : generic
reset_low:
datatype : bool
description : External reset button polarity
paramtype : generic
clk_input:
datatype : int
description : Clock input frequency in HZ (for top-generic based boards)
paramtype : generic
default : 100000000
clk_frequency:
datatype : int
description : Generated system clock frequency in HZ (for top-generic based boards)
paramtype : generic
default : 100000000
has_fpu:
datatype : bool
description : Include a floating-point unit in the core
paramtype : generic
default : true
has_btc:
datatype : bool
description : Include a branch target cache in the core
paramtype : generic
default : true
disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components
paramtype : generic
default : false
use_litedram:
datatype : bool
description : Use liteDRAM
paramtype : generic
default : false
use_liteeth:
datatype : bool
description : Use liteEth
paramtype : generic
default : false
uart_is_16550:
datatype : bool
description : Use 16550-compatible UART from OpenCores
paramtype : generic
default : true
has_uart1:
datatype : bool
description : Enable second UART (always 16550-compatible)
paramtype : generic
default : false
no_bram:
datatype : bool
description : No internal block RAM (only DRAM and init code carrying payload)
paramtype : generic
default : false
spi_flash_offset:
datatype : int
description : Offset (in bytes) in the SPI flash of the code payload to run
paramtype : generic
log_length:
datatype : int
description : Length of the core log buffer in entries (32 bytes each)
paramtype : generic