1
0
mirror of https://github.com/Gehstock/Mist_FPGA.git synced 2026-05-04 07:19:03 +00:00
Files
Gehstock.Mist_FPGA/common/CPU/68000/FX68k/fx68kAlu.sv
2019-07-22 23:42:05 +02:00

480 lines
11 KiB
Systemverilog

//
// FX 68K
//
// M68K cycle accurate, fully synchronous
// Copyright (c) 2018 by Jorge Cwik
//
// ALU
//
`timescale 1 ns / 1 ns
// Number of bits in a flag mask (the CCR mask registers in fx68kAlu are 5 bits wide).
localparam MASK_NBITS = 5;

// ALU operation codes, as carried on the 5-bit 'oper' register of fx68kAlu.
// Code 0 is intentionally left unassigned.

// Logic
localparam OP_AND  = 1;
localparam OP_OR   = 8;
localparam OP_EOR  = 9;

// Add / subtract family (all routed through the shared adder/subtractor)
localparam OP_SUB  = 2;     // carry in = 0
localparam OP_SUBX = 3;     // carry in = X flag
localparam OP_ADD  = 4;     // carry in = 0
localparam OP_SUB0 = 7;     // carry in = 1 (used by NOT)
localparam OP_SUBC = 10;    // carry in = core C flag
localparam OP_ADDC = 11;    // carry in = core C flag
localparam OP_ADDX = 12;    // carry in = X flag

// Byte to word sign extension
localparam OP_EXT  = 5;

// BCD (result comes from the registered BCD correction)
localparam OP_SBCD = 6;
localparam OP_ABCD = 22;

// Shifts and rotates
localparam OP_ASL  = 13;
localparam OP_ASR  = 14;
localparam OP_LSL  = 15;
localparam OP_LSR  = 16;
localparam OP_ROL  = 17;
localparam OP_ROR  = 18;
localparam OP_ROXL = 19;
localparam OP_ROXR = 20;
localparam OP_SLAA = 21;    // left shift whose carry in is taken from aluColumn[1]
//
// fx68kAlu
//
// 16-bit ALU datapath: operand muxes, adder/subtractor, shifter hookup,
// registered BCD correction, condition code generation and the PSW CCR register.
//
// Inputs:
//   clk                 Clock. Every register in this module updates on its rising edge.
//   pwrUp               While asserted, clears pswCcr.
//   enT1, enT3, enT4    Enables qualifying register updates for the T1 / T3 / T4 phases.
//   ird                 Instruction word. Feeds rowDecoder; bits 8, 7 and 6 are also sampled directly.
//   aluColumn           Column selector for aluGetOp / ccrTable. When zero, the result latches are not updated.
//   aluDataCtrl         Data operand select: 00 = iDataBus, 01 = all zeros, 11 = all ones,
//                       10 = "corf" step (data operand is don't-care).
//   aluAddrCtrl         Address operand select: 1 = alub, 0 = iAddrBus.
//   alueClkEn           Loads alue from iDataBus on T3.
//   ftu2Ccr             Loads pswCcr from ftu[4:0] on T3.
//   init, finish        Either one enables the pswCcr update on T3; finish also goes to ccrTable
//                       and selects the "clear-only" Z flag handling.
//   aluIsByte           Byte size flag, registered on T3.
//   ftu, alub           Source buses (see ftu2Ccr / aluAddrCtrl).
//   iDataBus, iAddrBus  Execution unit buses supplying the operands.
//
// Outputs:
//   ze                  Inverse of the Z bit of the core (non PSW) flags.
//   alue                Extension register; forms the high word of the 32-bit shifter input.
//   ccr                 pswCcr zero extended to 8 bits. Declared 'reg' but driven by a continuous assign.
//   aluOut              Registered ALU result (aluLatch).
//
module fx68kAlu ( input clk, pwrUp, enT1, enT3, enT4,
input [15:0] ird,
input [2:0] aluColumn,
input [1:0] aluDataCtrl,
input aluAddrCtrl, alueClkEn, ftu2Ccr, init, finish, aluIsByte,
input [15:0] ftu,
input [15:0] alub,
input [15:0] iDataBus, input [15:0] iAddrBus,
output ze,
output reg [15:0] alue,
output reg [7:0] ccr,
output [15:0] aluOut);
// One-hot row encodings: ALU_ROW_nn has only bit nn set.
// They are not referenced inside this module, which indexes 'row' by bit number instead
// (presumably they are used by the decoder modules defined elsewhere - confirm).
`define ALU_ROW_01 16'h0002
`define ALU_ROW_02 16'h0004
`define ALU_ROW_03 16'h0008
`define ALU_ROW_04 16'h0010
`define ALU_ROW_05 16'h0020
`define ALU_ROW_06 16'h0040
`define ALU_ROW_07 16'h0080
`define ALU_ROW_08 16'h0100
`define ALU_ROW_09 16'h0200
`define ALU_ROW_10 16'h0400
`define ALU_ROW_11 16'h0800
`define ALU_ROW_12 16'h1000
`define ALU_ROW_13 16'h2000
`define ALU_ROW_14 16'h4000
`define ALU_ROW_15 16'h8000
// Bit positions for flags in CCR
localparam CF = 0, VF = 1, ZF = 2, NF = 3, XF = 4;
// aluLatch: registered ALU result, exported as aluOut.
// pswCcr:   architectural flags, exported as ccr.
// ccrCore:  unmasked flags of the last ALU operation (feeds ze and the ADDC/SUBC carry in).
reg [15:0] aluLatch;
reg [4:0] pswCcr;
reg [4:0] ccrCore;
// Combinational result and flags of the currently selected operation.
logic [15:0] result;
logic [4:0] ccrTemp;
reg coreH; // half carry latch
// Outputs of the adder/subtractor (task mySubber).
logic [15:0] subResult;
logic subHcarry;
logic subCout, subOv;
assign aluOut = aluLatch;
assign ze = ~ccrCore[ ZF]; // Check polarity !!!
//
// Control
// Signals derived from IRD *must* be registered on either T3 or T4
// Signals derived from nano rom can be registered on T4.
reg [15:0] row;
reg isArX; // Don't set Z
reg noCcrEn;
reg isByte;
reg [4:0] ccrMask;
reg [4:0] oper;
logic [15:0] aOperand, dOperand;
// aluDataCtrl == 2'b10 marks a corf step (see dOperand mux below).
wire isCorf = ( aluDataCtrl == 2'b10);
// Combinational decode of ird; registered into row / isArX / noCcrEn on T3.
wire [15:0] cRow;
wire cIsArX;
wire cNoCcrEn;
rowDecoder rowDecoder(
.ird ( ird),
.row ( cRow),
.noCcrEn ( cNoCcrEn),
.isArX ( cIsArX)
);
// Get Operation & CCR Mask from row/col
// Registering them on T4 increase performance. But slowest part seems to be corf !
wire [4:0] cMask;
wire [4:0] aluOp;
aluGetOp aluGetOp(
.row ( row),
.col ( aluColumn),
.isCorf ( isCorf),
.aluOp ( aluOp)
);
ccrTable ccrTable(
.col ( aluColumn),
.row ( row),
.finish ( finish),
.ccrMask ( cMask)
);
// Inefficient, uCode could help !
// row[7] / row[1] are the bit positions of ALU_ROW_07 / ALU_ROW_01.
wire shftIsMul = row[7];
wire shftIsDiv = row[1];
wire [31:0] shftResult;
// Registered outputs of aluCorf (captured on T1).
reg [7:0] bcdLatch;
reg bcdCarry, bcdOverf;
reg isLong;
reg rIrd8;
logic isShift;
logic shftCin, shftRight, addCin;
// Register some decoded signals
// T3: values decoded from ird plus the byte size flag.
// T4: long flag, CCR mask and operation code (the latter two depend on 'row' registered on T3).
always_ff @( posedge clk) begin
if( enT3) begin
row <= cRow;
isArX <= cIsArX;
noCcrEn <= cNoCcrEn;
rIrd8 <= ird[8];
isByte <= aluIsByte;
end
if( enT4) begin
// Decode if long shift
// MUL and DIV are long (but special !)
isLong <= (ird[7] & ~ird[6]) | shftIsMul | shftIsDiv;
ccrMask <= cMask;
oper <= aluOp;
end
end
// Operand selection
always_comb begin
// Dest (addr) operand source
// If aluCsr (depends on column/row) addrbus is shifted !!
aOperand = (aluAddrCtrl ? alub : iAddrBus);
// Second (data,source) operand mux
case( aluDataCtrl)
2'b00: dOperand = iDataBus;
2'b01: dOperand = 'h0000;
2'b11: dOperand = 'hffff;
// 2'b10: dOperand = bcdResult;
// corf step: the result is taken from bcdLatch, so the data operand is don't-care.
2'b10: dOperand = 'X;
endcase
end
// Execution
// shift operand MSB. Input in ASR/ROL. Carry in right.
// Can't be registered because uses bus operands that aren't available early !
// Picks bit 15 of alue for long operations, otherwise bit 7 or bit 15 of aOperand by size.
wire shftMsb = isLong ? alue[15] : (isByte ? aOperand[7] : aOperand[15]);
// The shifter sees { alue, aOperand} as a 32-bit value; words are swapped for MUL/DIV rows.
aluShifter shifter(
.data ( { alue, aOperand}),
.swapWords ( shftIsMul | shftIsDiv),
.cin ( shftCin),
.dir ( shftRight),
.isByte ( isByte),
.isLong ( isLong),
.result ( shftResult)
);
wire [7:0] bcdResult;
wire bcdC, bcdV;
// BCD correction of the previous (latched) binary result.
// bAdd is low only for OP_SBCD; carry in is the X flag.
aluCorf aluCorf(
.binResult ( aluLatch[7:0]),
.hCarry ( coreH),
.bAdd ( (oper != OP_SBCD) ),
.cin ( pswCcr[ XF]),
.bcdResult ( bcdResult),
.dC ( bcdC),
.ov ( bcdV));
// BCD adjust is among the slowest processing on ALU !
// Precompute and register BCD result on T1
// We don't need to wait for execution buses because corf is always added to ALU previous result
always_ff @( posedge clk)
if( enT1) begin
bcdLatch <= bcdResult;
bcdCarry <= bcdC;
bcdOverf <= bcdV;
end
// Adder carry in selector
// Don't-care (X) for operations that do not use the adder.
always_comb
begin
case( oper)
OP_ADD, OP_SUB: addCin = 1'b0;
OP_SUB0: addCin = 1'b1; // NOT = 0 - op - 1
OP_ADDC,OP_SUBC: addCin = ccrCore[ CF];
OP_ADDX,OP_SUBX: addCin = pswCcr[ XF];
default: addCin = 1'bX;
endcase
end
// Shifter carry in and direction selector
// Both are don't-care (X) for non shift operations.
always_comb begin
case( oper)
OP_LSL, OP_ASL, OP_ROL, OP_ROXL, OP_SLAA: shftRight = 1'b0;
OP_LSR, OP_ASR, OP_ROR, OP_ROXR: shftRight = 1'b1;
default: shftRight = 1'bX;
endcase
case( oper)
OP_LSR,
OP_ASL,
OP_LSL: shftCin = 1'b0;
OP_ROL,
OP_ASR: shftCin = shftMsb;
OP_ROR: shftCin = aOperand[0];
OP_ROXL,
OP_ROXR:
// On the MUL row the bit shifted in is N ^ V or C, selected by the registered ird[8].
// Otherwise it is the X flag.
if( shftIsMul)
shftCin = rIrd8 ? pswCcr[NF] ^ pswCcr[VF] : pswCcr[ CF];
else
shftCin = pswCcr[ XF];
OP_SLAA: shftCin = aluColumn[1]; // col4 -> 0, col 6-> 1
default: shftCin = 'X;
endcase
end
// ALU operation selector
// Produces 'result' and 'isShift' for the current 'oper'. The adder/subtractor is always evaluated.
always_comb begin
// sub is DATA - ADDR
// Fourth argument (bAdd) is high for the three add opcodes, low otherwise.
mySubber( aOperand, dOperand, addCin,
(oper == OP_ADD) | (oper == OP_ADDC) | (oper == OP_ADDX),
isByte, subResult, subCout, subOv);
isShift = 1'b0;
case( oper)
OP_AND: result = aOperand & dOperand;
OP_OR: result = aOperand | dOperand;
OP_EOR: result = aOperand ^ dOperand;
// Sign extend the low byte of aOperand to 16 bits.
OP_EXT: result = { {8{aOperand[7]}}, aOperand[7:0]};
OP_SLAA,
OP_ASL, OP_ASR,
OP_LSL, OP_LSR,
OP_ROL, OP_ROR,
OP_ROXL, OP_ROXR:
begin
// Low word of the shifter output; the high word goes to alue (see T3 process below).
result = shftResult[15:0];
isShift = 1'b1;
end
OP_ADD,
OP_ADDC,
OP_ADDX,
OP_SUB,
OP_SUBC,
OP_SUB0,
OP_SUBX: result = subResult;
// BCD: low byte from the registered correction, high byte is don't-care.
OP_ABCD,
OP_SBCD: result = { 8'hXX, bcdLatch};
default: result = 'X;
endcase
end
// mySubber: shared adder/subtractor.
//
//   inpa, inpb   Operands. Computes inpb + inpa + cin when bAdd is set,
//                inpb - inpa - cin otherwise.
//   cin          Carry (or borrow) in.
//   bAdd         1 = add, 0 = subtract.
//   isByte       1 = operate on bits [7:0] and sign extend the result to 16 bits.
//   result       16-bit result.
//   cout         Bit 8 (byte) or bit 16 (word) of the extended sum/difference.
//   ov           Signed overflow.
//
// Side effect: also writes the module level signal subHcarry.
task mySubber;
input [15:0] inpa, inpb;
input cin, bAdd, isByte;
output reg [15:0] result;
output cout, ov;
// Not very efficient!
logic [16:0] rtemp;
logic rm,sm,dm,tsm;
begin
if( isByte)
begin
rtemp = bAdd ? { 1'b0, inpb[7:0]} + { 1'b0, inpa[7:0]} + cin:
{ 1'b0, inpb[7:0] } - { 1'b0, inpa[7:0]} - cin;
result = { {8{ rtemp[7]}}, rtemp[7:0]};
cout = rtemp[8];
end
else begin
rtemp = bAdd ? { 1'b0, inpb } + { 1'b0, inpa} + cin:
{ 1'b0, inpb } - { 1'b0, inpa} - cin;
result = rtemp[ 15:0];
cout = rtemp[16];
end
// Sign bits of result (rm), inpb (dm) and inpa (tsm) at the operation size.
rm = isByte ? rtemp[7] : rtemp[15];
dm = isByte ? inpb[ 7] : inpb[ 15];
tsm = isByte ? inpa[ 7] : inpa[ 15];
// For subtraction the sign of inpa is inverted, so one overflow equation serves both cases:
// overflow when both (effective) operand signs agree and the result sign differs.
sm = bAdd ? tsm : ~tsm;
ov = (sm & dm & ~rm) | (~sm & ~dm & rm);
// Store half carry for bcd correction
// a ^ b ^ sum at bit 4 recovers the carry/borrow into bit 4 (out of the low nibble).
subHcarry = inpa[4] ^ inpb[4] ^ rtemp[4];
end
endtask
// CCR flags process
// Builds the unmasked flags (ccrTemp) for the current operation.
// Defaults: X keeps the PSW value, C and V are cleared, Z and N come from 'result' at the operation size.
always_comb begin
ccrTemp[XF] = pswCcr[XF]; ccrTemp[CF] = 0; ccrTemp[VF] = 0;
// Not on all operators !!!
ccrTemp[ ZF] = isByte ? ~(| result[7:0]) : ~(| result);
ccrTemp[ NF] = isByte ? result[7] : result[15];
// NOTE(review): this unique case has no default item. Values of 'oper' not listed below
// (e.g. 0 or X) match nothing, which simulators may report as a unique-case violation;
// the defaults assigned above then remain in effect.
unique case( oper)
OP_EXT:
// Division overflow.
if( aluColumn == 5) begin
ccrTemp[VF] = 1'b1; ccrTemp[NF] = 1'b1;
end
OP_SUB0, // used by NOT
OP_OR,
OP_EOR:
begin
ccrTemp[CF] = 0; ccrTemp[VF] = 0;
end
OP_AND:
begin
// ROXL/ROXR indeed copy X to C in column 1 (OP_AND), executed before entering the loop.
// Needed when rotate count is zero, the ucode with the ROX operator never reached.
// C must be set to the value of X, X remains unaffected.
if( (aluColumn == 1) & (row[11] | row[8]))
ccrTemp[CF] = pswCcr[XF];
else
ccrTemp[CF] = 0;
ccrTemp[VF] = 0;
end
// Assumes col 3 of DIV use C and not X !
// V will be set in other cols (2/3) of DIV
OP_SLAA: ccrTemp[ CF] = aOperand[15];
OP_LSL,OP_ROXL:
begin
ccrTemp[ CF] = shftMsb;
ccrTemp[ XF] = shftMsb;
ccrTemp[ VF] = 1'b0;
end
OP_LSR,OP_ROXR:
begin
// 0 Needed for mul, or carry gets in high word
ccrTemp[ CF] = shftIsMul ? 1'b0 : aOperand[0];
ccrTemp[ XF] = aOperand[0];
// Not relevant for MUL, we clear it at mulm6 (1f) anyway.
// Note that MUL can never overflow!
ccrTemp[ VF] = 0;
// Z is checking here ALU (low result is actually in ALUE).
// But it is correct, see comment above.
end
OP_ASL:
begin
ccrTemp[ XF] = shftMsb; ccrTemp[ CF] = shftMsb;
// V set if msb changed on any shift.
// Otherwise clear previously on OP_AND (col 1i).
// Accumulates: previous V OR (current msb differs from the bit just below it).
ccrTemp[ VF] = pswCcr[VF] | (shftMsb ^
(isLong ? alue[15-1] : (isByte ? aOperand[7-1] : aOperand[15-1])) );
end
OP_ASR:
begin
ccrTemp[ XF] = aOperand[0]; ccrTemp[ CF] = aOperand[0];
ccrTemp[ VF] = 0;
end
// X not changed on ROL/ROR !
OP_ROL: ccrTemp[ CF] = shftMsb;
OP_ROR: ccrTemp[ CF] = aOperand[0];
OP_ADD,
OP_ADDC,
OP_ADDX,
OP_SUB,
OP_SUBC,
OP_SUBX:
begin
ccrTemp[ CF] = subCout;
ccrTemp[ XF] = subCout;
ccrTemp[ VF] = subOv;
end
OP_ABCD,
OP_SBCD:
begin
ccrTemp[ XF] = bcdCarry;
ccrTemp[ CF] = bcdCarry;
ccrTemp[ VF] = bcdOverf;
end
endcase
end
// Core and psw latched at the same cycle
// CCR filter
// CCR out mux for Z & C flags
// Z flag for 32-bit result
// Not described, but should be used also for instructions
// that clear but not set Z (ADDX/SUBX/ABCD, etc)!
logic [4:0] ccrMasked;
always_comb begin
// Per bit: take the new flag where ccrMask is set, keep the PSW flag elsewhere.
ccrMasked = (ccrTemp & ccrMask) | (pswCcr & ~ccrMask);
// Overrides the masked Z: Z stays set only if it was already set and the new result is zero.
if( finish | isCorf | isArX)
ccrMasked[ ZF] = ccrTemp[ ZF] & pswCcr[ ZF];
end
// Result, extension register and flag registers.
always_ff @( posedge clk) begin
if( enT3) begin
// Update latches from ALU operators
// Only when a column is selected (aluColumn != 0).
if( (| aluColumn)) begin
aluLatch <= result;
coreH <= subHcarry;
// Update CCR core
// NOTE(review): this inner test repeats the enclosing condition and is redundant.
if( (| aluColumn))
ccrCore <= ccrTemp; // Most bits not really used
end
// alue: a bus load has priority; otherwise it takes the high word of a shift result.
if( alueClkEn)
alue <= iDataBus;
else if( isShift & (| aluColumn))
alue <= shftResult[31:16];
end
// CCR
// Originally on T3-T4 edge pulse !!
// Might be possible to update on T4 (but not after T0) from partial result registered on T3, it will increase performance!
// Priority: pwrUp clear, then load from ftu, then ALU flags (unless noCcrEn) on init or finish.
if( pwrUp)
pswCcr <= '0;
else if( enT3 & ftu2Ccr)
pswCcr <= ftu[4:0];
else if( enT3 & ~noCcrEn & (finish | init))
pswCcr <= ccrMasked;
end
assign ccr = { 3'b0, pswCcr};
endmodule