#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "soc.h"
#include "cpu.h"

//we emulate a device with no DCL decoding (4 ram banks max)

//Complete state of the emulated CPU. Copied by value into the backtrace ring on every instruction.
struct CPU {
	uint32_t pc			: 24;	//program counter; jump targets computed in this file are at most 12 bits wide
	uint32_t A			: 4;	//accumulator
	uint32_t C			: 1;	//carry bit
	uint32_t testSignal	: 1;	//test input, consulted by JCN; set via cpuPrvSetTestSignal()

	uint32_t stack[3];	//ascending empty stack: return addresses, stackTop indexes the next free slot
	uint8_t src[8];		//value latched by the SRC instruction, one slot per possible ramCL value
	uint8_t ramCL		: 3;	//basically a ram bank; loaded from the low 3 bits of A by DCL
	uint32_t stackTop	: 2;	//next slot JMS will write (wraps 2 -> 0)
	uint32_t stackUse	: 2;	//number of live return addresses (saturates at 3); BBL aborts when it is 0

	//the sixteen 4-bit index registers
	struct {
		uint8_t val	:4;
	} r[16];

	uint64_t cy;		//cycle count: incremented once per instruction byte fetched, and once more for FIN
	uint64_t instrs;	//instruction count; not the same as cy - jumps count as one instr, but two cy

};

//the one and only emulated CPU instance; every function in this file operates on it
struct CPU mCPU;


//defined elsewhere; not referenced in the visible part of this file
extern uint32_t numMipsCycles;
//defined elsewhere; when true, cpuPrvInstr() prints a trace line to stderr for every instruction
extern bool report;


//Read one of the sixteen 4-bit index registers.
// regIdx: register number; only the low 4 bits are used, so an out-of-range
//         index wraps into 0..15 instead of reading outside the register file
// returns: the register value (0..15)
uint_fast8_t cpuRegRead(uint_fast8_t regIdx)
{
	return mCPU.r[regIdx & 0x0f].val;
}

//Write one of the sixteen 4-bit index registers.
// regIdx: register number; only the low 4 bits are used, so an out-of-range
//         index wraps into 0..15 instead of writing outside the register file
// val:    value to store; only the low 4 bits are kept (the register is 4 bits wide)
void cpuRegWrite(uint_fast8_t regIdx, uint_fast8_t val)
{
	mCPU.r[regIdx & 0x0f].val = val & 0x0f;
}


//Assemble a 32-bit value from eight consecutive RAM nibbles starting at ofst.
//The nibble at ofst is the least significant one, the nibble at ofst + 7 the most significant.
static uint32_t cpuPrvReadU32(uint_fast16_t ofst)
{
	uint32_t result = 0;
	uint_fast8_t remaining;

	//walk from the most significant nibble down to the least significant one
	for (remaining = 8; remaining > 0; remaining--)
		result = (result << 4) + cpuExtRamRead(ofst + remaining - 1) % 16;

	return result;
}


//RAM nibble offsets, all of the form (1 * 256 + x). RAM addresses in this file are formed
//as 256 * ramCL + src, so these presumably live in RAM bank 1 - TODO confirm.
//None of them is referenced in the visible part of this file.
//NOTE(review): the names look like emulated-MIPS state (PC, CAUSE, STATUS, ...) - confirm against the firmware.
#define DCL1_PC			(1 * 256 + 0x00)
#define DCL1_CAUSE		(1 * 256 + 0x10)
#define DCL1_STATUS		(1 * 256 + 0x18)
#define DCL1_TMP32A		(1 * 256 + 0xB0)
#define DCL1_TMP32B		(1 * 256 + 0xA8)
#define DCL1_TMP32C		(1 * 256 + 0xA0)
#define DCL1_BADVA		(1 * 256 + 0x80)
#define DCL1_ENTRYHI	(1 * 256 + 0x90)
#define DCL1_ENTRYLO	(1 * 256 + 0x98)
#define DCL1_INSTR		(1 * 256 + 0x68)

/*
	NOTE: carry usage for subtract is NOT well documented. Tests were done on a real 4004. The following are the results.
	Summary: C_in = "borrow", C_out = "not borrow", SBM acts same as SUB


	
	;SUB
	;A - reg, c = c_in -> a = a_out, c = c_out
	;6 - 7, c=0   -> a = f, c = 0
	;6 - 6, c=0   -> a = 0, c = 1
	;6 - 5, c=0   -> a = 1, c = 1
	;6 - 7, c=1   -> a = e, c = 0
	;6 - 6, c=1   -> a = f, c = 0
	;6 - 5, c=1   -> a = 0, c = 1

	;SBM
	;A - mem, c = c_in -> a = a_out, c = c_out
	;6 - 7, c=0   -> a = f, c = 0
	;6 - 6, c=0   -> a = 0, c = 1
	;6 - 5, c=0   -> a = 1, c = 1
	;6 - 7, c=1   -> a = e, c = 0
	;6 - 6, c=1   -> a = f, c = 0
	;6 - 5, c=1   -> a = 0, c = 1

*/

//File-local scratch state. None of these is read or written in the visible part of this file.
//NOTE(review): names suggest bookkeeping for checking a multiply routine - confirm or remove if unused.
static uint32_t mA, mB, mA2, mB2;
static bool mulSigned, mulInvert;
static uint8_t mR12_at_inv_check;
static uint64_t startCy;


//number of most-recent instructions kept in the backtrace ring
#define BACKTRACE_LENGTH		16384


//Backtrace ring: cpuPrvInstr() stores a CPU snapshot and the current ROM page for every
//instruction; cpuDumpBacktrace() prints the ring oldest-first.
struct CPU mCpuBacktrace[BACKTRACE_LENGTH];
uint8_t mPageBacktrace[BACKTRACE_LENGTH];
uint32_t mBacktraceWritePtr = 0;	//next slot to overwrite, i.e. the oldest entry


//profiling
//execution count per ROM address, indexed by (rom page * 4096 + pc)
uint64_t mNumUsed[8192];

//Expose the per-address execution counters (read-only view of mNumUsed).
const uint64_t* cpuPrvGetProfilingData(void)
{
	return &mNumUsed[0];
}

//Print the whole backtrace ring to stderr, oldest entry first.
//Every one of the BACKTRACE_LENGTH slots is printed, whether or not it was ever written.
//The stored pc is the value after the opcode fetch, hence the "- 1" when printing it.
void cpuDumpBacktrace(void)
{
	unsigned age;

	//age counts down from BACKTRACE_LENGTH to 1; the slot at the write pointer is the oldest
	for (age = BACKTRACE_LENGTH; age != 0; age--) {
		uint32_t pos = (BACKTRACE_LENGTH - age + mBacktraceWritePtr) % BACKTRACE_LENGTH;

		fprintf(stderr, "@%5u cy ago: [%d,0x%03x], A=0x%x C=%u src={0x%02x 0x%02x 0x%02x 0x%02x} R(hex) = {%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x}, stackUse=%u, DCL %u\n",
			age,
			mPageBacktrace[pos],
			(0xfff & (mCpuBacktrace[pos].pc - 1)),
			mCpuBacktrace[pos].A,
			mCpuBacktrace[pos].C,
			mCpuBacktrace[pos].src[0], mCpuBacktrace[pos].src[1],
			mCpuBacktrace[pos].src[2], mCpuBacktrace[pos].src[3],
			mCpuBacktrace[pos].r[0].val, mCpuBacktrace[pos].r[1].val,
			mCpuBacktrace[pos].r[2].val, mCpuBacktrace[pos].r[3].val,
			mCpuBacktrace[pos].r[4].val, mCpuBacktrace[pos].r[5].val,
			mCpuBacktrace[pos].r[6].val, mCpuBacktrace[pos].r[7].val,
			mCpuBacktrace[pos].r[8].val, mCpuBacktrace[pos].r[9].val,
			mCpuBacktrace[pos].r[10].val, mCpuBacktrace[pos].r[11].val,
			mCpuBacktrace[pos].r[12].val, mCpuBacktrace[pos].r[13].val,
			mCpuBacktrace[pos].r[14].val, mCpuBacktrace[pos].r[15].val,
			mCpuBacktrace[pos].stackUse,
			mCpuBacktrace[pos].ramCL);
	}
}

//Fetch, log and execute exactly one instruction on mCPU.
//
//Per call:
// - the opcode is fetched from ROM at pc and pc is advanced
// - a snapshot of the CPU (taken after the fetch, before execution) and the current
//   ROM page are appended to the backtrace ring
// - cy and instrs are each incremented; two-byte instructions and FIN add one more to cy
// - if the global `report` is set, a trace line is written to stderr
// - the per-address profiling counter is bumped (only when the address fits in mNumUsed)
// - the opcode is executed
//
//Aborts the process on BBL with an empty call stack and on any opcode that is not
//implemented here (0xe2, 0xe3, 0xfe, 0xff).
//
//Arithmetic relies on bit-field truncation: assigning a wider temporary to the 4-bit A
//keeps the low nibble, and assigning (t >> 4) to the 1-bit C keeps only bit 4 of t.
static void cpuPrvInstr(void)
{
	uint32_t fullPc = (socGetRomPage() * 4096) + mCPU.pc;
	uint8_t instr = cpuExtRomRead(mCPU.pc++);

	//record state for the post-mortem backtrace (pc already points past the opcode)
	mCpuBacktrace[mBacktraceWritePtr] = mCPU;
	mPageBacktrace[mBacktraceWritePtr] = socGetRomPage();
	if (++mBacktraceWritePtr == BACKTRACE_LENGTH)
		mBacktraceWritePtr = 0;

	mCPU.cy++;
	mCPU.instrs++;

	if (report) {

		fprintf(stderr, "[%d,0x%03x] = 0x%02x, A=0x%x C=%u src={0x%02x 0x%02x 0x%02x 0x%02x} R(hex) = {%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x}, stackUse=%u, DCL %u\n",
			socGetRomPage(), (0xfff & fullPc), instr, mCPU.A, mCPU.C, mCPU.src[0], mCPU.src[1], mCPU.src[2], mCPU.src[3],
			mCPU.r[0].val, mCPU.r[1].val, mCPU.r[2].val, mCPU.r[3].val, mCPU.r[4].val, mCPU.r[5].val, mCPU.r[6].val, mCPU.r[7].val, 
			mCPU.r[8].val, mCPU.r[9].val, mCPU.r[10].val, mCPU.r[11].val, mCPU.r[12].val, mCPU.r[13].val, mCPU.r[14].val, mCPU.r[15].val,
			mCPU.stackUse, mCPU.ramCL);
	}

	//profiling: the table only covers 8192 addresses, so never index past it
	//(fullPc can exceed that for ROM pages >= 2 or if pc runs past 0xfff)
	if (fullPc < sizeof(mNumUsed) / sizeof(mNumUsed[0]))
		mNumUsed[fullPc]++;

	//intentionally empty: presumably a spot for per-address debug hooks
	switch (fullPc) {
		
	}


	switch (instr) {
		case 0x00: {				//NOP
			break;
		}

		case 0x01 ... 0x0f: {		//hyper
			//handled entirely by the SoC layer; result is A in bits 0..3, C in bit 4
			uint8_t t = cpuExtHyper(instr, mCPU.A, mCPU.C);

			mCPU.A  = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0x10 ... 0x1f: {		//JCN

			uint8_t second = cpuExtRomRead(mCPU.pc++);
			mCPU.cy++;

			//bit0: test signal is low, bit1: carry set, bit2: accumulator zero; bit3 inverts the result
			bool cond = ((instr & 0x01) && !mCPU.testSignal) || ((instr & 0x02) && mCPU.C) || ((instr & 0x04) && !mCPU.A);

			if (instr & 0x08)
				cond = !cond;

			//target is within the page of the byte following this instruction
			if (cond)
				mCPU.pc = (mCPU.pc & 0xf00) + second;
			break;
		}

		case 0x20:
		case 0x22:
		case 0x24:
		case 0x26:
		case 0x28:
		case 0x2a:
		case 0x2c:
		case 0x2e: {				//FIM
			
			//load the immediate byte into a register pair: high nibble to the even register
			uint8_t second = cpuExtRomRead(mCPU.pc++);
			mCPU.cy++;

			mCPU.r[(instr & 0x0e) + 0].val = second >> 4;
			mCPU.r[(instr & 0x0e) + 1].val = second & 15;
			break;
		}

		case 0x21:
		case 0x23:
		case 0x25:
		case 0x27:
		case 0x29:
		case 0x2b:
		case 0x2d:
		case 0x2f: {				//SRC
			
			//latch the register pair as the address for the currently selected ramCL
			mCPU.src[mCPU.ramCL] = 16 * mCPU.r[(instr & 0x0e) + 0].val + mCPU.r[(instr & 0x0e) + 1].val;
			break;
		}
		
		case 0x30:
		case 0x32:
		case 0x34:
		case 0x36:
		case 0x38:
		case 0x3a:
		case 0x3c:
		case 0x3e: {				//FIN
			
			//fetch a ROM byte addressed by r0:r1 within the page of the next instruction
			uint8_t val = cpuExtRomRead((mCPU.pc & 0xf00) + 16 * mCPU.r[0].val + mCPU.r[1].val);
			mCPU.cy++;

			mCPU.r[(instr & 0x0e) + 0].val = val >> 4;
			mCPU.r[(instr & 0x0e) + 1].val = val & 15;
			break;
		}

		case 0x31:
		case 0x33:
		case 0x35:
		case 0x37:
		case 0x39:
		case 0x3b:
		case 0x3d:
		case 0x3f: {				//JIN
			
			//jump within the current page to the address held in the register pair
			mCPU.pc = (mCPU.pc & 0xf00) + 16 * mCPU.r[(instr & 0x0e) + 0].val + mCPU.r[(instr & 0x0e) + 1].val;
			break;
		}
		
		case 0x40 ... 0x4f: {		//JUN

			uint8_t second = cpuExtRomRead(mCPU.pc++);
			mCPU.cy++;

			//12-bit absolute target: low opcode nibble is the page, second byte the offset
			mCPU.pc = 256 * (instr & 15) + second;
			break;
		}

		case 0x50 ... 0x5f:	{		//JMS
			
			uint8_t second = cpuExtRomRead(mCPU.pc++);
			mCPU.cy++;

			//push the return address, then jump; a 4th nested call silently overwrites the oldest entry
			mCPU.stack[mCPU.stackTop] = mCPU.pc;
			mCPU.pc = 256 * (instr & 15) + second;

			if (++mCPU.stackTop == 3)
				mCPU.stackTop = 0;
			if (mCPU.stackUse < 3)
				mCPU.stackUse++;
			break;
		}

		case 0x60 ... 0x6f: {		//INC
			
			//4-bit field wraps 15 -> 0; carry is not touched
			mCPU.r[instr & 15].val++;
			break;
		}

		case 0x70 ... 0x7f: {		//ISZ
			
			uint8_t second = cpuExtRomRead(mCPU.pc++);
			mCPU.cy++;

			//increment, and jump within the page unless the register wrapped to zero
			mCPU.r[instr & 15].val++;
			if (mCPU.r[instr & 15].val)
				mCPU.pc = (mCPU.pc & 0xf00) + second;
			break;
		}

		case 0x80 ... 0x8f: {		//ADD
			
			uint8_t t = mCPU.A + mCPU.C + mCPU.r[instr & 15].val;

			mCPU.A = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0x90 ... 0x9f: {		//SUB
			
			//A + ~reg + !C: carry in is "borrow", carry out is "not borrow"
			uint8_t t = mCPU.A + (1 - mCPU.C) + (15 - mCPU.r[instr & 15].val);

			mCPU.A = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0xa0 ... 0xaf: {		//LD
			
			mCPU.A = mCPU.r[instr & 15].val;
			break;
		}

		case 0xb0 ... 0xbf: {		//XCH

			uint8_t t = mCPU.A;

			mCPU.A = mCPU.r[instr & 15].val;
			mCPU.r[instr & 15].val = t;
			break;
		}

		case 0xc0 ... 0xcf: {		//BBL

			//returning with nothing on the stack is treated as a fatal firmware bug
			if (!mCPU.stackUse) {
				fprintf(stderr, "STACK UNDERFLOW\n");
				abort();
			}
			mCPU.stackUse--;

			//pop the return address and load the low opcode nibble into A
			mCPU.stackTop = mCPU.stackTop ? mCPU.stackTop - 1 : 2;
			mCPU.A = instr & 0x0f;
			mCPU.pc = mCPU.stack[mCPU.stackTop];
			break;
		}

		case 0xd0 ... 0xdf: { 		//LDM

			mCPU.A = instr & 0x0f;
			break;
		}

		case 0xe0: {				//WRM
			
			//RAM nibble address = 256 * bank + address latched by SRC for that bank
			cpuExtRamWrite(256 * mCPU.ramCL + mCPU.src[mCPU.ramCL], mCPU.A);
			break;
		}

		case 0xe1: {				//WMP
			
			//port index = 4 * bank + top two bits of the latched address
			cpuExtRamPortWrite(4 * mCPU.ramCL + mCPU.src[mCPU.ramCL] / 64, mCPU.A);
			break;
		}

		case 0xe4 ... 0xe7: {		//WR0 ... WR3
			
			//status index = 64 * bank + 4 * (high nibble of latched address) + low two opcode bits
			cpuExtStatusByteWrite(64 * mCPU.ramCL + mCPU.src[mCPU.ramCL] / 16 * 4 + instr % 4, mCPU.A);
			break;
		}

		case 0xe8: {				//SBM
			
			//same carry convention as SUB, operand comes from RAM
			uint8_t t = mCPU.A + (1 - mCPU.C) + (15 - cpuExtRamRead(256 * mCPU.ramCL + mCPU.src[mCPU.ramCL]));

			mCPU.A = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0xe9: {				//RDM
			
			mCPU.A = cpuExtRamRead(256 * mCPU.ramCL + mCPU.src[mCPU.ramCL]);
			break;
		}

		case 0xea: {				//RDR
			
			mCPU.A = cpuExtRomPortRead();
			break;
		}

		case 0xeb: {				//ADM
			
			uint8_t t = mCPU.A + mCPU.C + cpuExtRamRead(256 * mCPU.ramCL + mCPU.src[mCPU.ramCL]);

			mCPU.A = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0xec ... 0xef: {		//RD0 ... RD3
			
			//same status index computation as WR0 ... WR3
			mCPU.A = cpuExtStatusByteRead(64 * mCPU.ramCL + mCPU.src[mCPU.ramCL] / 16 * 4 + instr % 4);
			break;
		}

		case 0xf0: {				//CLB
			
			mCPU.A = 0;
			mCPU.C = 0;
			break;
		}

		case 0xf1: {				//CLC
			
			mCPU.C = 0;
			break;
		}

		case 0xf2: {				//IAC
			
			uint8_t t = 1 + mCPU.A;

			mCPU.A = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0xf3: {				//CMC
			
			mCPU.C ^= 1;
			break;
		}

		case 0xf4: {				//CMA
			
			mCPU.A ^= 15;
			break;
		}

		case 0xf5: {				//RAL
			//rotate left through carry: C enters bit 0, old bit 3 becomes C
			uint8_t t = 2 * mCPU.A + mCPU.C;

			mCPU.A = t;
			mCPU.C = t >> 4;
			break;
		}

		case 0xf6: {				//RAR
			//rotate right through carry: C enters bit 3, old bit 0 becomes C
			uint8_t t = mCPU.A + 16 * mCPU.C;

			mCPU.A = t >> 1;
			mCPU.C = t & 1;
			break;
		}

		case 0xf7: {				//TCC
			
			mCPU.A = mCPU.C;
			mCPU.C = 0;
			break;
		}

		case 0xf8: {				//DAC
			
			//A == 0 wraps t to 0xff, so bit 4 is set exactly on borrow; C = "not borrow"
			uint8_t t = mCPU.A - 1;

			mCPU.A = t;
			mCPU.C = 1 ^ (t >> 4);
			break;
		}

		case 0xf9: {				//TCS
			
			//A = 9 if carry clear, 10 if carry set
			mCPU.A = 9 + mCPU.C;
			mCPU.C = 0;
			break;
		}

		case 0xfa: {				//STC
			
			mCPU.C = 1;
			break;
		}

		case 0xfb: {				//DAA
			
			uint8_t t = mCPU.A;

			if (mCPU.C || t > 9)
				t += 6;

			mCPU.A = t;
			//carry is set when the adjust overflows and is otherwise left unaffected;
			//an incoming carry (e.g. from 9 + 9) must survive the adjust
			if (t >> 4)
				mCPU.C = 1;
			break;
		}

		case 0xfc: {				//KBP
			
			//one-hot to bit position: 0->0, 1->1, 2->2, 4->3, 8->4, anything else -> 15
			static const uint8_t mLUT[16] = {0, 1, 2, 15, 3, 15, 15, 15, 4, 15, 15, 15, 15, 15, 15, 15, };	//why logic when you can LUT

			mCPU.A = mLUT[mCPU.A];
			break;
		}

		case 0xfd: {				//DCL
			
			mCPU.ramCL = 7 & mCPU.A;
			break;
		}

		default: {		//unused

			//pc was already advanced past the opcode, hence the "- 1"
			fprintf(stderr, "unimpl instr 0x%02x at 0x%03x\n", instr, 0xfff & (mCPU.pc - 1));
			abort();
		}

	}
}

//Drive the CPU's test input. JCN reads this bit when its "test" condition bit is set.
// signal: new level of the test input
void cpuPrvSetTestSignal(bool signal)
{
	//stored as-is into the 1-bit testSignal field
	mCPU.testSignal = signal;

	return;
}

//Reset the emulated CPU to an all-zero state and arrange for the backtrace
//to be dumped when the process exits.
// returns: always true
bool cpuInit(void)
{
	//all registers, pc, stack and counters start out as zero
	memset(&mCPU, 0, sizeof mCPU);

	//the result of atexit() is not checked
	atexit(cpuDumpBacktrace);

	return true;
}

//Public entry point: execute exactly one instruction on the emulated CPU.
//Thin wrapper around cpuPrvInstr(), which may abort() on a stack underflow
//or an unimplemented opcode.
void cpuRunInstr(void)
{
	cpuPrvInstr();
}

//Number of cycles counted since cpuInit().
// returns: current value of the CPU cycle counter
uint64_t cpuGetCy(void)
{
	uint64_t cycles = mCPU.cy;

	return cycles;
}

//Number of instructions executed since cpuInit(). Unlike the cycle count,
//a two-byte instruction counts only once here.
// returns: current value of the CPU instruction counter
uint64_t cpuGetInstrCt(void)
{
	uint64_t executed = mCPU.instrs;

	return executed;
}

