#define _BUILDING_DMA_LIB

#include <kernel/drivers/platforms/xscale/xscale.h>
#include <kernel/drivers/platforms/platform.h>
#include "DmaDriver.h"
#include <FatalAlert.h>
#include <MemoryMgr.h>
#include <SerialMgr.h>
#include <StringMgr.h>
#include <LibTraps.h>
#include <stdint.h>
#include <string.h>
#include "common.h"
#include "printf.h"
#include <dal.h>
#include <ral.h>


//number of DMA streams tracked by this driver: sizes Globals.strm[] and is the per-controller count reported by impl_DmaLibGetInfo()
#define NUM_STREAMS						16
//constant used by dmaPointerMangle()/dmaPointerUnmangle() to disguise DmaStreamState pointers that are handed out as DmaStream handles
#define POINTER_MANGLE_FACTOR			0x69696969

//all irq flags we know of (impl_DmaLibStreamSetIrqState() rejects any bit outside this mask)
#define DMA_STRM_IRQ_ALL				(DMA_STRM_IRQ_DONE | DMA_STRM_IRQ_HALF | DMA_STRM_IRQ_XFER_ERR | DMA_STRM_IRQ_DIRECT_ERR | DMA_STRM_IRQ_FIFO_ERR)


//One in-memory DMA descriptor. dmaLibPrvSetEnabled() allocates two of these per stream
//(16-byte aligned), links them into a ring, and hands the physical address of the first
//one to the channel's DDADR register. All fields hold the values written there.
struct DmaDescriptor {
	volatile uint32_t DDADR;	//next descriptor (physical address)
	volatile uint32_t DSADR;	//source addr
	volatile uint32_t DTADR;	//dest addr
	volatile uint32_t DCMD;		//command for this transfer (same bit layout as the command word built in dmaLibPrvSetEnabled())
};


//Per-stream configuration and status flags, packed into bitfields.
//Written by impl_DmaLibStreamReserve(), impl_DmaLibStreamConfigure() and
//impl_DmaLibStreamSetIrqState(); consumed by dmaLibPrvSetEnabled() when it
//builds the channel command word.
struct DmaStreamCfg {			//64 bits
	
	uint32_t reserved		: 1;	//stream has been handed out by impl_DmaLibStreamReserve()
	
	uint32_t enabled		: 1;	//stream is currently running (set/cleared by dmaLibPrvSetEnabled())
	
	uint32_t irqOnComplete	: 1;	//caller asked for DMA_STRM_IRQ_DONE
	uint32_t irqOnHalf		: 1;	//caller asked for DMA_STRM_IRQ_HALF
	uint32_t irqOnError		: 1;	//caller asked for any of the error irqs (all merged into one flag)

	//helps-to-know info
	uint32_t toMem			: 1;	//transfer direction: peripheral -> memory when set
	uint32_t circBuf		: 1;	//circular-buffer mode
	uint32_t dblBuf			: 1;	//double-buffer mode
	
	//configs
	uint32_t incSrc			: 1;	//increment source address
	uint32_t incDst			: 1;	//increment destination address
	uint32_t trigSrc		: 1;	//source side controls flow
	uint32_t trigDst		: 1;	//destination side controls flow
	uint32_t size			: 2;	//burst size code (1..3 for 8/16/32, see impl_DmaLibStreamConfigure())
	uint32_t width			: 2;	//item width code (perSz + 1, i.e. 1..3)
	uint32_t endIrq			: 1;	//not referenced by the code in this file
	uint32_t chan			: 6;	//request line number, used as index into DRCMR[]

	//in bytes. Must be 13 bits wide: impl_DmaLibStreamConfigure() accepts every length
	//below 0x2000 and the hardware length field is read back with a 0x1fff mask, so a
	//12-bit field would silently truncate lengths 0x1000..0x1fff. The struct still
	//occupies 64 bits.
	uint32_t bytesPerBuf	: 13;
};

//Library-wide state, located through dmaGlobalsGet().
//NOTE(review): with 4-byte pointers the declared layout is 8*4 + 16*40 = 0x2A0 bytes, which
//does not match the 0x288 quoted below - confirm the amdi resource size against this struct.
struct Globals {						//size set to 0x288 bytes by amdi resource...
	//global state: precisely 8 words
	uint32_t rfu[8];					//not referenced by the code in this file
	
	//per-stream state: exactly 40 bytes each, x16
	struct DmaStreamState {
		DmaLibIrqHandlerF		irqF;		//user irq callback, invoked from dmaLibPrvIrqHandler()
		void*					irqD;		//opaque data passed as first argument to irqF
		uint32_t				addrP;		//peripheral-side address
		uint32_t				addrM[2];	//memory-side addresses; [1] is only settable in double-buffer mode
		struct DmaDescriptor*	descrs;		//virtual address of the two-descriptor ring (NULL until first needed)
		uint32_t 				descrsPA;	//physical address of the same ring, as given to the hardware
		struct DmaStreamCfg		cfg;		//configuration and status flags
		uint32_t				numItems;	//not read or written by the code in this file
	} strm[NUM_STREAMS];
};



//Locate this library's globals: load the word that r9 points at, pick the
//entry for MY_LIB_ID out of that table, then skip the first 0x10/4 pointer-sized
//slots of the entry (0x10 bytes with 4-byte pointers) to reach struct Globals.
static struct Globals* __attribute__((const)) dmaGlobalsGet(void)
{
	void ***libTable;
	void **myEntry;
	
	asm ("ldr %0, [r9]":"=r"(libTable));
	myEntry = libTable[MY_LIB_ID / 4];
	
	return (struct Globals*)(myEntry + 0x10 / 4);
}

//Turn a DmaStream handle back into the DmaStreamState pointer it was made from.
//Exact inverse of dmaPointerMangle(): add the factor, then XOR it. Preserves NULL.
static struct DmaStreamState* dmaPointerUnmangle(DmaStream strm)
{
	uint32_t raw = (uint32_t)strm + POINTER_MANGLE_FACTOR;
	
	return (struct DmaStreamState*)(raw ^ POINTER_MANGLE_FACTOR);
}

static DmaStream dmaPointerMangle(struct DmaStreamState* s)								//preserves NULL
{
	return (DmaStream)((((uint32_t)s) ^ POINTER_MANGLE_FACTOR) - POINTER_MANGLE_FACTOR);
}

//Report the DMA topology: one controller with NUM_STREAMS streams.
//Either output pointer may be NULL, in which case that value is not reported.
void __attribute__((used)) impl_DmaLibGetInfo(uint32_t *numControllersP, uint32_t *numStreamsPerControllerP)
{
	if (numControllersP != NULL) {
		*numControllersP = 1;
	}
	
	if (numStreamsPerControllerP != NULL) {
		*numStreamsPerControllerP = NUM_STREAMS;
	}
}

//Install the irq callback and its opaque data for a stream.
//Refused (returns false, nothing changed) while the stream is enabled.
bool __attribute__((used)) impl_DmaLibStreamSetIrqHandler(DmaStream strm, DmaLibIrqHandlerF irqFunc, void* irqHandlerData)
{
	struct DmaStreamState *state = dmaPointerUnmangle(strm);
	bool idle = !state->cfg.enabled;
	
	if (idle) {
		state->irqF = irqFunc;
		state->irqD = irqHandlerData;
	}
	
	return idle;
}

//Record the peripheral-side address for a stream.
//Refused (returns false, nothing changed) while the stream is enabled.
bool __attribute__((used)) impl_DmaLibStreamSetPeriphAddr(DmaStream strm, uint32_t addr)
{
	struct DmaStreamState *state = dmaPointerUnmangle(strm);
	bool idle = !state->cfg.enabled;
	
	if (idle) {
		state->addrP = addr;
	}
	
	return idle;
}

//Record a memory-side buffer address for a stream.
//bufferIdx 0 is always valid; bufferIdx 1 only exists in double-buffer mode;
//anything else is rejected. Also rejected while the stream is enabled.
bool __attribute__((used)) impl_DmaLibStreamSetMemAddr(DmaStream strm, uint32_t bufferIdx, uint32_t addr)
{
	struct DmaStreamState *state = dmaPointerUnmangle(strm);
	
	if (state->cfg.enabled)
		return false;
	
	switch (bufferIdx) {
		case 0:
			break;
		
		case 1:		//only double-buffer mode has two buffers explicitly
			if (!state->cfg.dblBuf)
				return false;
			break;
		
		default:
			return false;
	}
	
	state->addrM[bufferIdx] = addr;
	
	return true;
}

//Read CPSR, write it back with bit 7 (0x80, the ARM IRQ-disable bit) set, and
//return the value that was read so it can later be handed to irqsRestore().
static uint32_t irqsOff(void)
{
	uint32_t prevCpsr, maskedCpsr;
	
	asm volatile(
		"	mrs %[prev], cpsr				\n\t"
		"	orr %[masked], %[prev], #0x80	\n\t"
		"	msr cpsr_c, %[masked]			\n\t"
		:[prev] "=r"(prevCpsr), [masked] "=r"(maskedCpsr)
		:
		:"memory"
	);
	
	return prevCpsr;
}

//Write a value previously returned by irqsOff() back into the control field of CPSR.
static void irqsRestore(uint32_t state)
{
	asm volatile(
		"	msr cpsr_c, %[saved]	\n\t"
		:
		:[saved] "r"(state)
		:"memory"
	);
}

//Map a stream-state pointer to the hardware channel register block with the
//same index (position of s within Globals.strm[]). Returns NULL for a NULL s.
static struct PxaDMAch* dmaLibPrvGetChannelForStrm(struct DmaStreamState *s)
{
	struct PxaDMA *dmaRegs = repalmPlatPeriphP2V(PXA_BASE_DMA);
	struct Globals *globals = dmaGlobalsGet();
	
	if (!s)
		return NULL;
	
	return &dmaRegs->ch[s - globals->strm];
}

//Start or stop the hardware channel that belongs to stream s.
//
//on == false: rewrites the request mapping, clears the channel's DCSR and marks
//             the stream disabled. Always returns true.
//on == true:  builds the command word from s->cfg, programs the channel either
//             directly (simple mode) or via a two-descriptor ring (circular and
//             double-buffer modes), maps the request line to the channel and
//             sets the run bit. Returns false only if the descriptor ring had
//             to be allocated and the allocation failed.
//
//s must be non-NULL; it is dereferenced unconditionally.
static bool dmaLibPrvSetEnabled(struct DmaStreamState *s, bool on)
{
	struct PxaDMA *dma = repalmPlatPeriphP2V(PXA_BASE_DMA);
	struct PxaDMAch *ch = dmaLibPrvGetChannelForStrm(s);
	struct Globals *g = dmaGlobalsGet();
	volatile uint32_t *dcsr;
	uint32_t cmd = 0;
	
	dcsr = &dma->DCSR[s - g->strm];
	
	if (!on) {
		//NOTE(review): this writes the same "mapped" value (0x80 set) that the enable
		//path writes, so the request line stays routed to this channel after disable.
		//Confirm whether the mapping was meant to be cleared here instead.
		dma->DRCMR[s->cfg.chan] = (s - g->strm) | 0x80;
		*dcsr = 0;
		s->cfg.enabled = 0;
	}
	else {		//enable is a lot of work
		
		//Bitfields narrower than int promote to (signed) int, so each one is cast to
		//uint32_t before shifting: a signed "1 << 31" would be undefined behaviour.
		cmd |= (uint32_t)s->cfg.bytesPerBuf << 0;
		cmd |= (uint32_t)s->cfg.width << 14;
		cmd |= (uint32_t)s->cfg.size << 16;
		cmd |= (uint32_t)s->cfg.trigDst << 28;
		cmd |= (uint32_t)s->cfg.trigSrc << 29;
		cmd |= (uint32_t)s->cfg.incDst << 30;
		cmd |= (uint32_t)s->cfg.incSrc << 31;
		
		//circular and double-buffered mode require descriptor use
		if (s->cfg.dblBuf || s->cfg.circBuf) {
			
			//double-buffer: second descriptor uses the caller's second buffer
			uint32_t secondPtr = s->addrM[1];
			
			//circular: second descriptor covers the second half of the single buffer
			//(or the same address again if the memory side does not increment)
			if (s->cfg.circBuf) {
				secondPtr = s->addrM[0];
				
				if ((s->cfg.toMem && s->cfg.incDst) || (!s->cfg.toMem && s->cfg.incSrc))
					secondPtr += s->cfg.bytesPerBuf;
			}
			//if we do not have descriptors, allocate them
			if (!s->descrs) {
				
				uintptr_t pa, t;
				char* va;
				
				if (!repalmPlatDmaBufAlloc(sizeof(struct DmaDescriptor) * 2 + 15 /* align */, &pa, (void**)&va)) {
					
					loge("Failed to alloc descriptors\n");
					return false;
				}
				
				//round the physical address up to 16 bytes and move the virtual address by the same amount
				t = (pa + 15) &~ 15;
				va += t - pa;
				pa = t;
			
				s->descrs = (struct DmaDescriptor*)va;
				s->descrsPA = pa;
			}
			
			//point them to each other
			s->descrs[0].DDADR = s->descrsPA + sizeof(struct DmaDescriptor);
			s->descrs[1].DDADR = s->descrsPA;
			
			if (s->cfg.toMem) {
				
				s->descrs[0].DTADR = s->addrM[0];
				s->descrs[1].DTADR = secondPtr;
				s->descrs[0].DSADR = s->addrP;
				s->descrs[1].DSADR = s->addrP;
			}
			else {
				
				s->descrs[0].DTADR = s->addrP;
				s->descrs[1].DTADR = s->addrP;
				s->descrs[0].DSADR = s->addrM[0];
				s->descrs[1].DSADR = secondPtr;
			}
			
			//bit 21 of the command requests an irq at the end of that descriptor:
			//the second descriptor ends a full pass ("complete"); the first one ends
			//the first half in circular mode, or a whole buffer in double-buffer mode
			s->descrs[0].DCMD = cmd;
			s->descrs[1].DCMD = cmd | ((uint32_t)s->cfg.irqOnComplete << 21);
			
			if ((s->cfg.circBuf && s->cfg.irqOnHalf) || (!s->cfg.circBuf && s->cfg.irqOnComplete))
				s->descrs[0].DCMD |= 1 << 21;
			
			*dcsr &=~ 0x40000000;	//use descriptors
			*dcsr |= 7;				//clear states
			ch->DDADR = s->descrsPA;
		}
		else {			//simple DMA - no-descr mode
			
			if (s->cfg.toMem) {
				
				ch->DSADR = s->addrP;
				ch->DTADR = s->addrM[0];
			}
			else {
				
				ch->DSADR = s->addrM[0];
				ch->DTADR = s->addrP;
			}
			
			ch->DCMD = cmd | ((uint32_t)s->cfg.irqOnComplete << 21);
			*dcsr |= 0x40000000;	//do not use descriptors
			*dcsr |= 7;				//clear states
		}
		//route the request line to this channel (0x80 marks the mapping valid), then start it
		dma->DRCMR[s->cfg.chan] = (s - g->strm) | 0x80;
		s->cfg.enabled = 1;
		*dcsr |= 0x80000000;
	}
	
	return true;
}

//Claim a stream for exclusive use.
//Only controller 0 and stream indices below NUM_STREAMS exist. On success the
//stream is marked reserved, its irq-enable flags are cleared and a mangled
//handle is returned; otherwise (bad index or already reserved) the mangled
//form of NULL, which is NULL, is returned. The claim runs with irqs masked.
DmaStream __attribute__((used)) impl_DmaLibStreamReserve(uint32_t controller, uint32_t stream)
{
	struct Globals *globals = dmaGlobalsGet();
	struct DmaStreamState *claimed = NULL;
	
	if (controller == 0 && stream < NUM_STREAMS) {
		
		struct DmaStreamState *slot = &globals->strm[stream];
		uint32_t irqState = irqsOff();
		
		if (!slot->cfg.reserved) {
			
			slot->cfg.reserved = true;
			slot->cfg.irqOnComplete = 0;
			slot->cfg.irqOnHalf = 0;
			slot->cfg.irqOnError = 0;
			
			claimed = slot;
		}
		
		irqsRestore(irqState);
	}
	
	return dmaPointerMangle(claimed);
}

//Give a reserved stream back. If it is still enabled it is stopped first.
//Returns true if the stream was reserved, false if there was nothing to release.
//The whole check-and-release runs with irqs masked.
bool __attribute__((used)) impl_DmaLibStreamRelease(DmaStream strm)
{
	struct DmaStreamState *state = dmaPointerUnmangle(strm);
	uint32_t irqState = irqsOff();
	bool wasReserved = state->cfg.reserved;
	
	if (wasReserved) {
		
		if (state->cfg.enabled)
			(void)dmaLibPrvSetEnabled(state, false);
		
		state->cfg.reserved = false;
	}
	
	irqsRestore(irqState);
	
	return wasReserved;
}

//Validate a user configuration and translate it into the stream's internal cfg.
//
//Returns false, leaving the stream untouched, when:
// - the stream is currently enabled
// - cfg->magic is not CFG_STRUCT_MAGIX
// - both circBuf and dblBuf are requested
// - peripheral address increment is requested
// - peripheral/memory item sizes or burst sizes differ
// - burst size is not 8, 16 or 32
// - perSz is 3 or more
// - circular mode is requested with an odd item count
// - the resulting per-buffer length is 0x2000 bytes or more
//Returns true once the configuration has been stored.
bool __attribute__((used)) impl_DmaLibStreamConfigure(DmaStream strm, const struct DmaStreamUserCfg* cfg)
{
	struct DmaStreamState *s = dmaPointerUnmangle(strm);
	uint32_t numItemsPerBuffer, numBytesPerBuffer;
	
	//cannot reconfigure an active channel
	if (s->cfg.enabled)
		return false;
	
	//ABI check
	if (cfg->magic != CFG_STRUCT_MAGIX)
		return false;
	
	//some combinations are meaningless
	if (cfg->circBuf && cfg->dblBuf)
		return false;
	
	//some things we do not support
	if (cfg->perIncr)
		return false;
	
	//some sizes must be same for this DMA controller
	if (cfg->perSz != cfg->memSz || cfg->memBurstSz != cfg->perBurstSz)
		return false;
	
	//burst is power of two 8..32
	if (cfg->perBurstSz & (cfg->perBurstSz - 1) || cfg->perBurstSz < 8 || cfg->perBurstSz > 32)
		return false;
	
	//item size of 8 not supported
	if (cfg->perSz >= 3)
		return false;
	
	numItemsPerBuffer = cfg->numItems;
	
	//circular buffering requires buffer size divisible by 2
	if (cfg->circBuf && (numItemsPerBuffer % 2))
		return false;
	
	if (cfg->circBuf)				//simulated using 2 xfers and thus has 2x limits
		numItemsPerBuffer /= 2;
	
	//enforce the length limit on the item count, before converting to bytes:
	//shifting first could wrap around 32 bits and let an oversized count through.
	//perSz is known to be 0..2 here, so 0x2000 >> perSz is exact.
	if (numItemsPerBuffer >= (0x2000u >> cfg->perSz))
		return false;
	
	numBytesPerBuffer = numItemsPerBuffer << cfg->perSz;
	
	s->cfg.chan = cfg->chan;
	s->cfg.toMem = cfg->toMem;
	
	//only the memory side may increment; flow control always belongs to the peripheral side
	if (cfg->toMem) {
		s->cfg.incSrc = false;
		s->cfg.incDst = cfg->memIncr;
		s->cfg.trigSrc = cfg->perControlsFlow;
		s->cfg.trigDst = 0;
	}
	else {
		s->cfg.incSrc = cfg->memIncr;
		s->cfg.incDst = false;
		s->cfg.trigSrc = 0;
		s->cfg.trigDst = cfg->perControlsFlow;
	}
	s->cfg.size = 29 - __builtin_clz(cfg->perBurstSz);	//8 -> 1, 16 -> 2, 32 -> 3
	s->cfg.width = cfg->perSz + 1;						//0..2 -> 1..3
	s->cfg.bytesPerBuf = numBytesPerBuffer;
	s->cfg.dblBuf = cfg->dblBuf;
	s->cfg.circBuf = cfg->circBuf;
	
	return true;
}

//Report which memory buffer the channel is working on right now.
//Reads the channel's current memory-side address (destination when transferring
//to memory, source otherwise); index 0 is reported when that address lies within
//bytesPerBuf bytes of addrM[0], index 1 otherwise.
//Returns false if the stream is not enabled. targetBufferIdxP may be NULL.
bool __attribute__((used)) impl_DmaLibStreamGetCurrentTargetBuffer(DmaStream strm, uint32_t *targetBufferIdxP)
{
	struct DmaStreamState *state = dmaPointerUnmangle(strm);
	struct PxaDMAch *chanRegs = dmaLibPrvGetChannelForStrm(state);
	uint32_t curAddr;
	
	if (!state->cfg.enabled)
		return false;
	
	if (state->cfg.toMem)
		curAddr = chanRegs->DTADR;
	else
		curAddr = chanRegs->DSADR;
	
	if (targetBufferIdxP) {
		
		uint32_t firstBuf = state->addrM[0];
		
		if (curAddr >= firstBuf && curAddr - firstBuf < state->cfg.bytesPerBuf)
			*targetBufferIdxP = 0;
		else
			*targetBufferIdxP = 1;
	}
	
	return true;
}

//Select which irq causes should be reported for a stream.
//irqsEnabled is a mask of DMA_STRM_IRQ_* bits. All three error causes are
//merged into the single irqOnError flag.
//Returns false, leaving the stream's irq flags unchanged, when the stream is
//enabled, when unknown bits are set, or when a half-transfer irq is requested
//on a double-buffered stream. Returns true once the flags have been stored.
bool __attribute__((used)) impl_DmaLibStreamSetIrqState(DmaStream strm, uint32_t irqsEnabled /* mask of DMA_STRM_IRQ_* */)
{
	struct DmaStreamState *s = dmaPointerUnmangle(strm);
	bool wantError, wantComplete, wantHalf;
	
	if (s->cfg.enabled)		//we do not support setting while enabled
		return false;
	
	if (irqsEnabled &~ DMA_STRM_IRQ_ALL)
		return false;
	
	//mush all "error" flags into one as we only have one "error" flag
	wantError = !!(irqsEnabled & (DMA_STRM_IRQ_XFER_ERR | DMA_STRM_IRQ_DIRECT_ERR | DMA_STRM_IRQ_FIFO_ERR));
	wantComplete = !!(irqsEnabled & DMA_STRM_IRQ_DONE);
	wantHalf = !!(irqsEnabled & DMA_STRM_IRQ_HALF);
	
	//validate before committing, so that a rejected request does not modify the stream
	if (wantHalf && s->cfg.dblBuf)	//not supportable
		return false;
	
	s->cfg.irqOnError = wantError;
	s->cfg.irqOnComplete = wantComplete;
	s->cfg.irqOnHalf = wantHalf;
	
	return true;
}

//Public entry point for starting/stopping a stream: resolves the handle and
//forwards to dmaLibPrvSetEnabled(), returning its result.
bool __attribute__((used)) impl_DmaLibStreamSetEnabled(DmaStream strm, bool on)
{
	return dmaLibPrvSetEnabled(dmaPointerUnmangle(strm), on);
}

//Report how many items the channel still has to move in its current transfer.
//Reads the channel's live command register: the low 13 bits hold the remaining
//length in bytes, and bits 15:14 hold the item width code that
//dmaLibPrvSetEnabled() programmed (1 = 1 byte, 2 = 2 bytes, 3 = 4 bytes), so
//the byte count is shifted right by (width code - 1) to get items.
//Always returns true. numItemsToTransferP may be NULL.
bool __attribute__((used)) impl_DmaLibStreamGetItemsLeftToTransfer(DmaStream strm, uint32_t *numItemsToTransferP)
{
	struct DmaStreamState *s = dmaPointerUnmangle(strm);
	struct PxaDMAch *ch = dmaLibPrvGetChannelForStrm(s);
	uint32_t cmd = ch->DCMD;
	uint32_t widthCode = (cmd >> 14) & 3;	//item width lives at bits 15:14; bits 17:16 are the burst size
	
	//a width code of 0 means the channel was never programmed by us; do not
	//shift by (0 - 1), just report the raw byte count in that case
	uint32_t shift = widthCode ? widthCode - 1 : 0;
	
	if (numItemsToTransferP)
		*numItemsToTransferP = (cmd & 0x1fff) >> shift;
	
	return true;
}

//Report whether the stream is currently marked enabled.
//Always returns true. enabledP may be NULL, in which case nothing is reported.
bool __attribute__((used)) impl_DmaLibStreamGetEnabled(DmaStream strm, bool *enabledP)
{
	struct DmaStreamState *state = dmaPointerUnmangle(strm);
	
	if (enabledP) {
		*enabledP = state->cfg.enabled ? true : false;
	}
	
	return true;
}

//Shared irq handler for all DMA channels (installed by PilotMain on load).
//Keeps re-reading the pending-channel register until it is zero. For every
//pending channel it acknowledges the low three status bits, translates them
//into DMA_STRM_IRQ_* flags and passes those to the stream's callback, if one
//is installed. An irq from a channel whose stream is not enabled is fatal.
//irqNoP is unused.
static void dmaLibPrvIrqHandler(void *irqNoP)
{
	struct PxaDMA *dma = repalmPlatPeriphP2V(PXA_BASE_DMA);
	struct Globals *g = dmaGlobalsGet();
	uint32_t i, dint;
	
	while ((dint = dma->DINT) != 0) {
		
		//one bit per channel, lowest bit first
		for (i = 0; i < NUM_STREAMS; i++, dint >>= 1) {
			
			if (dint & 1) {
				
				if (!g->strm[i].cfg.enabled)
					fatal("DMA irq on a disabled channel\n");
				else {
					
					uint32_t sta = dma->DCSR[i], reportedSta = 0;
					
					//write the observed status bits back to acknowledge them
					dma->DCSR[i] |= sta & 7;
					if (sta & 1)	//error
						reportedSta |= DMA_STRM_IRQ_DIRECT_ERR;
					
					if (sta & 4) {	//end
						
						if (g->strm[i].cfg.circBuf) {	//special
							
							//the channel's next-descriptor pointer tells the halves apart: once it
							//points back at the first descriptor, the first half has just finished
							if (dma->ch[i].DDADR == g->strm[i].descrsPA)		//assumes we get here fast
								reportedSta |= DMA_STRM_IRQ_HALF;
							else
								reportedSta |= DMA_STRM_IRQ_DONE;
						}
						else
							reportedSta |= DMA_STRM_IRQ_DONE;
					}
					
					//a stream may be enabled without a callback (or with a NULL one);
					//the irq has been acknowledged above, so there is just nobody to tell
					if (g->strm[i].irqF)
						g->strm[i].irqF(g->strm[i].irqD, reportedSta);
				}
			}
		}
	}
}

//Library entry point.
//RAL_CMD_LOAD:   writes 7 to every channel's status register, zeroes every request
//                mapping, then installs and enables the DMA irq handler.
//RAL_CMD_UNLOAD: reports a fatal error for (and stops) any stream still enabled,
//                then disables the DMA irq.
//Any other command is only logged. Always returns errNone.
//cmdPBP and flags are unused.
uint32_t __attribute__((used)) PilotMain(uint16_t cmd, void* cmdPBP, uint16_t flags)
{
	struct PxaDMA *dmaRegs = repalmPlatPeriphP2V(PXA_BASE_DMA);
	struct Globals *globals = dmaGlobalsGet();
	uint32_t idx;

	loge("dma main 0x%04x in\n", cmd);
	
	if (cmd == RAL_CMD_LOAD) {
		
		const uint32_t numRequestMaps = sizeof(dmaRegs->DRCMR) / sizeof(dmaRegs->DRCMR[0]);
		
		for (idx = 0; idx < NUM_STREAMS; idx++) {
			dmaRegs->DCSR[idx] = 7;
		}
		
		for (idx = 0; idx < numRequestMaps; idx++) {
			dmaRegs->DRCMR[idx] = 0;
		}
		
		HALInterruptSetHandler(REPALM_IRQ_NO_MANGLE(XSCALE_IRQ_NO_DMA), dmaLibPrvIrqHandler, NULL);
		HALInterruptSetState(REPALM_IRQ_NO_MANGLE(XSCALE_IRQ_NO_DMA), true);
	}
	else if (cmd == RAL_CMD_UNLOAD) {
		
		for (idx = 0; idx < NUM_STREAMS; idx++) {
			
			if (!globals->strm[idx].cfg.enabled)
				continue;
			
			fatal("DMA Stream %u enabled while DmaDriver being unloaded!\n", idx);
			dmaRegs->DCSR[idx] = 0;
		}
		
		HALInterruptSetState(REPALM_IRQ_NO_MANGLE(XSCALE_IRQ_NO_DMA), false);
	}
	
	loge("dma main 0x%04x out\n", cmd);
	
	return errNone;
}






