#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "CortexEmuCpu.h"
#include "soundIface.h"
#include "audiohw.h"
#include "printf.h"
#include "heap.h"
#include "irqs.h"
#include "ral.h"

//Memory-mapped registers of the sound unit's output side. Each one is accessed
//as a volatile 32-bit word at SOUND_UNIT_BASE plus the matching SOUND_OFST_* offset.

//control bits: SOUND_CTRL_BIT_AVAIL is tested in audioOutHwInit(), SOUND_CTRL_BIT_ON is set/cleared in audioOutHwSetState()
#define SND_OUT_CTRL		(*(volatile uint32_t*)(SOUND_UNIT_BASE + SOUND_OFST_OUT_CTRL))
//read in audioOutHwInit() as the size of one hardware buffer, in samples
#define SND_OUT_STA			(*(volatile uint32_t*)(SOUND_UNIT_BASE + SOUND_OFST_OUT_STA))
//NOT dereferenced here: this is a pointer used as an array of registers; entries [0] and [1] get the addresses of the two sample buffers
#define SND_OUT_BUF			((volatile uint32_t*)(SOUND_UNIT_BASE + SOUND_OFST_OUT_BUF_0))
//index into SND_OUT_BUF; the IRQ handler fills the buffer it selects, audioOutHwSetState() resets it to 0 when turning output on
#define SND_OUT_CUR_BUF		(*(volatile uint32_t*)(SOUND_UNIT_BASE + SOUND_OFST_OUT_CUR_BUF))
//bit 31 set means stereo; the remaining bits hold the native sample rate (8000, 11025, ... 48000)
#define SND_OUT_NATIVE_RATE	(*(volatile uint32_t*)(SOUND_UNIT_BASE + SOUND_OFST_OUT_NATIVE_RATE))


//callback registered by audioOutHwInit(); the IRQ handler calls it with true to get the
//source samples to convert and once more with false after the conversion
static AudioOutHwReadyForMoreSamplesF mIrqF = NULL;
//number of individual 16-bit values in ONE hardware buffer: samples per buffer times channel count (1 or 2)
static uint32_t mNumSubsamplesPerBuffer;
//single allocation that holds both hardware buffers back to back
static int16_t* mHwBuffer;
//true when bit 31 of SND_OUT_NATIVE_RATE was set at init time
static bool mStereo;


//Convert "num" 32-bit samples at "src" into 16-bit samples at "dst".
//Every input value is arithmetically shifted right by 8 and then saturated to
//the int16_t range (-0x8000 .. 0x7fff).
//
//The function is naked: the arguments are used straight from r0 (dst), r1 (src)
//and r2 (num), and the assembly performs the return itself.
//
//Nothing is checked here, the caller must guarantee all of the following or things will break:
// - num is not zero: both loops convert data first and test for the end afterwards
// - v7/v7E path: num is a multiple of 8. Eight samples are converted per pass and
//   the loop only stops when the remaining count is exactly zero
//   (audioOutHwInit() insists on the buffer size being a multiple of 8 for this reason)
// - Thumb-1 path: converts one sample per pass, so any nonzero num works there
// - v7/v7E path: dst is written with a multi-word store and needs to be 4 byte aligned
//   NOTE(review): the original comment only mentioned v7E, but the stmia is shared by
//   the v7 and v7E variants, so the requirement presumably applies to both - confirm
//
//NOTE(review): GCC documents that only basic asm is supported in naked functions; the
//statements below are extended asm (they carry clobber lists) - confirm this is fine
//for the toolchain in use.
static void __attribute__((naked)) audioHwConvertSamples(int16_t *dst, const int32_t *src, uint32_t num)
{
	#ifdef BUILD_FOR_THUMB_1
		//not efficient, we can make this better later!
		
		asm volatile(
			".syntax unified					\n\t"
			"	mov     r12, r1					\n\t"	//r12 = src
			"	lsls    r2, #2					\n\t"	//sample count -> byte count (4 bytes per source sample)
			"	add     r12, r2					\n\t"	//calculate end source address
			"1:									\n\t"
			"	ldmia   r1!, {r3}				\n\t"	//load next 32-bit sample and advance src
			"	asrs    r3, #8					\n\t"	//bring into range
			"	sxth    r2, r3					\n\t"	//see if it needs saturation (r2 is free to reuse: the end address lives in r12)
			"	cmp     r3, r2					\n\t"	//equal only when the value already fits in 16 signed bits
			"	beq     2f						\n\t"	//taken if no saturation is needed
			"	asrs    r3, #31					\n\t"	//-1 if number was negative, 0 if it was positive
		#ifdef HAVE_v8M_BASE
			"	movw    r2, #0x7fff				\n\t"	//r2 = largest positive int16_t
		#else
			"	ldr     r2, =0x7fff				\n\t"	//same constant, loaded from the literal pool emitted by .ltorg below
		#endif
			"	subs    r3, r2, r3				\n\t"	//0x8000 if number was negative, 0x7fff if it was positive 
			"2:									\n\t"
			"	strh    r3, [r0]				\n\t"	//store the low 16 bits as the output sample
			"	adds    r0, #2					\n\t"	//advance dst by one 16-bit sample
			"	cmp     r1, r12					\n\t"	//reached the end of the source?
			"	bne     1b						\n\t"
			"	bx      lr						\n\t"	//naked function: return by hand
			"	.ltorg							\n\t"	//literal pool placed after the return so it is never executed
			:
			:
			:"memory","cc"
		);
	
	#else		//v7 or v7E
		
		asm volatile(
			".syntax unified					\n\t"
			"	push    {r4-r8, lr}				\n\t"	//preserve r4-r8 and the return address; lr is reused as a data register below
			"1:									\n\t"
			"	ldmia   r1!, {r3-r8,r12,lr}		\n\t"	//load 8 source samples and advance src
			//shift each sample right by 8 and saturate it to 16 signed bits
			"	ssat    r3,  #16, r3,  asr #8	\n\t"
			"	ssat    r4,  #16, r4,  asr #8	\n\t"
			"	ssat    r5,  #16, r5,  asr #8	\n\t"
			"	ssat    r6,  #16, r6,  asr #8	\n\t"
			"	ssat    r7,  #16, r7,  asr #8	\n\t"
			"	ssat    r8,  #16, r8,  asr #8	\n\t"
			"	ssat    r12, #16, r12, asr #8	\n\t"
			"	ssat    lr,  #16, lr,  asr #8	\n\t"
				
			//pack pairs of results into one word each: earlier sample in the low halfword, later one in the high halfword
		#ifdef HAVE_v7E_SUPPORT
			"	pkhbt   r3,  r3,  r4, lsl #16	\n\t"
			"	pkhbt   r5,  r5,  r6, lsl #16	\n\t"
			"	pkhbt   r7,  r7,  r8, lsl #16	\n\t"
			"	pkhbt   r12, r12, lr, lsl #16	\n\t"
		#else
			//no pkhbt here: clear the sign-extension bits of the low sample, then add the other sample shifted into the high halfword
			"	uxth    r3,  r3					\n\t"
			"	add     r3,  r3,  r4, lsl #16	\n\t"
			"	uxth    r5,  r5					\n\t"
			"	add     r5,  r5,  r6, lsl #16	\n\t"
			"	uxth    r7,  r7					\n\t"
			"	add     r7,  r7,  r8, lsl #16	\n\t"
			"	uxth    r12, r12				\n\t"
			"	add     r12, r12, lr, lsl #16	\n\t"
		#endif
			
			"	stmia   r0!, {r3, r5, r7, r12}	\n\t"	//store 4 words = 8 output samples and advance dst
			"	subs    r2, #8					\n\t"	//8 samples done
			"	bne     1b						\n\t"	//stops only on an exact zero, hence the multiple-of-8 requirement
			"	pop     {r4-r8, pc}				\n\t"	//restore registers and return
			:
			:
			:"memory","cc"
		);
	
	#endif
}

//Interrupt handler for the sound unit's output side.
//If a sample callback has been registered, it is called with true to obtain the
//32-bit source samples, which are converted into the hardware buffer selected by
//SND_OUT_CUR_BUF; the callback is then called a second time with false.
//The pending interrupt is cleared whether or not a callback is registered.
void AudioOut_IRQHandler(void)
{
	uint32_t savedR9;

	if (mIrqF != NULL) {

		//the callback and the conversion run between ralSetSafeR9() and ralRestoreR9()
		savedR9 = ralSetSafeR9();

		//kept as a single call expression on purpose, so the relative order of the
		//register read and the callback stays exactly what the compiler chose before
		audioHwConvertSamples(
			(int16_t*)(SND_OUT_BUF[SND_OUT_CUR_BUF]),
			mIrqF(true),
			mNumSubsamplesPerBuffer
		);

		mIrqF(false);

		ralRestoreR9(savedR9);
	}

	NVIC_ClearPendingIRQ(AudioOut_IRQn);
}

//Probe the sound unit's output side and prepare it for use.
//
// readyForSamplesF  - stored in mIrqF; AudioOut_IRQHandler() calls it to obtain samples
// numSamplesPerBufP - if not NULL, receives the size of one hardware buffer in samples, as read from SND_OUT_STA
// nativeRateP       - if not NULL, receives the rate decoded from SND_OUT_NATIVE_RATE
//                     (a value that matches none of the known rates is reported as AudioRate48000)
// nativeStereoP     - if not NULL, receives whether bit 31 of SND_OUT_NATIVE_RATE (stereo) is set
//
//Returns false, after logging an error, when SOUND_CTRL_BIT_AVAIL is not set; true otherwise.
//A buffer size that is zero or not a multiple of 8, or a failed allocation, ends in fatal().
//On success one allocation holding two hardware buffers has been handed to the sound
//unit and the AudioOut interrupt is enabled; output itself is not switched on here.
bool audioOutHwInit(AudioOutHwReadyForMoreSamplesF readyForSamplesF, uint32_t *numSamplesPerBufP, enum AudioSampleRate* nativeRateP, bool *nativeStereoP)
{
	uint32_t subBufSz, soundHwRates, numChannels;
	enum AudioSampleRate nativeRate;
	
	if (!(SND_OUT_CTRL & SOUND_CTRL_BIT_AVAIL)) {
		loge("Audio output not available on this HW\n");
		return false;
	}
	
	soundHwRates = SND_OUT_NATIVE_RATE;
	mStereo = !!(soundHwRates >> 31);		//top bit is the stereo flag
	numChannels = mStereo ? 2 : 1;
	
	subBufSz = SND_OUT_STA;
	
	//audioHwConvertSamples() converts data before it tests for the end, so a count of
	//zero would send it running through memory from the first interrupt on
	if (!subBufSz)
		fatal("hardware audio out buffer size is zero\n");
	
	//the v7 conversion loop handles 8 values per pass and cannot stop anywhere else
	if (subBufSz & 7)
		fatal("we REQUIRE hardware buffer to be a multiple of 8 samples in size for speed\n");
	
	mIrqF = readyForSamplesF;
	if (numSamplesPerBufP)
		*numSamplesPerBufP = subBufSz;
	
	mNumSubsamplesPerBuffer = subBufSz * numChannels;
	
	//room for two hardware buffers of mNumSubsamplesPerBuffer 16-bit values each
	mHwBuffer = (int16_t*)kheapAlloc(sizeof(int16_t) * 2 * mNumSubsamplesPerBuffer);	//returned pointer is 4 bytes aligned as needed
	if (!mHwBuffer)
		fatal("Cannot allox audio out buffer\n");
	
	//first buffer is the start of the allocation, second one follows it directly
	SND_OUT_BUF[0] = (uintptr_t)(mHwBuffer);
	SND_OUT_BUF[1] = (uintptr_t)(mHwBuffer + mNumSubsamplesPerBuffer);
	
	NVIC_EnableIRQ(AudioOut_IRQn);
	
	//decode the rate with the stereo flag masked off
	switch (soundHwRates &~ 0x80000000) {
		case 8000:				nativeRate = AudioRate8000;			break;
		case 11025:				nativeRate = AudioRate11025;		break;
		case 16000:				nativeRate = AudioRate16000;		break;
		case 22050:				nativeRate = AudioRate22050;		break;
		case 24000:				nativeRate = AudioRate24000;		break;
		case 32000:				nativeRate = AudioRate32000;		break;
		case 44100:				nativeRate = AudioRate44100;		break;
		default:				//anything unrecognized is treated like 48000
		case 48000:				nativeRate = AudioRate48000;		break;
	}
	
	if (nativeRateP)
		*nativeRateP = nativeRate;
	
	if (nativeStereoP)
		*nativeStereoP = mStereo;
	
	return true;
}

//Switch the sound unit's output on or off.
//Turning it on first resets the current-buffer index to 0 and zeroes both
//hardware buffers, then sets SOUND_CTRL_BIT_ON.
//Turning it off only clears SOUND_CTRL_BIT_ON.
void audioOutHwSetState(bool on)
{
	size_t bothBuffersBytes;

	if (!on) {
		SND_OUT_CTRL &= ~SOUND_CTRL_BIT_ON;
		return;
	}

	SND_OUT_CUR_BUF = 0;

	//the allocation holds two buffers of mNumSubsamplesPerBuffer 16-bit values each
	bothBuffersBytes = sizeof(int16_t) * 2 * mNumSubsamplesPerBuffer;
	memset(mHwBuffer, 0, bothBuffersBytes);

	SND_OUT_CTRL |= SOUND_CTRL_BIT_ON;
}

//Stub for the "simple" audio output init entry point.
//This implementation does nothing except log a warning naming itself and report failure.
bool audioOnlySimpleOutInit(void)
{
	const bool initOk = false;	//this implementation never succeeds

	logw("Unexpected call to %s\n", __func__);

	return initOk;
}

//Stub for the "simple" tone entry point.
//Both arguments are ignored; any call is reported through fatal() with this function's name.
void audioOnlySimpleTone(uint32_t freq, uint32_t amp)
{
	(void)freq;
	(void)amp;

	fatal("Unexpected call to %s\n", __func__);
}