#include "printf.h"
#include "audio.h"
#include "emit.h"


//REV in 4 instrs
//emits a byte-reversal of the 32-bit value in valReg (result is left in valReg), for targets where we do not
// emit a real REV instruction. tmpReg is clobbered. every instruction is emitted with EmitLeaveFlags
//presumably EMIT() bails out of this function on an emit failure (none of the EMIT results are checked here) - TODO confirm
static enum EmitStatus audioPrvEmitRev(struct EmitBuf *dest, uint32_t valReg, uint32_t tmpReg)
{
	//EOR Rtmp, Rval, Rval, ROR #16
	EMIT(LLeorReg, tmpReg, valReg, valReg, EmitShiftRor, 16, EmitLeaveFlags, false);

	//BIC Rtmp, 0x00ff0000		(immediate is given as 0xff rotated right by 16)
	EMIT(LLbicImm, tmpReg, tmpReg, 0xff, 16, EmitLeaveFlags);

	//MOV Rval, Rval, ROR #8
	EMIT(LLmov, valReg, valReg, EmitShiftRor, 8, EmitLeaveFlags, false);

	//EOR Rval, Rval, Rtmp, LSR #8
	EMIT(LLeorReg, valReg, valReg, tmpReg, EmitShiftLsr, 8, EmitLeaveFlags, false);
	
	return EmitErrNone;
}

//REV16 in 5 instrs
//swaps the two bytes inside each halfword of valReg (result is left in valReg). tmpReg is clobbered
//done as a rotate by 16 (swap the halfwords) followed by a full 32-bit byte reversal
static enum EmitStatus audioPrvEmitRev16(struct EmitBuf *dest, uint32_t valReg, uint32_t tmpReg)
{
	//MOV Rval, Rval, ROR #16
	EMIT(LLmov, valReg, valReg, EmitShiftRor, 16, EmitLeaveFlags, false);
	
	//REV Rval, Rval		(the 4-instr sequence above, its status is our status)
	return audioPrvEmitRev(dest, valReg, tmpReg);
}

//3 instrs (assumes on input top bits are zero, unlike real REVSH instr). This is ok for us
//byte-swaps the halfword in the bottom of valReg and sign-extends the result to 32 bits (result is left in valReg)
//tmpReg is clobbered. flags are left alone
static enum EmitStatus audioPrvEmitBswap16S(struct EmitBuf *dest, uint32_t valReg, uint32_t tmpReg)
{
	//MOV Rtmp, Rval, LSL #24		(low input byte moves to the very top, so its top bit becomes the sign bit)
	EMIT(LLmov, tmpReg, valReg, EmitShiftLsl, 24, EmitLeaveFlags, false);
	
	//MOV Rval, Rval, LSR #8		(high input byte moves to the bottom. this is where the "top bits are zero" assumption matters)
	EMIT(LLmov, valReg, valReg, EmitShiftLsr, 8, EmitLeaveFlags, false);
	
	//ORR Rval, Rval, Rtmp, ASR #16		(arithmetic shift drops the low input byte into bits 15..8 and sign-extends above it)
	EMIT(LLorrReg, valReg, valReg, tmpReg, EmitShiftAsr, 16, EmitLeaveFlags, false);
	
	return EmitErrNone;
}

//3 instrs (assumes on input top bits are zero). This is ok for us
//unsigned counterpart of the signed 16-bit swap: byte-swaps the halfword in the bottom of valReg and
// leaves the top 16 bits of the result zero (result is left in valReg). tmpReg is clobbered. flags are left alone
static enum EmitStatus audioPrvEmitBswap16U(struct EmitBuf *dest, uint32_t valReg, uint32_t tmpReg)
{
	//MOV Rtmp, Rval, LSL #24		(low input byte moves to the very top)
	EMIT(LLmov, tmpReg, valReg, EmitShiftLsl, 24, EmitLeaveFlags, false);
	
	//MOV Rval, Rval, LSR #8		(high input byte moves to the bottom. this is where the "top bits are zero" assumption matters)
	EMIT(LLmov, valReg, valReg, EmitShiftLsr, 8, EmitLeaveFlags, false);
	
	//ORR Rval, Rval, Rtmp, LSR #16		(logical shift drops the low input byte into bits 15..8 with zeroes above it)
	EMIT(LLorrReg, valReg, valReg, tmpReg, EmitShiftLsr, 16, EmitLeaveFlags, false);
	
	return EmitErrNone;
}

//3 instrs (leaves crap in top 16 bits)
//byte-swaps the halfword in the bottom of valReg without needing a temp register. only the bottom 16 bits
// of the result are meaningful. flags are left alone
//NOTE(review): the final ORR also merges input bits 31..24 into the bottom byte of the result, so this looks
// like it needs the input's top byte to be zero to be correct - confirm against callers
static enum EmitStatus audioPrvEmitBswap16bot(struct EmitBuf *dest, uint32_t valReg)
{
	//MOV Rval, Rval, ROR #8		(high input byte lands in bits 7..0, low input byte in bits 31..24)
	EMIT(LLmov, valReg, valReg, EmitShiftRor, 8, EmitLeaveFlags, false);
	
	//BIC Rval, Rval, #0x0000ff00		(immediate is given as 0xff rotated right by 24)
	EMIT(LLbicImm, valReg, valReg, 0xff, 24, EmitLeaveFlags);
	
	//ORR Rval, Rval, Rval, LSR #16		(brings the low input byte down from bits 31..24 into bits 15..8)
	EMIT(LLorrReg, valReg, valReg, valReg, EmitShiftLsr, 16, EmitLeaveFlags, false);
	
	return EmitErrNone;
}


//emit efficient code to load a sample in the proper format, convert it to our 8.24 format, and adjust the source pointer by the number of bytes consumed
//produced code will output result in dstReg, no other regs will be clobbered, flags could be clobbered, stack might be used
//produced code assumes base for source data is in r1, and will be updated
//will not work if dstReg is r0..r2 and sample type is float
static enum EmitStatus audioPrvEmitLoadSample(struct EmitBuf *dest, uint32_t dstRegL, uint32_t dstRegR, uint32_t tmpReg, enum AudioSampleType typ, enum AudioChannelConfig chCfg, bool *tmpRegUsedP)
{
	uint32_t dstRegT, i, nCh, srcReg = 1;
	enum EmitStatus now;
	bool stereo;
	
	if (tmpRegUsedP)
		*tmpRegUsedP = false;
	
	switch (chCfg) {
		case AudioMono:
			stereo = false;
			nCh = 1;
			
			if (dstRegL >= 8) {
				loge("%s: left reg must be loreg and %u >= 8\n", __func__, dstRegL);
				return EmitErrNotEncodeable;
			}
			break;
		
		case AudioStereo:
			stereo = true;
			nCh = 2;
			
			if (dstRegL >= dstRegR) {
				loge("%s: left reg must be smaller than right and %u >= %u\n", __func__, dstRegL, dstRegR);
				return EmitErrNotEncodeable;
			}
			if (dstRegR >= 8) {
				loge("%s: left reg must be loreg and %u >= 8\n", __func__, dstRegL);
				return EmitErrNotEncodeable;
			}
			break;
		
		default:
			return EmitErrNotEncodeable;
	}
	
	(void)nCh;	//shut GCC up
	
	//first: load, second: shift and adjust pointers
	//try to keep loads in order so that they can be pipelined and thus we save a cycle
	switch (typ) {
		case AudioSampleU8:
		
			if (stereo) {
				
				//load R before L so that at least one instr can be 16 bits
				
				//LDRB dstRegR, [src, #1]
				EMIT(LLloadImm, dstRegR, srcReg, 1, EmitSzByte, false, EmitAdrModeIndex);
				
				//LDRB dstRegL, [src], #2
				EMIT(LLloadImm, dstRegL, srcReg, 2, EmitSzByte, false, EmitAdrModePostindex);
				
				//dstRegR -= 0x80;
				EMIT(LLsubImm, dstRegR, dstRegR, 0x80, EmitFlagsDoNotCare, false);
			}
			else {
				//LDRB dstRegL, [src], #1
				EMIT(LLloadImm, dstRegL, srcReg, 1, EmitSzByte, false, EmitAdrModePostindex);
			}
			//dstRegL -= 0x80;
			EMIT(LLsubImm, dstRegL, dstRegL, 0x80, EmitFlagsDoNotCare, false);
			break;
		
		case AudioSampleS8:
		
			//LDRSB dstRegL, [src], #1
			EMIT(LLloadImm, dstRegL, srcReg, 1, EmitSzByte, true, EmitAdrModePostindex);
			
			if (stereo) {
				//LDRSB dstRegR, [src], #1
				EMIT(LLloadImm, dstRegL, srcReg, 1, EmitSzByte, true, EmitAdrModePostindex);
			}
			break;
		
		case AudioSampleU16LE:
		case AudioSampleS16LE:
		case AudioSampleU16BE:
		case AudioSampleS16BE:
		
			if (stereo) {
				
				if (typ == AudioSampleS16LE || typ == AudioSampleU16LE) {
					
					//LDR dstRegL, [src], #4
					EMIT(LLloadImm, dstRegL, srcReg, 4, EmitSzWord, false, EmitAdrModePostindex);
					
					//MOV dstRegR, dstRegL, ASR #16
					EMIT(LLmov, dstRegR, dstRegL, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
					
					//MOV dstRegL, dstRegL, LSL #16
					EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
					
					//MOV dstRegL, dstRegL, ASR #16
					EMIT(LLmov, dstRegL, dstRegL, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
				}
				else {
					
					//LDR dstRegR, [src], #4
					EMIT(LLloadImm, dstRegR, srcReg, 4, EmitSzWord, false, EmitAdrModePostindex);
					
					//REV dstRegR, dstRegR
					now = audioPrvEmitRev(dest, dstRegR, tmpReg);
					if (now != EmitErrNone)
						return now;
					
					if (tmpRegUsedP)
						*tmpRegUsedP = false;
					
					//MOV dstRegL, dstRegR, ASR #16
					EMIT(LLmov, dstRegL, dstRegR, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
					
					//MOV dstRegR, dstRegR, LSL #16
					EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
					
					//MOV dstRegR, dstRegR, ASR #16
					EMIT(LLmov, dstRegR, dstRegR, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
				}
				
				if (typ == AudioSampleU16LE || typ == AudioSampleU16BE) {
						
					//SUB dstRegL, dstRegL, #0x8000
					EMIT(LLsubImm, dstRegL, dstRegL, 0x8000, EmitFlagsDoNotCare, false);
					
					//SUB dstRegR, dstRegR, #0x8000
					EMIT(LLsubImm, dstRegL, dstRegL, 0x8000, EmitFlagsDoNotCare, false);
				}
			}
			else {
				
				if (typ == AudioSampleS16LE) {
					
					//LDRSH dstRegL, [src], #2
					EMIT(LLloadImm, dstRegL, srcReg, 2, EmitSzHalfword, true, EmitAdrModePostindex);
				}
				else {
					
					//LDRH dstRegL, [src], #2
					EMIT(LLloadImm, dstRegL, srcReg, 2, EmitSzHalfword, false, EmitAdrModePostindex);
					
					if (typ == AudioSampleS16BE) {
						
						//dstRegL = signed_swap16(dstRegL)
						now = audioPrvEmitBswap16S(dest, dstRegL, tmpReg);
						if (now != EmitErrNone)
							return now;
						
						if (tmpRegUsedP)
							*tmpRegUsedP = false;
					}
					else {
						
						if (typ == AudioSampleU16BE) {
							
							//dstRegL =unsigned_swap16(dstRegL)
							now = audioPrvEmitBswap16U(dest, dstRegL, tmpReg);
							if (now != EmitErrNone)
								return now;
						}
						
						if (tmpRegUsedP)
							*tmpRegUsedP = false;
						
						//SUB dstRegL, dstRegL, #0x8000
						EMIT(LLsubImm, dstRegL, dstRegL, 0x8000, EmitFlagsDoNotCare, false);
					}
				}
			}
			break;

		case AudioSampleU32LE:
		case AudioSampleU32BE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
		
			//ldmia src!, {dstRegL, [dstRegR]}
			EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);

			if (typ == AudioSampleU32BE || typ == AudioSampleS32BE) {
				
				//REV dstRegL, dstRegL
				now = audioPrvEmitRev(dest, dstRegL, tmpReg);
				if (now != EmitErrNone)
					return now;
				
				if (tmpRegUsedP)
					*tmpRegUsedP = false;
				
				if (stereo) {
					
					//REV dstRegR, dstRegR
					now = audioPrvEmitRev(dest, dstRegR, tmpReg);
					if (now != EmitErrNone)
						return now;
				}
			}
		
			if (typ == AudioSampleU32LE || typ == AudioSampleU32BE) {
				
				//SUB dstRegL, dstRegL, #0x80000000
				EMIT(LLsubImm, dstRegL, dstRegL, 0x80000000, EmitFlagsDoNotCare, false);
				
				if (stereo) {
					
					//SUB dstRegR, dstRegR, #0x80000000
					EMIT(LLsubImm, dstRegR, dstRegR, 0x80000000, EmitFlagsDoNotCare, false);
				}
			}
			break;
		
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
		
			// abbreviated float reading code that only does what we care for. we need two temp regs and do this for each value
			//  inf, nan, and all out of bounds values become zero (this might introduce clipping if your samples are too big)
		
			//push {r0-r2}
			EMIT(HLpush, 0x0007);
			
			//ldmia src!, {dstRegL, [dstRegR]}
			EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);
			
			for (i = 0; i < nCh; i++) {
				
				uint32_t rvNo = i ? dstRegR : dstRegL;
				struct EmitBuf bcsToOut;
		
				if (typ == AudioSampleFloatBE) {
					
					//rev rvNo, rvNo
					now = audioPrvEmitRev(dest, rvNo, 2);
					if (now != EmitErrNone)
						return now;
				}
			
				//ubfx r0, rV, #23, #8 =>   mov r0, rv, lsl # 1;  mov r0, r0m, lsr # 24
				EMIT(LLmov, 0, rvNo, EmitShiftLsl, 1, EmitFlagsDoNotCare, false);
				EMIT(LLmov, 0, 0, EmitShiftLsr, 24, EmitFlagsDoNotCare, false);
				
				//subs r0, #104
				EMIT(LLsubImm, 0, 0, 104, EmitSetFlags, false);
				
				//cmp r0, #127 - 104
				EMIT(LLcmpImm, 0, 127 - 104);
				
				//mov[cs] rV, #0
				EMIT(SetCond, EmitCcCs);
				EMIT(HLloadImmToReg, rvNo, 0, false, false, true);
				EMIT(SetCond, EmitCcAl);
				
				//bcs out
				EMIT(SaveSpace, &bcsToOut, 2);
				
				//lsl r1, rV, #8
				EMIT(LLmov, 1, rvNo, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
				
				//orr r1, #0x80000000
				EMIT(LLorrImm, 1, 1, 0x80000000, 0, EmitFlagsDoNotCare);
				
				//rsb r0, r0, #(127 - 104) + 9
				EMIT(LLrsbImm, 0, 0, (127 - 104) + 9, EmitFlagsDoNotCare, false);
				
				//lsrs r1, r0
				EMIT(LLshiftByReg, 1, 1, 0, EmitShiftLsr, EmitFlagsDoNotCare, false);
				
				//lsls rV, #1
				EMIT(LLmov, rvNo, rvNo, EmitShiftLsl, 1, EmitSetFlags, false);
				
				//neg[cs] rV, r1
				EMIT(SetCond, EmitCcCs);
				EMIT(LLrsbImm, rvNo, 1, 0, EmitLeaveFlags, true);
				EMIT(SetCond, EmitCcAl);
				
				//mov[cc] rV, r1
				EMIT(SetCond, EmitCcCc);
				EMIT(LLmov, rvNo, 1, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
				EMIT(SetCond, EmitCcAl);
				
				//"out" label is here
				EMIT_TO(LLbranch, &bcsToOut, emitGetPtrToJumpHere(dest), EmitCcCs);
			}
			
			//pop {r0-r2}
			EMIT(HLpop, 0x0007);
			break;
		default:
			return EmitErrNotEncodeable;
	}
	
	//shift sample into place
	switch (typ) {
		case AudioSampleU8:
		case AudioSampleS8:
			//lsls dstRegL, 16
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//lsls dstRegR, 16
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleU16LE:
		case AudioSampleU16BE:
		case AudioSampleS16LE:
		case AudioSampleS16BE:
			//lsl dstRegL, 8
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//lsls dstRegR, 8
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleU32LE:
		case AudioSampleU32BE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
			//asr dstRegL, 8
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftAsr, 8, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//asr dstRegR, 8
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftAsr, 8, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
			break;
		default:
			//nothing
			return EmitErrNotEncodeable;
	}
	
	return EmitErrNone;
}

//tmpReg may not equal dstReg, all else allowed
//emits code that multiplies sampleReg by volumeReg (signed, 64-bit result) and leaves bits 41..10 of the
// product in dstReg, that is: dstReg = (int64)sample * volume >> 10, truncated to 32 bits
//tmpReg receives the high word of the product and is thus clobbered. flags may be clobbered
static enum EmitStatus audioPrvEmitVolumeScale(struct EmitBuf *dest, uint32_t dstReg, uint32_t sampleReg, uint32_t volumeReg, uint32_t tmpReg)
{
	//smull dstReg, tmpReg, volumeReg, sampleReg		(lo word to dstReg, hi word to tmpReg)
	EMIT(LLsmull, dstReg, tmpReg, volumeReg, sampleReg);
	
	//lsr dstReg, #10		(keep the top 22 bits of the lo word...)
	EMIT(LLmov, dstReg, dstReg, EmitShiftLsr, 10, EmitFlagsDoNotCare, false);
	
	//orr dstReg, tmpReg, lsl #22		(...and put the bottom 10 bits of the hi word above them)
	EMIT(LLorrReg, dstReg, dstReg, tmpReg, EmitShiftLsl, 22, EmitFlagsDoNotCare, false);
	
	return EmitErrNone;
}

//scale sample by volume, take channels into account. for mono, L is used. if mono in AND out, we expect mono volume in volL
//if output is mono, combine and generate it in L
//regL & regR are in and out, MUST be loRegs
static enum EmitStatus audioPrvMixInFuncScaleChansByVolume(struct EmitBuf *dest, uint32_t regL, uint32_t regR, uint32_t regVolL, uint32_t regVolR, uint32_t regTmp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	
	//if input is stereo, scale by volumes now
	if (chans == AudioStereo) {
		
		now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmp);
		if (now != EmitErrNone)
			return now;
		
		now = audioPrvEmitVolumeScale(dest, regR, regR, regVolR, regTmp);
		if (now != EmitErrNone)
			return now;
		
		//if output is mono, combine samples now
		if (!nativeFmtIsStereo) {
		
			//add regL, regR
			EMIT(LLaddReg, regL, regL, regR, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//asrs regL, #1
			EMIT(LLmov, regL, regL, EmitShiftAsr, 1, EmitFlagsDoNotCare, false);
		}
	}
	else if (nativeFmtIsStereo) {	//if input is mono but output is stereo, scale the input sample by each channel's volume
		
		now = audioPrvEmitVolumeScale(dest, regR, regL, regVolR, regTmp);
		if (now != EmitErrNone)
			return now;
			
		now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmp);
		if (now != EmitErrNone)
			return now;
	}
	else {						//mono in and out - scale the sample by the average volume we had calculated
		
		now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmp);
		if (now != EmitErrNone)
			return now;
	}
	
	return EmitErrNone;
}

//emits code for regDst += regSrc, as used when mixing a sample into the output buffer
//if MIXER_BE_RECKLESS_WITH_MIXING is set, this is a plain wrapping add. else it is a signed saturating add:
// a single QADD where we have one, or a 3-instr sequence where we do not
//flags may be clobbered
static enum EmitStatus audioPrvMixInAddToOutSampleProperly(struct EmitBuf *dest, uint32_t regDst, uint32_t regSrc)
{
	if (MIXER_BE_RECKLESS_WITH_MIXING) {
		
		//ADD regDst, regSrc
		EMIT(LLaddReg, regDst, regDst, regSrc, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	else {
		
		#if defined(HAVE_v7E_SUPPORT) || defined(HAVE_ARM_v5_DSP_EXTS)
			//QADD regDst, regSrc
			EMIT(LLqadd, regDst, regDst, regSrc);
		#else
			//this is the fastest way i found to saturate in v7. it works, i promise. proof is up to the reader
			//sketch: on signed overflow (V set) the wrapped result has the wrong sign, and C from the ADDS is
			// clear for positive overflow and set for negative overflow. ASR #31 smears that wrong sign over the
			// whole register (all ones or all zeroes), and RRX then shifts C in at the top, which gives
			// 0x7fffffff for positive overflow and 0x80000000 for negative overflow.
			// both fixup instrs leave flags alone so that V and C from the ADDS survive
			
			//ADDS regDst, regSrc
			EMIT(LLaddReg, regDst, regDst, regSrc, EmitShiftLsl, 0, EmitSetFlags, false);
			
			//ASR[vs] regDst, #31
			EMIT(SetCond, EmitCcVs);
			EMIT(LLmov, regDst, regDst, EmitShiftAsr, 31, EmitLeaveFlags, true);
			EMIT(SetCond, EmitCcAl);
	
			//RRX[vs] regDst
			EMIT(SetCond, EmitCcVs);
			EMIT(LLmov, regDst, regDst, EmitShiftRor, 0 /* ROR 0 is RRX */, EmitLeaveFlags, true);
			EMIT(SetCond, EmitCcAl);
			
		#endif
	}
	
	return EmitErrNone;
}

//may corrupt samples in for speed. may corrupt flags, tmpRegL < tmpRegR, regL < regR, tmpRegL bust be even
static enum EmitStatus audioPrvMixInFuncEmitSampleExport(struct EmitBuf *dest, uint32_t regL, uint32_t regR, uint32_t tmpRegL, uint32_t tmpRegR, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	
	//verify invariants
	if (tmpRegL >= tmpRegR || regL >= regR || (regL & 1))
		return EmitErrInternalErr;
	
	if (nativeFmtIsStereo) { //add two samples into the output buffer
		
		//ldr tmpRegL, [r0, #0]
		EMIT(LLloadImm, tmpRegL, 0, 0, EmitSzWord, false, EmitAdrModeIndex);
	
		//ldr tmpRegR, [r0, #4]
		EMIT(LLloadImm, tmpRegR, 0, 4, EmitSzWord, false, EmitAdrModeIndex);
		
		now = audioPrvMixInAddToOutSampleProperly(dest, regL, tmpRegL);
		if (now != EmitErrNone)
			return now;
		
		now = audioPrvMixInAddToOutSampleProperly(dest, regR, tmpRegR);
		if (now != EmitErrNone)
			return now;
		
		//sample regs are in order, use a stmia with a writeback or a strd (faster)
		#ifdef HAVE_ARM_v5_DSP_EXTS
		
			//strd regL, regR, [r0], #8
			EMIT(LLstrdImm, regL, regR, 0, 8, EmitAdrModePostindex);
		#else
			//stmia r0!, {regL, regR}
			EMIT(HLstmia, 0, (1 << regL) + (1 << regR), true);
		#endif
	}
	else {				//add one sample into the output buffer
		
		//ldr tmpRegL, [r0]
		EMIT(LLloadImm, tmpRegL, 0, 0, EmitSzWord, false, EmitAdrModeIndex);

		//add regL, tmpRegL 				// add LEFT
		now = audioPrvMixInAddToOutSampleProperly(dest, regL, tmpRegL);
		if (now != EmitErrNone)
			return now;

		//str regL [r0], #4	//store
		EMIT(LLstoreImm, regL, 0, 4, EmitSzWord, EmitAdrModePostindex);
	}
	
	return EmitErrNone;
}

//emits the body of a mix-in function for a stream that needs no resampling: each input sample is loaded,
// converted, volume-scaled, and added into the output buffer
//register use in the produced code, as seen below: r0 is the output pointer, r1 is the address of the source
// pointer on entry (the pointer itself is then loaded into r1 and written back at the end), r2 is the output
// sample budget, and volumeL, volumeR, numInSamples are read from the caller's stack
//presumably the produced function has the same C signature as the resampling variants - TODO confirm
//NOTE(review): the produced loop is of the do-while kind, so it assumes that at least one sample is to be
// produced - confirm that callers guarantee this
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsNoResamp(struct EmitBuf *dest, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	uintptr_t loopPos;
	
	//push {r1, r4-r6,r8-r9, lr}		(7 words, so the stack-passed params now start at [sp, #7 * 4])
	EMIT(HLpush, 0x4372);
	
	//ldr r1, [r1]	//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//up front decide how many samples we'll produce
	
	//ldr r4, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 4, EMIT_REG_NO_SP, 9 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//cmp r2, r4
	EMIT(LLcmpReg, 2, 4, EmitShiftLsl, 0);
	
	//PL condition is true if r2 >= r4		(more space in output than we'll produce samples)
	// (PL looks at the sign of r2 - r4, so this holds for as long as that difference fits in 31 bits)
	
	//movpl r2, r4	//so we simply just have to produce numSamples == r2
	EMIT(SetCond, EmitCcPl);
	EMIT(LLmov, 2, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
	EMIT(SetCond, EmitCcAl);
	
	#ifdef HAVE_ARM_v5_DSP_EXTS
	
		//ldrd r8, r9, [sp, #ofst_to_volumes]	//load volumes into r8,r9 using ldrd from sp
		EMIT(LLldrdImm, 8, 9, EMIT_REG_NO_SP, 7 * sizeof(uint32_t), EmitAdrModeIndex);
	
	#else
		
		//ldr r8, [sp, #ofst_to_volume_L]
		EMIT(LLloadImm, 8, EMIT_REG_NO_SP, 7 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
		//ldr r9, [sp, #ofst_to_volume_R]
		EMIT(LLloadImm, 9, EMIT_REG_NO_SP, 8 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
		
	#endif

	//if input & output are both mono, we need to calculate average volume (we'll store it in r8)
	if (!nativeFmtIsStereo && chans == AudioMono) {
	
		//add r8, r9
		EMIT(LLaddReg, 8, 8, 9, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

		//lsr r8, #1
		EMIT(LLmov, 8, 8, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	
	//everything from here to the conditional branch below is the per-sample loop body
	loopPos = emitGetPtrToJumpHere(dest);
	
	//load sample(s) into r4 (and r5 if stereo). lr is the temp, it was saved by the push above
	now = audioPrvEmitLoadSample(dest, 4, 5, 14, sampTyp, chans, NULL);
	if (now != EmitErrNone)
		return now;
	
	//handle volume scaling (volumes are in r8, r9. lr is the temp again)
	now = audioPrvMixInFuncScaleChansByVolume(dest, 4, 5, 8, 9, 14, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//store it (adds into the output buffer at r0 and advances r0. r6 and lr are the temps)
	now = audioPrvMixInFuncEmitSampleExport(dest, 4, 5, 6, 14, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//subs r2, #1  // account for the one sample we just did
	EMIT(LLsubImm, 2, 2, 1, EmitSetFlags, false);
	
	//loop back if there are more samples to work on
	EMIT(LLbranch, loopPos, EmitCcNe);

	//ldr r2, [sp]   //we need to store r1. [sp] holds the r1 we pushed on entry: the address of the source pointer
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);

	#ifdef HAVE_ARM_v5

		//pop {r1, r4-r6,r8-r9, pc}
		EMIT(HLpop, 0x8372);
	
	#else
		
		//pop {r1, r4-r6,r8-r9, lr}
		EMIT(HLpop, 0x4372);
		
		//bx lr
		EMIT(LLbx, EMIT_REG_NO_LR);
		
	#endif

	return EmitErrNone;
}

//emits the body of a mix-in function for a stream whose sample rate is below the output rate: output samples
// are produced by linear interpolation between the "current" and the "next" input sample, with the weights
// and the "advance to the next input sample" decisions coming from resampTab
//resampTab entries are 16 bits each, as used below: the bottom bit says whether a new input sample is to be
// loaded after this output sample, the top 15 bits are the weight, and a zero entry marks the end of the
// table (we wrap to the start). the table's address is baked into the produced code as an immediate
//produced code runs until either the output space or the input data runs out, and then writes the source
// pointer and the resample state back
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsUpsample(struct EmitBuf *dest, const uint16_t* resampTab, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	//size of one input sample frame (all channels), as a shift and in bytes
	uint32_t sampleSzShift = (mSampleShifts[sampTyp] + (chans == AudioStereo ? 1 : 0)), sampleSz = 1 << sampleSzShift;
	//forward branches whose targets are not yet known when they are emitted. space is reserved and filled in later
	struct EmitBuf beqOutOfInput, skipTableReloadSpot, bcsSpot, jumpToExitCodeSpot;
	uint32_t i, numOutputChannels = nativeFmtIsStereo ? 2 : 1, tempReg;
	uintptr_t loopLoadSample, loopPostLoadSample;
	enum EmitStatus now;
	
	//int32_t* upsample(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples)

	//r0 is dst
	//r1 is src
	//r2 is dst end pointer (calculated from maxOutSamples below)
	//r3 is temp value
	//r4 is current sample L (or mono)
	//r5 is current sample R (if output is mono, we store "src end ptr" here)
	//r6 is "next" sample L (or mono)
	//r7 is "next" sample R
	//r8 is volume L (or mono volume)
	//r9 is volume R
	//r10 is output L (or mono)
	//r11 is output R
	//r12 is current table pointer
	//r14 is temp value
	//[sp, 4] is "source end ptr" if output is stereo
	
	//RESAMP STATE is:
	// [0] - previously read "now" L sample
	// [1] - previously read "now" R sample
	// [2] - table pointer to current entry
	

	//we use each sample more than once, so volume scaling is better done BEFORE interpolation than after. we do that
	
	//push {r1-r11, lr}		(12 words, so the stack-passed params now start at [sp, #12 * 4])
	EMIT(HLpush, 0x4ffe);

	//ldr r1, [r1]	//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//calculate source end pointer
	//ldr r5, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 5, EMIT_REG_NO_SP, 14 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//add r5, r1, r5, lsl in_sample_sz_shift
	EMIT(LLaddReg, 5, 1, 5, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, false);
	
	if (nativeFmtIsStereo) {
		//str r5, [sp, #4]					//stash it on the stack where we saved a slot for it (where we stashed r2)
		EMIT(LLstoreImm, 5, EMIT_REG_NO_SP, 4, EmitSzWord, EmitAdrModeIndex);
	}

	//calculate destination end pointer
	//add r2, r0, r2, lsl 2 + isStereo
	EMIT(LLaddReg, 2, 0, 2, EmitShiftLsl, nativeFmtIsStereo ? 3 : 2, EmitFlagsDoNotCare, false);

	//load resamp state
	//ldmia r3, {r6, r7, r12}				//load current table index to r12, and "cur L" and "cur R" from resamp state into r6,r7 (which normally stores next sample), but we're about to move it into "cur" sample space
	EMIT(LLldmia, 3, 0x10c0, false);
	
	#ifdef HAVE_ARM_v5_DSP_EXTS
	
		//ldrd r8, r9, [sp, #ofst_to_volumes]	//load volumes into r8,r9 using ldrd from sp
		EMIT(LLldrdImm, 8, 9, EMIT_REG_NO_SP, 12 * sizeof(uint32_t), EmitAdrModeIndex);
	
	#else
		
		//ldr r8, [sp, #ofst_to_volume_L]
		EMIT(LLloadImm, 8, EMIT_REG_NO_SP, 12 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
		//ldr r9, [sp, #ofst_to_volume_R]
		EMIT(LLloadImm, 9, EMIT_REG_NO_SP, 13 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
		
	#endif
	
	//if input & output are both mono, we need to calculate average volume (we'll store it in r8)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		//add r8, r9
		EMIT(LLaddReg, 8, 8, 9, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//lsr r8, #1
		EMIT(LLmov, 8, 8, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	
	//save location for the loop where a new sample needs to be loaded
	loopLoadSample = emitGetPtrToJumpHere(dest);
	
	//move current "next" sample into "current"
	EMIT(LLmov, 4, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	if (nativeFmtIsStereo) {
		EMIT(LLmov, 5, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//check if we're out of input samples (since we're about to overwrite r6, we can use it for temp)
	if (!nativeFmtIsStereo)
		tempReg = 5;	//for mono output the end pointer lives in r5 permanently
	else {
		//ldr r6, [sp, #4]
		EMIT(LLloadImm, 6, EMIT_REG_NO_SP, 4, EmitSzWord, false, EmitAdrModeIndex);
		
		tempReg = 6;
	}
	
	//cmp src, tmpReg
	EMIT(LLcmpReg, 1, tempReg, EmitShiftLsl, 0);
	
	//save space for a "beq" to exit
	EMIT(SaveSpace, &beqOutOfInput, 2);
	
	//get "next" left or mono sample into r6, right into r7 if needed. r3 is the temp
	now = audioPrvEmitLoadSample(dest, 6, 7, 3, sampTyp, chans, NULL);
	if (now != EmitErrNone)
		return now;
	
	//scale by volume and maybe merge
	now = audioPrvMixInFuncScaleChansByVolume(dest, 6, 7, 8, 9, 3, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//save location for the loop where sample is not needed to be loaded
	loopPostLoadSample = emitGetPtrToJumpHere(dest);
	
	//ldrh r3, [r12], #2 //grab resamp tab value
	EMIT(LLloadImm, 3, 12, 2, EmitSzHalfword, false, EmitAdrModePostindex);

	//cmp r3, #0		(a zero entry means we hit the end of the table)
	EMIT(LLcmpImm, 3, 0);

	//save space for a "bne" to skip table reload
	EMIT(SaveSpace, &skipTableReloadSpot, 2);
	
	//ldr r12, =resampTab	//reload table
	EMIT(HLloadImmToReg, 12, (uintptr_t)resampTab, true, true, false);
	
	//ldrh r3, [r12], #2 	//grab resamp tab value
	EMIT(LLloadImm, 3, 12, 2, EmitSzHalfword, false, EmitAdrModePostindex);
	
	//set up that "bne" we saved a space for
	EMIT_TO(LLbranch, &skipTableReloadSpot, emitGetPtrToJumpHere(dest), EmitCcNe);
	
	//lsrs r3, #1				//leave the table value's top 15 bits in r3, shift the "load a new sample?" bit into C
	EMIT(LLmov, 3, 3, EmitShiftLsr, 1, EmitSetFlags, false);
	
	//rsb lr, r3, #0x8000		//get (0x8000 - tabEntry) into lr
	EMIT(LLrsbImm, EMIT_REG_NO_LR, 3, 0x8000, EmitLeaveFlags, false);
	
	//interpolate: out = (cur * tabVal + next * (0x8000 - tabVal)) >> 15, with the sum being 64 bits wide
	//nothing from here to the reserved "bcs" below may change flags: C from the "lsrs" above has to survive
	for (i = 0; i < numOutputChannels; i++) {
		//we need a temp reg. while generating first (L) sample, use the second (R) output reg as temp
		//while generating second (R) sample, use r3 since it holds tablVal which we no longer need once the smull has read it
		//(this tempReg is local to the loop and shadows the function-level one)
		uint32_t sampRegNow = 4 + i, sampRegNext = 6 + i, sampOutReg = 10 + i, tempReg = i ? 3 : 11;
		
		//smull sampOutReg, tempReg, sampRegNow, tablVal(3)	//multiply sample into table value
		EMIT(LLsmull, sampOutReg, tempReg, sampRegNow, 3);
		
		//smlal sampOutReg, tempReg, sampRegNext, inverseTabVal(EMIT_REG_NO_LR)
		EMIT(LLsmlal, sampOutReg, tempReg, sampRegNext, EMIT_REG_NO_LR);
		
		//assemble the results:
		//lsr sampOutReg, #15
		EMIT(LLmov, sampOutReg, sampOutReg, EmitShiftLsr, 15, EmitLeaveFlags, false);
		
		//orr sampOutReg, tempReg, lsl #17
		EMIT(LLorrReg, sampOutReg, sampOutReg, tempReg, EmitShiftLsl, 17, EmitLeaveFlags, false);
	}
	
	//we need to discriminate now based on C flag (which is set if we DO need a new sample to be read).
	// we do this using a bcs which we'll save space for
	// we actually need to emit the sample first, but since that may clobber flags, we do that post-decision
	// (which is why the export code is emitted twice: once on each side of the branch)
	EMIT(SaveSpace, &bcsSpot, 2);
	
	//store the results
	now = audioPrvMixInFuncEmitSampleExport(dest, 10, 11, 3, 14, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;

	// this is the path for when we DO NOT need a new sample
	
	//cmp r2, r0	//see if we're done
	EMIT(LLcmpReg, 2, 0, EmitShiftLsl, 0);
	
	//loop to start (without sample load) using bne
	EMIT(LLbranch, loopPostLoadSample, EmitCcNe);
	
	//source pointer now points to PAST what should be the "next" sample next run - adjust it
	//subs r1, in_sample_sz
	EMIT(LLsubImm, 1, 1, sampleSz, EmitSetFlags, false);
	
	//we'll need a jump here to exit code. save a slot for it
	EMIT(SaveSpace, &jumpToExitCodeSpot, 2);
	
	// this is the path for when we DO need a new sample
	
	//fill the above "bcs"
	EMIT_TO(LLbranch, &bcsSpot, emitGetPtrToJumpHere(dest), EmitCcCs);
	
	//store the results
	now = audioPrvMixInFuncEmitSampleExport(dest, 10, 11, 3, 14, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//cmp r2, r0	//see if we're done
	EMIT(LLcmpReg, 2, 0, EmitShiftLsl, 0);
	
	//loop to start (with sample load) using bne (short one should work)
	EMIT(LLbranch, loopLoadSample, EmitCcNe);
	
	//nonetheless shift the "next" sample to "cur" one so we can stash it properly below
	EMIT(LLmov, 4, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	if (nativeFmtIsStereo) {
		EMIT(LLmov, 5, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//source pointer now points to what should be the "next" sample next run. Good
	
	// this is the common exit path. we saved a slot above to insert a jump to here. generate the jump
	EMIT_TO(LLbranch, &jumpToExitCodeSpot, emitGetPtrToJumpHere(dest), EmitCcAl);
	
	// we also saved a spot above when we ran out of input data, generate that beq
	EMIT_TO(LLbranch, &beqOutOfInput, emitGetPtrToJumpHere(dest), EmitCcEq);

	// save "src" pointer
	//ldr r2, [sp]   //we need to store r1. [sp] holds the r1 we pushed on entry: the address of the source pointer
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);
	
	// save resamp state
	//ldr r2, [sp, #8]		([sp, #8] holds the r3 we pushed on entry: the resample state pointer)
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 8, EmitSzWord, false, EmitAdrModeIndex);
	
	//stmia r2, {r4, r5, r12}		(the last param is true here, while the ldmia of the state above passed false. r2 is not used again before the pop)
	EMIT(LLstmia, 2, 0x1030, true);

	#ifdef HAVE_ARM_v5

		//pop {r1-r11, pc}
		EMIT(HLpop, 0x8ffe);
	
	#else
		
		//pop {r1-r11, lr}
		EMIT(HLpop, 0x4ffe);
		
		//bx lr
		EMIT(LLbx, EMIT_REG_NO_LR);
		
	#endif

	return EmitErrNone;
}

/*
 * Emits, into `dest`, the machine code of a downsampling routine whose prototype is given in the
 * comment at the top of the body (dst, srcP, maxOutSamples, resampleStateP, volumeL, volumeR,
 * numInSamples).
 *
 * dest              - emit buffer receiving the generated instructions
 * resampTab         - table of 16-bit resampling entries; its address is embedded in the generated
 *                     code, which restarts from it whenever a zero entry is read
 * sampTyp, chans    - input sample format and channel layout; they determine the per-frame input
 *                     size shift and are passed on to the sample-load emitter
 * nativeFmtIsStereo - selects the output frame size (8 vs 4 bytes) and is passed on to the
 *                     volume-scaling and sample-export emitters
 *
 * Each table entry is consumed by the generated loop as:
 *   bit 15     - set: fetch a new input sample before accumulating
 *   bits 14..1 - 14-bit multiplier, accumulated against the current sample with SMLAL
 *   bit 0      - set: an output sample is complete; scale, export it and zero the accumulators
 *
 * The generated loop runs until either the output end pointer or the input end pointer is reached,
 * then writes the source pointer back through srcP and stores the resampler state.
 *
 * Returns EmitErrNone on success, or the status returned by a failing helper emitter.
 * NOTE(review): EMIT()/EMIT_TO() presumably return early from this function on failure - confirm in emit.h
 */
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsDownsample(struct EmitBuf *dest, const uint16_t* resampTab, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	//shift that converts an input frame count to bytes (one extra bit for stereo input)
	//NOTE(review): assumes mSampleShifts[] holds log2 of the bytes per sample - confirm at its definition
	uint32_t sampleSzShift = (mSampleShifts[sampTyp] + (chans == AudioStereo ? 1 : 0));
	struct EmitBuf savedSpaceForJump;	//reused for each forward branch that is patched later
	bool tmpRegUsed = false;
	enum EmitStatus now;
	uintptr_t mainLoop;
	
	//int32_t* downsample(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples)

	//r0 is dst
	//r1 is src
	//r2 is dst endPtr
	//r3 is curSampL (or mono)
	//r4 is curSampR
	//r5 is tabentry
	//r6 is leftSum lo (or mono)
	//r7 is right sum lo
	//r8 is leftSum hi (or mono)
	//r9 is right sum hi
	//r10 is volume L (or mono)
	//r11 is volume R
	//r12 is tabPtr
	//lr  is source end ptr
	
	//RESAMP STATE is:
	// [0] - curSampL (or mono)
	// [1] - curSampR
	// [2] - leftSum lo (or mono)
	// [3] - right sum lo
	// [4] - leftSum hi (or mono)
	// [5] - right sum hi
	// [6] - tabptr


	//push {r1, r3-r11, lr}
	//11 registers are pushed, so: [sp] = srcP, [sp, #4] = resampleStateP, stack args start at [sp, #44]
	EMIT(HLpush, 0x4ffa);
	
	//ldr r1, [r1]							//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//calculate source end pointer
	//ldr r4, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 4, EMIT_REG_NO_SP, 13 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//add lr, r1, r4, lsl in_sample_sz_shift
	EMIT(LLaddReg, EMIT_REG_NO_LR, 1, 4, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, false);
	
	//calculate destination end pointer
	//add r2, r0, r2, lsl 2 + isStereo
	EMIT(LLaddReg, 2, 0, 2, EmitShiftLsl, nativeFmtIsStereo ? 3 : 2, EmitFlagsDoNotCare, false);
	
	//load resample state
	//ldmia r3, {r3, r4, r6-r9, r12}
	EMIT(LLldmia, 3, 0x13d8, false);
	
	#ifdef HAVE_ARM_v5_DSP_EXTS
	
		//ldrd r10, r11, [sp, #ofst_to_volumes]	//load volumes into r10,r11 using ldrd from sp
		EMIT(LLldrdImm, 10, 11, EMIT_REG_NO_SP, 11 * sizeof(uint32_t), EmitAdrModeIndex);
	
	#else
		
		//ldr r10, [sp, #ofst_to_volume_L]
		EMIT(LLloadImm, 10, EMIT_REG_NO_SP, 11 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
		//ldr r11, [sp, #ofst_to_volume_R]
		EMIT(LLloadImm, 11, EMIT_REG_NO_SP, 12 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
		
	#endif
	
	
	//if input & output are both mono, we need to calculate average volume (we'll store it in r10)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		//add r10, r11
		EMIT(LLaddReg, 10, 10, 11, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//lsr r10, #1
		EMIT(LLmov, 10, 10, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	
	//loop:
	mainLoop = emitGetPtrToJumpHere(dest);
	
	//load tab entry
	
	//ldrh r5, [r12], #2 //grab resamp tab value
	EMIT(LLloadImm, 5, 12, 2, EmitSzHalfword, false, EmitAdrModePostindex);
	
	//cmp r5, #0			//a zero entry means the table must be restarted
	EMIT(LLcmpImm, 5, 0);
	
	//save space for a "bne" to skip table reload
	EMIT(SaveSpace, &savedSpaceForJump, 2);
	
	//ldr r12, =resampTab	//reload table
	EMIT(HLloadImmToReg, 12, (uintptr_t)resampTab, true, true, false);
	
	//ldrh r5, [r12], #2 //grab resamp tab value
	EMIT(LLloadImm, 5, 12, 2, EmitSzHalfword, false, EmitAdrModePostindex);
	
	//set up that "bne" we saved a space for
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcNe);
	
	//lsls tabentry(aka r5), #17	//bit 15 of the entry ("load a new sample") ends up in the C flag
	EMIT(LLmov, 5, 5, EmitShiftLsl, 17, EmitSetFlags, false);

	//save space for "bcc skip_load_sample"
	EMIT(SaveSpace, &savedSpaceForJump, 2);
	
	//get "next" left or mono sample into r3, right into r4 if needed. use r5 for tmp and reload after
	now = audioPrvEmitLoadSample(dest, 3, 4, 5, sampTyp, chans, &tmpRegUsed);
	if (now != EmitErrNone)
		return now;
	
	if (tmpRegUsed) {
	
		//reload table entry since we used it for tmp
		
		//ldrh r5, [r12, #-2]	//r12 was already post-incremented past this entry
		EMIT(LLloadImm, 5, 12, -2, EmitSzHalfword, false, EmitAdrModeIndex);
		
		//lsl tabentry(aka r5), #17
		EMIT(LLmov, 5, 5, EmitShiftLsl, 17, EmitFlagsDoNotCare, false);
	}
	
	//skip_load_sample:
	
	//fill in that jump above to skip loading the sample
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcCc);
	
	//lsrs tabentry(aka r5), #18		//now only has the multiplier (in bottom 14 bits), and C bit has whether we need to emit
	EMIT(LLmov, 5, 5, EmitShiftLsr, 18, EmitSetFlags, false);

	// SMLAL leftSumLo, leftSumHi, tabentry, curSampL
	EMIT(LLsmlal, 6, 8, 5, 3);
	
	if (chans == AudioStereo) {
		
		// SMLAL rightSumLo, rightSumHi, tabentry, curSampR
		EMIT(LLsmlal, 7, 9, 5, 4);
	}

	//save space for "bcc noemit" (relies on the C flag from the "lsrs" above still being intact)
	EMIT(SaveSpace, &savedSpaceForJump, 2);

	//collapse the samples to a single reg
	
	//assemble the results: take bits 14..45 of each 64-bit sum (i.e. drop the 14 multiplier fraction bits)
	//lsr[s] leftSumLo, #14
	EMIT(LLmov, 6, 6, EmitShiftLsr, 14, EmitFlagsDoNotCare, false);
	
	//orr leftSumLo, leftSumHi, lsl #18
	EMIT(LLorrReg, 6, 6, 8, EmitShiftLsl, 18, EmitLeaveFlags, false);
		
	if (chans == AudioStereo) {
		
		//lsr[s] rightSumLo, #14
		EMIT(LLmov, 7, 7, EmitShiftLsr, 14, EmitFlagsDoNotCare, false);
		
		//orr rightSumLo, rightSumHi, lsl #18
		EMIT(LLorrReg, 7, 7, 9, EmitShiftLsl, 18, EmitLeaveFlags, false);
	}
	
	//scale by volume and maybe merge
	now = audioPrvMixInFuncScaleChansByVolume(dest, 6, 7, 10, 11, 8, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//emit samples (leftSum, rightSum, hi regs free as temps)
	now = audioPrvMixInFuncEmitSampleExport(dest, 6, 7, 8, 9, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//we need to zero all the result regs now
	EMIT(HLloadImmToReg, 6, 0, true, true, false);
	EMIT(LLmov, 7, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	EMIT(LLmov, 8, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	EMIT(LLmov, 9, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//noemit:
	
	//fill in that jump above to skip producing a sample
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcCc);
	
	//cmp r2, r0		//see if we're done with the output
	EMIT(LLcmpReg, 2, 0, EmitShiftLsl, 0);
	//if not, maybe done with input?
	//cmp[ne] r1, lr
	EMIT(SetCond, EmitCcNe);
	EMIT(LLcmpReg, 1, EMIT_REG_NO_LR, EmitShiftLsl, 0);
	EMIT(SetCond, EmitCcAl);
	
	//if both not done, go loop around
	EMIT(LLbranch, mainLoop, EmitCcNe);

	//loop is over - save state

	// save "src" pointer
	//ldr r2, [sp]   //we need to store r1 (the pushed r1 is srcP)
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);
	
	// save resamp state
	//ldr r2, [sp, #4]	//the pushed r3 is resampleStateP
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 4, EmitSzWord, false, EmitAdrModeIndex);
	
	//stmia r2, {r3, r4, r6-r9, r12}
	EMIT(LLstmia, 2, 0x13d8, false);

	#ifdef HAVE_ARM_v5

		//pop {r1, r3-r11, pc}
		EMIT(HLpop, 0x8ffa);
	
	#else
		
		//pop {r1, r3-r11, lr}
		EMIT(HLpop, 0x4ffa);
		
		//bx lr
		EMIT(LLbx, EMIT_REG_NO_LR);
		
	#endif

	return EmitErrNone;
}

/*
 * Emits a 4-instruction sequence that saturates the signed value in regData to toBits bits.
 *
 * dest    - emit buffer receiving the generated instructions
 * regData - register holding the value; saturated in place
 * regTemp - scratch register, clobbered by the generated code
 * toBits  - width of the signed range to saturate to (callers in this file pass 26)
 *
 * The generated sequence is:
 *   asrs   rTmp, rData, #toBits - 1         //0 or -1 if the value already fits
 *   addmis rTmp, #1                         //negative & fits: -1 + 1 = 0 -> Z set, N clear
 *   movne  rData, (1 << (toBits - 1)) - 1   //does not fit: load the positive limit
 *   negmi  rData, rData                     //does not fit and negative: negate the limit
 *
 * Note that the result range is symmetric: out-of-range negative values become
 * -((1 << (toBits - 1)) - 1), not -(1 << (toBits - 1)).
 *
 * NOTE(review): this relies on the conditional constant load not modifying the flags, since the
 * following "negmi" still consumes N from the first two instructions - confirm for HLloadImmToReg.
 *
 * Returns EmitErrNone once all instructions were emitted.
 * NOTE(review): EMIT() presumably returns early from this function on failure - confirm in emit.h
 */
static enum EmitStatus audioPrvMicSignedSat(struct EmitBuf *dest, uint_fast8_t regData, uint_fast8_t regTemp, uint_fast8_t toBits)
{
	//asrs rTmp, rData, #toBits - 1
	EMIT(LLmov, regTemp, regData, EmitShiftAsr, toBits - 1, EmitSetFlags, false);

	//addmis rTmp, #1
	EMIT(SetCond, EmitCcMi);
	EMIT(LLaddImm, regTemp, regTemp, 1, EmitSetFlags, false);
	EMIT(SetCond, EmitCcAl);

	//movne rData, (1UL << (toBits - 1)) - 1		//encoded as an MVN for 26 bit saturation
	EMIT(SetCond, EmitCcNe);
	EMIT(HLloadImmToReg, regData, (1UL << (toBits - 1)) - 1, false, false, false);
	EMIT(SetCond, EmitCcAl);

	//negmi rData, rData
	EMIT(SetCond, EmitCcMi);
	EMIT(LLrsbImm, regData, regData, 0, EmitLeaveFlags, false);
	EMIT(SetCond, EmitCcAl);

	return EmitErrNone;
}

/*
 * Converts a fixed-point sample to a single-precision float and returns the float's raw bits.
 *
 * i - signed sample; it is scaled by 1 / 2^25, so +/-2^25 maps to +/-1.0
 *
 * Returns the bit pattern of the resulting float in the host's own byte order.
 * NOTE(review): the "LE" suffix presumably assumes a little-endian host - confirm
 */
static uint32_t audioMicPrvToFloatLE(int32_t i)
{
	//2^25, the divisor that maps the integer range onto the float range
	const float fullScale = 33554432.f;
	union {
		uint32_t bits;
		float val;
	} pun;

	//dividing by a power of two is exact, so this matches a separate convert-then-divide
	pun.val = (float)i / fullScale;

	return pun.bits;
}

/*
 * Converts a fixed-point sample to a single-precision float and returns the float's raw bits
 * with the four bytes reversed.
 *
 * i - signed sample; it is scaled by 1 / 2^25, so +/-2^25 maps to +/-1.0
 *
 * Returns the byte-swapped bit pattern of the resulting float, so that storing it as a native
 * word yields the opposite byte order of what audioMicPrvToFloatLE() produces.
 */
static uint32_t audioMicPrvToFloatBE(int32_t i)
{
	union {
		float f;
		uint32_t i;
	} u;
	uint32_t v;

	u.f = i;
	u.f /= 33554432.f;
	v = u.i;

	//full 32-bit byte reversal: byte0 <-> byte3, byte1 <-> byte2
	return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) | ((v >> 8) & 0x0000ff00u) | (v >> 24);
}



/*
 * Emits, into `dest`, a microphone sample conversion routine.
 *
 * prototype of the generated code is
 *   void* MicCvtF(void* dst, const int16_t *src, uint32_t volumeL, uint32_t volumeR, uint32_t nSamplesOver2);
 * source guaranteed four byte aligned always
 *
 * dest    - emit buffer receiving the generated instructions
 * sampTyp - requested output sample format (8/16/32-bit integer, signed or unsigned, either
 *           endianness, or float)
 * chans   - requested output channel layout; anything other than AudioMono produces two output
 *           channels from each input sample (each with its own volume), while mono uses the
 *           average of the two volumes
 *
 * Generated code, per input sample: load a signed 16-bit sample, multiply by the volume,
 * saturate to 26 bits, optionally bias to unsigned, then shift / byte-swap / convert to the
 * requested format and store it. The loop ends when dst reaches dst + 2 * nSamplesOver2 frames.
 *
 * Registers in the generated code: r0 = dst, r1 = src, r2 = volume L (or mono average),
 * r3 = volume R, r4 = float converter address (float output) or byte-swap temp (32-bit integer
 * output), r5 = saturation temp, r6 = left/mono sample, r7 = right sample, r12 = dst end.
 *
 * Returns EmitErrNone on success, EmitErrInvalidInput for an unsupported sampTyp, or the status of
 * a failing helper emitter.
 * NOTE(review): EMIT() presumably returns early from this function on failure - confirm in emit.h
 */
enum EmitStatus audioPrvMicCreateConvertFunc(struct EmitBuf *dest, enum AudioSampleType sampTyp, enum AudioChannelConfig chans)
{
	bool stereo = chans != AudioMono, unsign = false, bswap = false;
	uint_fast8_t width, widthShift = 0;
	enum EmitStatus now;
	uintptr_t loopStart;
	
	if (!stereo) {
		
		//mono output uses the average of both volumes
		
		//add r2, r3
		EMIT(LLaddReg, 2, 2, 3, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

		//lsr r2, #1
		EMIT(LLmov, 2, 2, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	
	//push {r4-r7, lr}
	EMIT(HLpush, 0x40f0);
	
	switch (sampTyp) {
		case AudioSampleU8:
			unsign = true;
			//fallthrough
		case AudioSampleS8:
			width = 8;
			break;
		
		case AudioSampleU16BE:
			bswap = true;
			//fallthrough
		
		case AudioSampleU16LE:
			unsign = true;
			width = 16;
			widthShift = 1;
			break;
			
		case AudioSampleS16BE:
			bswap = true;
			//fallthrough
			
		case AudioSampleS16LE:
			width = 16;
			widthShift = 1;
			break;
		
		case AudioSampleU32BE:
			bswap = true;
			//fallthrough
		
		case AudioSampleU32LE:
			unsign = true;
			width = 32;
			widthShift = 2;
			break;
			
		case AudioSampleFloatBE:
		case AudioSampleS32BE:
			bswap = true;
			//fallthrough
			
		case AudioSampleFloatLE:
		case AudioSampleS32LE:
			width = 32;
			widthShift = 2;
			break;
		
		default:
			return EmitErrInvalidInput;
	}
	
	//float output calls a C helper through r4, so r4 must be loaded for the FLOAT formats
	//(the 32-bit integer formats never call it and use r4 as a byte-swap temp instead)
	if (sampTyp == AudioSampleFloatLE) {
		
		//ldr r4, =audioMicPrvToFloatLE
		EMIT(HLloadImmToReg, 4, (uintptr_t)&audioMicPrvToFloatLE, true, true, false);
	}
	else if (sampTyp == AudioSampleFloatBE) {
		
		//ldr r4, =audioMicPrvToFloatBE
		EMIT(HLloadImmToReg, 4, (uintptr_t)&audioMicPrvToFloatBE, true, true, false);
	}
	
	//ldr r12, [sp, #proper_ofst]					//get nSamplesOver2 into r12 (5 regs were pushed -> 0x14)
	EMIT(LLloadImm, 12, EMIT_REG_NO_SP, 0x14, EmitSzWord, false, EmitAdrModeIndex);
	
	//add r12, r0, r12, lsl #(stereo + widthShift + 1)								//calc dst's end
	EMIT(LLaddReg, 12, 0, 12, EmitShiftLsl, 1 + widthShift + (stereo ? 1 : 0), EmitFlagsDoNotCare, false);

	//loopstart:
	loopStart = emitGetPtrToJumpHere(dest);
	
	//ldrsh r6, [r1], #2
	EMIT(LLloadImm, 6, 1, 2, EmitSzHalfword, true, EmitAdrModePostindex);

	if (stereo) {
		
		//mov r7, r6
		EMIT(LLmov, 7, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
		//mul r7, volR
		EMIT(LLmulReg, 7, 7, 3, EmitFlagsDoNotCare, false);
	}
	
	//mul r6, volL
	EMIT(LLmulReg, 6, 6, 2, EmitFlagsDoNotCare, false);
	
	//now we need to saturate to 26 bits (use r5 for temp)
	if (stereo) {
		
		now = audioPrvMicSignedSat(dest, 7, 5, 26);
		if (now != EmitErrNone)
			return now;
	}
	now = audioPrvMicSignedSat(dest, 6, 5, 26);
	if (now != EmitErrNone)
		return now;

	//if we need unsignedness, do so now (bias by half of the 26-bit range)
	if (unsign) {
		if (stereo) {
		
			//add r7, 1UL << 25
			EMIT(LLaddImm, 7, 7, 1UL << 25, EmitFlagsDoNotCare, false);
		}
		
		//add r6, 1UL << 25
		EMIT(LLaddImm, 6, 6, 1UL << 25, EmitFlagsDoNotCare, false);
	}
	
	//convert to desired size, swap as needed, write
	if (width == 8) {
		
		if (stereo) {
		
			//lsr r7, #18
			EMIT(LLmov, 7, 7, EmitShiftLsr, 18, EmitFlagsDoNotCare, false);
			
			//strb r7, [dst, #1]
			EMIT(LLstoreImm, 7, 0, 1, EmitSzByte, EmitAdrModeIndex);
		}
		
		//lsr r6, #18
		EMIT(LLmov, 6, 6, EmitShiftLsr, 18, EmitFlagsDoNotCare, false);
		
		//strb r6, [dst], #proper_sz
		EMIT(LLstoreImm, 6, 0, sizeof(uint8_t) * (stereo ? 2 : 1), EmitSzByte, EmitAdrModePostindex);
	}
	else if (width == 16) {
		
		if (stereo) {
		
			//lsr r7, #10
			EMIT(LLmov, 7, 7, EmitShiftLsr, 10, EmitFlagsDoNotCare, false);
			
			if (bswap) {
				
				//rev16 r7, r7
				now = audioPrvEmitBswap16bot(dest, 7);
				if (now != EmitErrNone)
					return now;
			}
			
			//strh r7, [dst, #2]
			EMIT(LLstoreImm, 7, 0, 2, EmitSzHalfword, EmitAdrModeIndex);
		}
		
		//lsr r6, #10
		EMIT(LLmov, 6, 6, EmitShiftLsr, 10, EmitFlagsDoNotCare, false);
		
		if (bswap) {
				
			//rev16 r6, r6
			now = audioPrvEmitBswap16bot(dest, 6);
			if (now != EmitErrNone)
				return now;
		}
			
		//strh r6, [dst, #0], #proper_sz
		EMIT(LLstoreImm, 6, 0, sizeof(uint16_t) * (stereo ? 2 : 1), EmitSzHalfword, EmitAdrModePostindex);
	}
	else if (sampTyp != AudioSampleFloatLE && sampTyp != AudioSampleFloatBE) {
		
		//32-bit integer output: scale the 26-bit value up to 32 bits, r4 is the byte-swap temp
		
		if (stereo) {
		
			//lsl r7, #6
			EMIT(LLmov, 7, 7, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
			
			if (bswap) {
				
				//rev r7, r7
				now = audioPrvEmitRev(dest, 7, 4);
				if (now != EmitErrNone)
					return now;
			}
		}
		
		//lsl r6, #6
		EMIT(LLmov, 6, 6, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
		
		if (bswap) {
				
			//rev r6, r6
			now = audioPrvEmitRev(dest, 6, 4);
			if (now != EmitErrNone)
				return now;
		}
		
		if (stereo) {	//STRD is faster but requires alignment. stmia is ok
			
			//stmia r0!, {regs}
			EMIT(LLstmia, 0, (1 << 6) | (1 << 7), true);
		}
		else {
			
			//str r6, [r0], #4
			EMIT(LLstoreImm, 6, 0, 4, EmitSzWord, EmitAdrModePostindex);
		}
	}
	else {	//float output
		
		//the converter is a normal C function, so every call-clobbered register we still need
		//must be preserved around the call(s): r0 = dst, r1 = src, r2/r3 = volumes, r12 = dst end
		
		//push {r0-r3, r12}
		EMIT(HLpush, 0x100f);
		
		if (stereo) {
			
			//mov r0, r7
			EMIT(LLmov, 0, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			#ifdef HAVE_ARM_v5
			
				//blx r4
				EMIT(LLblx, 4);
			
			#else
			
				//mov lr, pc
				EMIT(LLmov, EMIT_REG_NO_LR, EMIT_REG_NO_PC, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				
				//bx r4
				EMIT(LLbx, 4);
			
			#endif
			
			//mov r7, r0
			EMIT(LLmov, 7, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		}
		
		//mov r0, r6
		EMIT(LLmov, 0, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
		#ifdef HAVE_ARM_v5
		
			//blx r4
			EMIT(LLblx, 4);
		
		#else
			
			//mov lr, pc
			EMIT(LLmov, EMIT_REG_NO_LR, EMIT_REG_NO_PC, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//bx r4
			EMIT(LLbx, 4);
		
		#endif

		//mov r6, r0
		EMIT(LLmov, 6, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

		//pop {r0-r3, r12}
		EMIT(HLpop, 0x100f);
		
		if (stereo) {	//STRD is faster but requires alignment. stmia is ok
			
			//stmia r0!, {regs}
			EMIT(LLstmia, 0, (1 << 6) | (1 << 7), true);
		}
		else {
			
			//str r6, [r0], #4
			EMIT(LLstoreImm, 6, 0, 4, EmitSzWord, EmitAdrModePostindex);
		}
	}
	
	//cmp r0, r12
	EMIT(LLcmpReg, 0, 12, EmitShiftLsl, 0);

	//bne loopstart
	EMIT(LLbranch, loopStart, EmitCcNe);
	
	#ifdef HAVE_ARM_v5

		//pop {r4-r7, pc}
		EMIT(HLpop, 0x80f0);
	
	#else
		
		//pop {r4-r7, lr}
		EMIT(HLpop, 0x40f0);
		
		//bx lr
		EMIT(LLbx, EMIT_REG_NO_LR);
		
	#endif

	return EmitErrNone;
}


