#include "mapleWire.h"
#include "stm32f103xb.h"

// Transmit side: port, its clock-enable bit and the GPIO bit indices of the two lines.
// NOTE: despite "AHB" in the name, mapleWireInit() ORs this value into RCC->APB2ENR.
// NOTE(review): PIN_1 / PIN_5 presumably name the Maple connector pins; the values
// are bit indices inside the GPIO port - confirm against the schematic.
#define MAPLE_OUT_RCC_AHB_BIT	0x04
#define MAPLE_OUT_PORT			GPIOA
#define MAPLE_OUT_PIN_1			2
#define MAPLE_OUT_PIN_5			3

// Receive side: same layout as above, also ORed into RCC->APB2ENR by mapleWireInit().
#define MAPLE_IN_RCC_AHB_BIT	0x08
#define MAPLE_IN_PORT			GPIOB
#define MAPLE_IN_PIN_1			0
#define MAPLE_IN_PIN_5			14


/*
 * Write one pin's 4-bit configuration field and then write the pin's bit to BSRR.
 *
 * port  - GPIO register block to modify
 * pinNo - pin index inside the port; 0..7 live in CRL, 8 and up in CRH
 *         (values above 15 are not checked; this file only passes 0..15)
 * cfg   - value placed into the pin's 4-bit field; it is not masked, so the
 *         caller must keep it within 4 bits
 */
static void mapleWireInitPinCfg(GPIO_TypeDef *port, uint32_t pinNo, uint32_t cfg)
{
	// each pin owns a 4-bit field, eight fields per configuration register
	const uint32_t shift = ((pinNo >= 8) ? (pinNo - 8) : pinNo) * 4;
	// unsigned operand: a plain int 0xF shifted into bit 31 (pins 7 and 15) would be undefined behaviour
	const uint32_t fieldMask = (uint32_t)0xF << shift;
	
	if (pinNo >= 8)
		port->CRH = (port->CRH & ~fieldMask) | (cfg << shift);
	else
		port->CRL = (port->CRL & ~fieldMask) | (cfg << shift);
	
	port->BSRR = (uint32_t)1 << pinNo;
}

/*
 * One-time hardware setup for the Maple wire: enables the clocks of both GPIO
 * ports, then configures the two transmit pins and the two receive pins.
 */
void mapleWireInit(void)
{
	// 4-bit per-pin configuration values handed to mapleWireInitPinCfg()
	// NOTE(review): presumably "output" and "input" modes - confirm the exact
	// meaning of 0x03 / 0x04 against the reference manual
	enum { txPinCfg = 0x03, rxPinCfg = 0x04 };
	uint32_t enables;
	
	// a single read-modify-write turns on both port clocks
	enables = RCC->APB2ENR;
	enables |= MAPLE_OUT_RCC_AHB_BIT | MAPLE_IN_RCC_AHB_BIT;
	RCC->APB2ENR = enables;
	
	// transmit lines
	mapleWireInitPinCfg(MAPLE_OUT_PORT, MAPLE_OUT_PIN_1, txPinCfg);
	mapleWireInitPinCfg(MAPLE_OUT_PORT, MAPLE_OUT_PIN_5, txPinCfg);
	
	// receive lines
	mapleWireInitPinCfg(MAPLE_IN_PORT, MAPLE_IN_PIN_1, rxPinCfg);
	mapleWireInitPinCfg(MAPLE_IN_PORT, MAPLE_IN_PIN_5, rxPinCfg);
}


/*
 * Short busy-wait used between the pin writes of the TX routines.
 * The volatile counter forces real loads and stores, and noinline keeps the
 * call itself as part of the delay.
 */
static void __attribute__((noinline)) mapleTxDelay(void)		//just an unscientific delay of some sort
{
	// initialised so the increment below does not read an indeterminate value;
	// NOTE(review): the extra store slightly lengthens the delay - re-check TX timing on hardware
	volatile uint32_t t = 0;
	
	t++;
	t--;
}

/*
 * Emit the start pattern on the transmit lines: pin 1 is pulled low, pin 5 is
 * pulsed low/high four times, then pin 1 is raised again. Every pin write is
 * followed by one mapleTxDelay().
 */
void mapleWireTxStart(void)
{
	const uint32_t line1 = 1 << MAPLE_OUT_PIN_1;
	const uint32_t line5 = 1 << MAPLE_OUT_PIN_5;
	unsigned pulsesLeft = 4;
	
	// pin 1 low for the whole pulse train
	MAPLE_OUT_PORT->BRR = line1;
	mapleTxDelay();
	
	// four low-then-high pulses on pin 5
	do {
		MAPLE_OUT_PORT->BRR = line5;
		mapleTxDelay();
		MAPLE_OUT_PORT->BSRR = line5;
		mapleTxDelay();
	} while (--pulsesLeft != 0);
	
	// release pin 1: both lines are high again
	MAPLE_OUT_PORT->BSRR = line1;
	mapleTxDelay();
}

/*
 * Emit the stop pattern on the transmit lines: pin 5 is driven high and then
 * low, pin 1 is pulsed low/high twice, and finally pin 5 is raised. Every pin
 * write is followed by one mapleTxDelay().
 */
void mapleWireTxStop(void)
{
	const uint32_t line1 = 1 << MAPLE_OUT_PIN_1;
	const uint32_t line5 = 1 << MAPLE_OUT_PIN_5;
	unsigned pulsesLeft = 2;
	
	// pin 5: high, then low
	MAPLE_OUT_PORT->BSRR = line5;
	mapleTxDelay();
	MAPLE_OUT_PORT->BRR = line5;
	mapleTxDelay();
	
	// two low-then-high pulses on pin 1 while pin 5 stays low
	do {
		MAPLE_OUT_PORT->BRR = line1;
		mapleTxDelay();
		MAPLE_OUT_PORT->BSRR = line1;
		mapleTxDelay();
	} while (--pulsesLeft != 0);
	
	// release pin 5: both lines are high again
	MAPLE_OUT_PORT->BSRR = line5;
	mapleTxDelay();
}

/*
 * Clock out the low 8 bits of val, most significant bit first, two bits per
 * loop pass. In phase 1 pin 5 carries the bit and pin 1 is pulled low; in
 * phase 2 the roles are swapped. Every pin write is followed by one
 * mapleTxDelay().
 */
void mapleWireTxByte(uint32_t val)
{
	const uint32_t line1 = 1 << MAPLE_OUT_PIN_1;
	const uint32_t line5 = 1 << MAPLE_OUT_PIN_5;
	unsigned pairsLeft;
	
	for (pairsLeft = 4; pairsLeft != 0; pairsLeft--) {
		
		// take the next two bits (bit 7 first, then bit 6) and move the rest up
		const uint32_t phase1Bit = val & 0x80;
		const uint32_t phase2Bit = val & 0x40;
		
		val <<= 2;
		
		// phase 1: data on pin 5, then pin 1 goes low
		if (phase1Bit)
			MAPLE_OUT_PORT->BSRR = line5;
		else
			MAPLE_OUT_PORT->BRR = line5;
		mapleTxDelay();
		
		MAPLE_OUT_PORT->BRR = line1;
		mapleTxDelay();
		
		MAPLE_OUT_PORT->BSRR = line5;	// pin 5 back high before it becomes the falling line
		mapleTxDelay();
		
		// phase 2: data on pin 1, then pin 5 goes low
		if (phase2Bit)
			MAPLE_OUT_PORT->BSRR = line1;
		else
			MAPLE_OUT_PORT->BRR = line1;
		mapleTxDelay();
		
		MAPLE_OUT_PORT->BRR = line5;
		mapleTxDelay();
		
		MAPLE_OUT_PORT->BSRR = line1;	// pin 1 back high for the next phase 1
		mapleTxDelay();
	}
}

//rx (80MHz+ CPU req'd)
/*
 * Receive bytes from the two input pins into dst by polling the port's IDR.
 *
 * The routine busy-waits, with no timeout, for the start pattern: both pins
 * high, pin 1 low, four low/high pulses on pin 5, pin 1 high again. Bits are
 * then shifted in MSB first, alternating between the pins: pin 5 is sampled
 * when pin 1 is seen low, then pin 1 is sampled when pin 5 is seen low.
 *
 * Reception ends when len bytes have been stored, or when, while waiting for
 * the first bit of a byte, pin 5 toggles and ends up low with pin 1 still
 * high (state E below, taken as a stop without further checks).
 *
 * dst - destination buffer
 * len - capacity of dst in bytes; 0 returns immediately without touching dst
 *
 * Returns the number of bytes stored into dst.
 */
uint32_t __attribute__((noinline))/* just in case */ mapleWireRx(uint8_t *dst, uint32_t len)
{
	uint8_t *dstOrig = dst;
	uint32_t mask1 = 1 << MAPLE_IN_PIN_1;
	uint32_t mask5 = 1 << MAPLE_IN_PIN_5;
	uint32_t bothMasks = mask1 | mask5;
	uint32_t inputPortVal = (uint32_t)&MAPLE_IN_PORT->IDR;
	uint32_t t1 = 0, t2 = 0, dummyWrite;	// t1/t2 are asm scratch; initialised so they are not read indeterminate as asm inputs
	
	// the asm stores a byte before comparing against the end pointer and only
	// tests for equality, so an empty buffer would be overrun without this guard
	if (len == 0)
		return 0;
	
	#define STRINGIFY(_c)	#_c
	#define STRINGIFY2(_c)	STRINGIFY(_c)
	#define PIN1SHIFT		"#" STRINGIFY2(MAPLE_IN_PIN_1 + 1)
	#define PIN5SHIFT		"#" STRINGIFY2(MAPLE_IN_PIN_5 + 1)
	
	// operands: %0 dst, %1 mask1, %2 mask5, %3 address of IDR, %4 end pointer (dst + len),
	//           %5 both masks, %6 t1 (last port sample), %7 t2 (pulse counter, then bit accumulator)
	asm volatile(
		"0:								\n\t"	// init
		"	movs  %7, #4				\n\t"
		
		"1:								\n\t"	// wait for both highs
		"	ldr   %6, [%3]				\n\t"
		"	ands  %6, %5				\n\t"
		"	cmp   %6, %5				\n\t"
		"	bne   1b					\n\t"	// NOT {1: H, 5: H} -> wait more
		
		"2:								\n\t"	// wait for start: step 1: pin1 goes low
		"	ldr   %6, [%3]				\n\t"
		"	ands  %6, %5				\n\t"
		"	beq   1b					\n\t"	// {1: L, 5: L} -> go back to waiting for both high
		"	cmp   %6, %5				\n\t"
		"	beq   2b					\n\t"	// {1: H, 5: H} -> wait more
		"	tst   %6, %1				\n\t"
		"	bne   1b					\n\t"	// {1: H, 5: L} -> go back to beginning
		"								\n\t"	// {1: L, 5: H} -> go to next step
		
		"3:								\n\t"	// wait for start: step 2: pin 5 (goes low and then high) x4		[substep 1: pin5 -> goes low]
		"	ldr   %6, [%3]				\n\t"
		"	ands  %6, %5				\n\t"
		"	beq   4f					\n\t"	// {1: L, 5: L} -> go on
		"	tst   %6, %2				\n\t"	//
		"	bne   3b					\n\t"	// {1: L, 5: H} -> wait more
		"	b     0b					\n\t"   // {1: H, 5: ?} -> start over
		
		"4:								\n\t"	// wait for start: step 2: pin 5 (goes low and then high) x4		[substep 2: pin5 -> goes high]
		"	ldr   %6, [%3]				\n\t"
		"	ands  %6, %5				\n\t"
		"	beq   4b					\n\t"	// {1: L, 5: L} -> wait more
		"	tst   %6, %1				\n\t"	//
		"	bne   0b					\n\t"	// {1: H, 5: ?} -> start over
		"								\n\t"	// {1: L, 5: H} -> here
		
		"	subs  %7, #1				\n\t"	// see if we've seen the 4 cycles of pin 5 we expected
		"	bne   3b					\n\t"	// if not, go wait for the remaining ones
		
		"5:								\n\t"	// wait for start: step 3: pin1 goes up (causing both pins to be up)
		"	ldr   %6, [%3]				\n\t"
		"	tst   %6, %2				\n\t"
		"	beq   0b					\n\t"	// {1: ?, 5: L} -> bad - restart
		"	tst   %6, %1				\n\t"
		"	beq   5b					\n\t"	// {1: L, 5: H} -> wait some more
		"								\n\t"	// {1: H, 5: H} -> here
		
	#define RX_PHASE_1_normal																												\
		"7:								\n\t"	/* wait for phase 1 (1 goes low after being high) [part 1: wait for it to be high] */		\
		"	ldr   %6, [%3]				\n\t"																								\
		"	tst   %6, %1				\n\t"																								\
		"	beq   7b					\n\t"	/* {1: L, 5: ?} -> wait more */																\
		"8:								\n\t"	/* wait for phase 1 (1 goes low after being high) [part 2: wait for it to go low] */		\
		"	ldr   %6, [%3]				\n\t"																								\
		"	tst   %6, %1				\n\t"																								\
		"	bne   8b					\n\t"	/* {1: H, 5: ?} -> wait more */																\
		"	lsrs  %6, " PIN5SHIFT "		\n\t"	/* shift out bit 5 (into carry) */															\
		"	adcs  %7, %7				\n\t"	/* t2 = (t2 << 1) + pin5_state */
	
	#define RX_PHASE_2_normal																												\
		"7:								\n\t"	/* wait for phase 2 (5 goes low after being high) [part 1: wait for it to be high] */		\
		"	ldr   %6, [%3]				\n\t"																								\
		"	tst   %6, %2				\n\t"																								\
		"	beq   7b					\n\t"	/* {1: ?, 5: L} -> wait more */																\
		"8:								\n\t"	/* wait for phase 2 (5 goes low after being high) [part 2: wait for it to go low] */		\
		"	ldr   %6, [%3]				\n\t"																								\
		"	tst   %6, %2				\n\t"																								\
		"	bne   8b					\n\t"	/* {1: ?, 5: H} -> wait more */																\
		"	lsrs  %6, " PIN1SHIFT "		\n\t"	/* shift out bit 1 (into carry) */															\
		"	adcs  %7, %7				\n\t"	/* t2 = (t2 << 1) + pin1_state */



		//we are now in byte loop. we avoid caring for phase errors for speed and simplicity. we'll just use our other knowledge to skip errored packets
		//why? speed
		
		//RX a byte

	/*	state diagram on RXing phase 1 bit or a potential stop. We enter state A (once 1 has gone high). states D & E are terminal
	
	STATE	LABEL	PINSTATE									NOTES
	A		8		1H5?										only applicable after start condition
	B		9		1H5L										non-first-byte RX enters here as we know we're in 1H5L state
	C		10		1H5H
	D		11		1L5? (bit done)
	E		12		1H5L (likely the makings of a stop)

	STATE	INPUT		NEW STATE
	A		1L			D
	A		5L			B
	B		1L			D
	B		5H			C
	C		1L			D
	C		5L			E
	*/
	
		"7:								\n\t"	// wait for phase 1 (1 goes low after being high) [part 1: wait for it to be high]
		"	ldr   %6, [%3]				\n\t"
		"	tst   %6, %1				\n\t"
		"	beq   7b					\n\t"	// {1: L, 5: ?} -> wait more
		
		"8:								\n\t"	// state A (see state diagram)
		"	ldr   %6, [%3]				\n\t"
		"	tst   %6, %1				\n\t"
		"	beq   11f					\n\t"	// A ---1L---> D
		"	tst   %6, %2				\n\t"
		"	bne   8b					\n\t"	// A --1H5H--> A
		"								\n\t"	// A ---5L---> B
		
		"9:								\n\t"	// state B (see state diagram)
		"	ldr   %6, [%3]				\n\t"
		"	tst   %6, %1				\n\t"
		"	beq   11f					\n\t"	// B ---1L---> D
		"	tst   %6, %2				\n\t"
		"	beq   9b					\n\t"	// B --1H5L--> B
		"								\n\t"	// B ---5H---> C
		
		"10:							\n\t"	// state C (see state diagram)
		"	ldr   %6, [%3]				\n\t"
		"	tst   %6, %1				\n\t"
		"	beq   11f					\n\t"	// C ---1L---> D
		"	tst   %6, %2				\n\t"
		"	bne   10b					\n\t"	// C --1H5H--> C
		"	b     12f					\n\t"	// C ---5L---> E
		
		"11:							\n\t"	// state D (see state diagram)
		"	lsrs  %6, " PIN5SHIFT "		\n\t"	// shift out bit 5
		"	adcs  %7, %7				\n\t"	// t2 = (t2 << 1) + pin5_state
		

		RX_PHASE_2_normal
		RX_PHASE_1_normal
		RX_PHASE_2_normal
		RX_PHASE_1_normal
		RX_PHASE_2_normal
		RX_PHASE_1_normal
		RX_PHASE_2_normal
		
		// store the low byte of t2, advance dst and loop until dst reaches the end pointer.
		// NOTE(review): looping to 9 skips the wait-for-pin-1-high loop at 7, so the next
		// byte relies on pin 1 already reading high by the time state B samples it; if the
		// sender still holds pin 1 low (last bit was 0) state B would take an early 1L -> D
		// transition - confirm against real bus timing
		"	strb %7, [%0], #1			\n\t"
		"	cmp  %0, %4					\n\t"
		"	bne  9b						\n\t"
		
		"12:							\n\t"	// state E (see state diagram) - likely a stop. we really do not bother even checking further and just assume it is
		
		
		:"=r"(dst), "=r"(mask1), "=r"(mask5), "=r"(inputPortVal), "=h"(dummyWrite), "=r"(bothMasks), "=r"(t1), "=r"(t2)
		: "0"(dst),  "1"(mask1),  "2"(mask5),  "3"(inputPortVal),  "4" (dst + len),  "5"(bothMasks),  "6"(t1),  "7"(t2)
		:"cc", "memory"
	);
	
	// dst was advanced by the post-indexed strb, once per stored byte
	return dst - dstOrig;

	
	#undef STRINGIFY
	#undef STRINGIFY2
	#undef PIN1SHIFT
	#undef PIN5SHIFT
	#undef RX_PHASE_2_normal
	#undef RX_PHASE_1_normal
}
