//expands to "static"; not referenced anywhere in the visible part of this file
#define _ISLOCAL_	static

//WEAK marks a symbol as weak (overridable by a strong definition at link time)
//ALIAS(f) makes the symbol a weak alias of the function whose name is f
#define WEAK __attribute__ ((weak))
#define ALIAS(f) __attribute__ ((weak, alias (#f)))

#define NON_PORTABLE
#include <HwrMiscFlags.h>
#undef NON_PORTABLE

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "palmcardProto.h"
#include "palmcardComms.h"
#include "machSpecific.h"
#include "ral_export.h"
#include "palmcard.h"
#include "printf.h"
#include "pinout.h"
#include "timers.h"
#include "memmap.h"
#include "entry.h"
#include "i2cIO.h"
#include "pace.h"
#include "heap.h"
#include "irqs.h"
#include "boot.h"
#include "mpu.h"
#include "ral.h"
#include "cpu.h"




//these are in the HardFault handler and control the writeable range of ROMRAM
//(they are updated by machSetStorageAreaWriteable() below)
extern uint32_t mRomRamStart, mRomRamLen;
//device serial number characters (no NUL terminator), filled in by prvGenerateSnum() and handed out by hwMaybeGetRomToken()
static uint8_t mSnum[12];

//Catch-all interrupt handler: every chip-level IRQ handler declared below is a weak alias of this
//function, so any interrupt without a dedicated handler ends up in cpuIrqDefaultHandler().
//Marked "used" so it is kept even though it is only referenced through the aliases.
void __attribute__((used)) IntDefaultHandler(void)
{
	cpuIrqDefaultHandler();
}

//Chip-level interrupt handlers. Each one is a weak alias of IntDefaultHandler, so a driver elsewhere
//can provide a strong definition with the same name to take over that interrupt.
//The order here mirrors the order of the entries in __ISR_VECTORS below.
WEAK void TIMER0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void TIMER1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void TIMER2_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void TIMER3_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void PWM_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void USB_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void XIP_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void PIO0_0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void PIO0_1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void PIO1_0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void PIO1_1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void DMA0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void DMA1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void IO_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void QSPI_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void SIO0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void SIO1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void CLOCKS_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void SPI0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void SPI1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void UART0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void UART1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void ADC_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void I2C0_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void I2C1_IRQHandler(void) ALIAS(IntDefaultHandler);
WEAK void RTC_IRQHandler(void) ALIAS(IntDefaultHandler);


//Interrupt vector table. It is installed at runtime by machInit(STAGE_INIT_SET_VTOR), which writes its
//address to SCB->VTOR; that is why the first two slots (initial SP and reset handler) are left empty.
//The first 16 entries are the core exception slots, the rest are the chip-level interrupts in IRQ-number order.
//When BUILD_FOR_THUMB_1 is defined, the MemManage/BusFault/UsageFault/DebugMonitor slots are left empty.
__attribute__ ((aligned (256))) void (* const __ISR_VECTORS[])  (void) =
{
	0,							// unused: initial sp
	0,							// unused: reset handler

	NMI_Handler,				// The NMI handler
	HardFault_Handler,			// The hard fault handler

#ifdef BUILD_FOR_THUMB_1
	0,
	0,
	0,
#else
	MemManage_Handler,
	BusFault_Handler,
	UsageFault_Handler,
#endif
	0,
	0,
	0,
	0,
	
	SVC_Handler,				// SVCall handler
#ifdef BUILD_FOR_THUMB_1
	0,
#else
	DebugMonitor_Handler,		// Debug monitor handler
#endif
	0,
	PendSV_Handler,				// The PendSV handler
	SysTick_Handler,			// The SysTick handler
	
	// Chip Level interrupts here
	TIMER0_IRQHandler,
	TIMER1_IRQHandler,
	TIMER2_IRQHandler,
	TIMER3_IRQHandler,
	PWM_IRQHandler,
	USB_IRQHandler,
	XIP_IRQHandler,
	PIO0_0_IRQHandler,
	PIO0_1_IRQHandler,
	PIO1_0_IRQHandler,
	PIO1_1_IRQHandler,
	DMA0_IRQHandler,
	DMA1_IRQHandler,
	IO_IRQHandler,
	QSPI_IRQHandler,
	SIO0_IRQHandler,
	SIO1_IRQHandler,
	CLOCKS_IRQHandler,
	SPI0_IRQHandler,
	SPI1_IRQHandler,
	UART0_IRQHandler,
	UART1_IRQHandler,
	ADC_IRQHandler,
	I2C0_IRQHandler,
	I2C1_IRQHandler,
	RTC_IRQHandler,
};

//Idle hook: executes a single WFI instruction, which stalls the core until the next interrupt/wake event
void machIdle(void)
{
	asm volatile("wfi");
}

//Programs one MPU region.
//  idx  - region number (merged into the low bits of RBAR)
//  addr - region base address
//  cfg  - value written verbatim to RASR (permissions, memory type, size, enable, subregion disables)
//Does nothing when the build has no MPU, when the MPU reports separate I/D regions (TYPE bit 0),
//or when it reports zero data regions.
static void mpuRegCfg(uint32_t idx, uint32_t addr, uint32_t cfg)
{
	#ifdef __MPU_PRESENT
		//we do what we can: only a unified MPU is supported
		if (MPU->TYPE & 1)
			return;
		
		//no regions implemented -> nothing to program
		if (!(MPU->TYPE & MPU_TYPE_DREGION_Msk))
			return;
		
		//0x10 is the RBAR "VALID" bit: it makes this write also select region "idx"
		MPU->RBAR = addr | 0x10 | idx;
		MPU->RASR = cfg;
	#endif
}

//Selects how much of ROMRAM is writeable by updating the window (mRomRamStart, mRomRamLen) that the
//HardFault handler consults. The window always ends at the end of ROMRAM:
//  writeable == true  -> window starts at the storage RAM base (storage is writeable)
//  writeable == false -> window starts right after the storage RAM (storage is protected)
void machSetStorageAreaWriteable(bool writeable)
{
	uint32_t windowStart = CPU_STORAGE_RAM_BASE;
	
	if (!writeable)
		windowStart += CPU_STORAGE_RAM_SIZE;
	
	mRomRamStart = windowStart;
	mRomRamLen = ROMRAM_BASE + ROMRAM_SIZE - windowStart;
}

//Configures every bank-0 GPIO.
//Pins listed in cfgs[] get their pad control value, function select, and (optionally) the input
//synchronizer bypass. Every pin NOT listed is parked: output disabled, pull-down on, function NULL.
//Finally the SIO output latch and output-enables are set for the CPU-driven outputs.
//Table columns: pin number, pad control register value, IO function select, bypass-input-sync flag.
static void gpiosConfig(void)
{
	static struct {
		uint8_t pin;
		uint8_t padCfg;
		uint8_t funcSel		: 5;
		uint8_t syncBypass	: 1;
	} const cfgs[] = {
		
		//DQ pins are in/out under PIO0 control
		//(DQ12..DQ14 are pulled up, all the others are pulled down)
		{PIN_DQ0,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ1,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ2,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ3,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ4,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ5,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ6,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ7,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ8,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ9,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ10,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ11,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ12,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PUE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ13,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PUE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ14,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PUE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		{PIN_DQ15,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO0_0, 1},
		
		//chip selects and other control signals are input-only, also for PIOs
		//(output driver disabled via OD; function select is SIO)
		{PIN_CSA0,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_OD_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 1},
		{PIN_CSA2,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_OD_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 1},
		{PIN_A1,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_OD_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 1},
		{PIN_nOE,	PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_OD_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 1},
		
		//IRQ is output-only, CPU controlled
		{PIN_IRQ3,	0, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 0},
		
		//memory control pin is output-only, CPU controlled, likely never touched
		{PIN_RAMnFLASH, 0, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 0},
		
		//IrDA
		{PIN_IRDA_IN, PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_OD_BITS | PADS_BANK0_GPIO0_PUE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO1_0, 0},
		{PIN_IRDA_OUT, 0, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_PIO1_0, 0},
		
		//SD
		//NOTE(review): the literal function select 7 is presumably the PIO1 function - confirm and consider using the named constant
		//NOTE(review): PADS_BANK0_GPIO0_DRIVE_VALUE_12MA is OR-ed in without being shifted by PADS_BANK0_GPIO0_DRIVE_LSB.
		//	If that macro is the raw field value (as it is in the pico-sdk register headers), it lands in the lowest
		//	pad-control bits rather than in the DRIVE field. Same applies to the I2C entries below. Confirm intent before changing.
		{PIN_SD_DAT0, PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PUE_BITS | PADS_BANK0_GPIO0_DRIVE_VALUE_12MA, 7, 1},
		{PIN_SD_CMD, PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_PUE_BITS | PADS_BANK0_GPIO0_DRIVE_VALUE_12MA, 7, 1},
		{PIN_SD_CLK, PADS_BANK0_GPIO0_DRIVE_VALUE_12MA, 7, 1},
		{PIN_SD_DET, PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_OD_BITS | PADS_BANK0_GPIO0_PDE_BITS, IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_SIO_0, 0},
		
		//I2C
		{PIN_SDA, PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_DRIVE_VALUE_12MA, IO_BANK0_GPIO22_CTRL_FUNCSEL_VALUE_I2C1_SDA, 0},
		{PIN_SCL, PADS_BANK0_GPIO0_IE_BITS | PADS_BANK0_GPIO0_DRIVE_VALUE_12MA, IO_BANK0_GPIO23_CTRL_FUNCSEL_VALUE_I2C1_SCL, 0},
	};
	uint32_t specifiedPins = 0;		//bitmask of pins that appear in cfgs[]
	uint_fast8_t i;
	
	//configure all specified pins
	for (i = 0; i < sizeof(cfgs) / sizeof(*cfgs); i++) {
		
		uint_fast8_t pinNo = cfgs[i].pin;
		
		//pad electricals first, then replace only the FUNCSEL field of the IO control register
		padsbank0_hw->io[pinNo] = cfgs[i].padCfg;
		iobank0_hw->io[pinNo].ctrl = (iobank0_hw->io[pinNo].ctrl &~ IO_BANK0_GPIO0_CTRL_FUNCSEL_BITS) | (cfgs[i].funcSel << IO_BANK0_GPIO0_CTRL_FUNCSEL_LSB);
		
		//bypass bits are only ever set here, never cleared
		if (cfgs[i].syncBypass)
			syscfg_hw->proc_in_sync_bypass |= 1 << pinNo;
		
		specifiedPins |= 1UL << pinNo;
	}
	
	//configure all unspecified pins (pins 0..29; the mask is consumed one bit per iteration)
	for (i = 0; i < 30; i++, specifiedPins >>= 1) {
		
		if (specifiedPins & 1)
			continue;
		
		//output disabled + pull-down so the unused pin does not float
		padsbank0_hw->io[i] = PADS_BANK0_GPIO0_OD_BITS | PADS_BANK0_GPIO0_PDE_BITS;
		//NOTE(review): OR-ing (rather than replacing the field) presumably relies on the NULL function value being all-ones in FUNCSEL - confirm
		iobank0_hw->io[i].ctrl |= IO_BANK0_GPIO0_CTRL_FUNCSEL_VALUE_NULL;
	}
	
	//RAMnFLASH and IRQ3 start out driven high; IRDA_OUT is also an enabled output
	sio_hw->gpio_out = (1 << PIN_RAMnFLASH) | (1 << PIN_IRQ3);
	sio_hw->gpio_oe = (1 << PIN_RAMnFLASH) | (1 << PIN_IRQ3) | (1 << PIN_IRDA_OUT);
}

//Derives the 12-character serial number in mSnum from data read out of the SPI flash.
//A 21-byte SPI transaction is performed whose first byte is 0x4b (presumably the flash's "read unique ID"
//opcode - confirm against the flash datasheet); the 16 bytes received after the first 5 bytes are folded
//into a 64-bit shift-and-xor hash, which is then emitted as 12 base-32 digits, least significant first.
//The digit alphabet has no '0', 'B', 'I' or 'O'. The result is also logged.
static void prvGenerateSnum(void)
{
	static const char alphabet[32] = "123456789ACDEFGHJKLMNPQRSTUVWXYZ";
	uint8_t xfer[5 + 16] = {0x4b};			//tx and rx share this buffer
	uint64_t state = 0x2718281828459045;
	uint_fast8_t idx;
	
	palmcardFlashSpiAccess(xfer, xfer, sizeof(xfer));
	
	//one LFSR-style step per received byte: shift left, xor in the polynomial if a bit fell off the top, then mix the byte in
	for (idx = 0; idx < 16; idx++) {
		
		bool topBitWasSet = (state >> 63) != 0;
		
		state <<= 1;
		if (topBitWasSet)
			state ^= 0x3141592653589793;
		
		state ^= xfer[5 + idx];
	}
	
	//peel off base-32 digits; the transfer buffer is reused to hold a printable copy
	for (idx = 0; idx < sizeof(mSnum); idx++) {
		
		char digit = alphabet[state % sizeof(alphabet)];
		
		mSnum[idx] = digit;
		xfer[idx] = digit;
		state /= sizeof(alphabet);
	}
	
	//mSnum itself is not terminated, only the copy used for logging
	xfer[idx] = 0;
	logi("GENERATED SNUM: '%s'\n", xfer);
}

//Intended to put the SPI flash into its sleep state. Currently a deliberate no-op: the body is disabled.
static void prvFlashSleep(void)
{
	//XXXX this does not work since a soft reset will try to read the flash without sending the wake command :(
	//we can do the weird rp2040 "set code up in sram to call on soft reset" thing later maybe
	
	//disabled implementation, kept for reference (sends the single command byte 0xb9 to the flash):
	/*
	uint8_t sleep = 0xb9;
	
	palmcardFlashSpiAccess(&sleep, &sleep, sizeof(sleep));
	*/
}

#ifdef USE_XRAM_FOR_DYNHEAP
	
	//Signatures of the memory-manager entry points that get hooked in machInit(STAGE_INIT_POST_RAL)
	typedef void* (*fMemChunkNew)(uint32_t heap, uint32_t size, uint32_t flags);
	typedef void* (*fMemPtrNew)(uint32_t size);
	typedef void (*fMemChunkFree)(void* ptr);
	typedef void* (*fMemPtrResize)(void *ptr, uint32_t newSz);
	
	//The original entry points, saved when the hooks are installed, so that the hooks below can forward to them
	static fMemChunkNew mOpMemChunkNew;
	static fMemPtrNew mOpMemPtrNew;
	static fMemChunkFree mOpMemChunkFree;
	static fMemPtrResize mOpMemPtrResize;
	
	//Hook for the resize entry point.
	//Chunks that live in our own kernel heaps cannot be resized in place: a replacement is obtained from the
	//ORIGINAL pointer allocator, the smaller of the old and new sizes is copied over, and the old chunk is freed.
	//Returns the replacement, or NULL (leaving the old chunk untouched) if it could not be allocated.
	//Any other pointer is forwarded to the original resize routine.
	static void* pMemPtrResize(void *ptr, uint32_t newSz)
	{
		uint32_t prevSz, copySz;
		void *replacement;
		
		if (!kheapIsInOurHeaps(ptr))
			return mOpMemPtrResize(ptr, newSz);
		
		//stack chunks would not be resized
		prevSz = kheapGetChunkSize(ptr);
		replacement = mOpMemPtrNew(newSz);		//risks issues if old chunk was not owned by current process...tough
		
		if (!replacement)
			return NULL;
		
		copySz = prevSz;
		if (newSz < prevSz)
			copySz = newSz;
		
		memcpy(replacement, ptr, copySz);
		kheapFree(ptr);
		
		return replacement;
	}
	
	//Hook for the chunk allocation entry point.
	//A request is treated as a probable stack allocation when it targets heap 0, has flag 0x200 set, and its
	//size is a power of two between 256 bytes and 1MB inclusive. Such requests are served from our kernel heap
	//(stack-capable memory, never the OS heap); everything else goes to the original allocator.
	static void* pMemChunkNew(uint32_t heap, uint32_t size, uint32_t flags)
	{
		bool sizeIsPow2 = !(size & (size - 1));
		bool looksLikeStack = !heap && (flags & 0x200) && size >= 256 && size <= 0x100000 && sizeIsPow2;
		
		if (!looksLikeStack)
			return mOpMemChunkNew(heap, size, flags);
		
		logi("alloc of %u bytes may be a stack\n", size);
		
		return kheapAllocEx(size, MEM_USABLE_AS_STACK | MEM_NO_OS_HEAP);
	}
	
	//Hook for the pointer allocation entry point.
	//A power-of-two request between 8KB and 1MB inclusive is treated as a probable stack and served from
	//our kernel heap (stack-capable memory, never the OS heap); everything else goes to the original allocator.
	static void* pMemPtrNew(uint32_t size)
	{
		bool sizeIsPow2 = !(size & (size - 1));
		bool looksLikeStack = size >= 8192 && size <= 0x100000 && sizeIsPow2;
		
		if (!looksLikeStack)
			return mOpMemPtrNew(size);
		
		logi("alloc of %u bytes may be a stack\n", size);
		
		return kheapAllocEx(size, MEM_USABLE_AS_STACK | MEM_NO_OS_HEAP);
	}
	
	//Hook for the chunk free entry point.
	//Pointers inside ROMRAM belong to the original memory manager and are forwarded to it.
	//Any other non-NULL pointer is assumed to have come from our kernel heap and is freed there (and logged).
	//NULL is ignored.
	static void pMemChunkFree(void* ptr)
	{
		uintptr_t addr = (uintptr_t)ptr;
		
		if (addr >= ROMRAM_BASE && (addr - ROMRAM_BASE) < ROMRAM_SIZE) {
			
			mOpMemChunkFree(ptr);
			return;
		}
		
		if (!ptr)
			return;
		
		logi("free of 0x%08x\n", ptr);
		kheapFree(ptr);
	}
#endif


//Machine-specific initialization hook. It is called once per boot stage; "stage" selects what gets set up
//and "data" carries stage-specific input (only STAGE_INIT_INTERRUPTS reads it, as a struct MachInitDataInterrupts).
//  STAGE_INIT_EARLY          - clocks (ROSC -> XOSC -> PLL), peripheral resets, bus priority, GPIOs, XIP
//  STAGE_SOMEWHAT_EARLY_INIT - rp2040rtInit()
//  STAGE_INIT_SET_VTOR       - installs __ISR_VECTORS
//  STAGE_SETUP_HEAPS         - registers the two static kernel heaps
//  STAGE_INIT_INTERRUPTS     - interrupt priorities, I2C IO, serial number generation
//  STAGE_INIT_MPU            - MPU regions, then MPU on
//  STAGE_INIT_IN_THREAD      - comms, SDIO and IrDA bring-up, export of the SPI flash access function
//  STAGE_INIT_POST_RAL       - optional memory-manager hooks, then waits for the host's hardware flags
//Unknown stages are ignored.
void __attribute__((used)) machInit(uint32_t stage, const void* data)
{
	if (stage == STAGE_INIT_EARLY) {	//no globals/vectors/anything yet!!!
		
		uint32_t unitsToReset, unitsToUnreset;
		
		//start ROSC (medium range, divider value one above "pass") and wait until it is enabled and stable
		rosc_hw->ctrl = (rosc_hw->ctrl &~ (ROSC_CTRL_ENABLE_BITS | ROSC_CTRL_FREQ_RANGE_BITS)) | (ROSC_CTRL_ENABLE_VALUE_ENABLE << ROSC_CTRL_ENABLE_LSB) | (ROSC_CTRL_FREQ_RANGE_VALUE_MEDIUM << ROSC_CTRL_FREQ_RANGE_LSB);
		rosc_hw->div = (rosc_hw->div &~ ROSC_DIV_BITS) | ((ROSC_DIV_VALUE_PASS + 1) << ROSC_DIV_LSB);
		while ((rosc_hw->status & (ROSC_STATUS_STABLE_BITS | ROSC_STATUS_ENABLED_BITS)) != (ROSC_STATUS_STABLE_BITS | ROSC_STATUS_ENABLED_BITS));
		
		//tell refclock to use ROSC
		clocks_hw->clk[clk_ref].ctrl = (clocks_hw->clk[clk_ref].ctrl &~ CLOCKS_CLK_REF_CTRL_SRC_BITS) | (CLOCKS_CLK_REF_CTRL_SRC_VALUE_ROSC_CLKSRC_PH << CLOCKS_CLK_REF_CTRL_SRC_LSB);
		
		//use ref clock for cpu, use sys clock for periphs
		//(the peripheral clock is disabled first, and re-enabled only once sys has been re-sourced)
		clocks_hw->clk[clk_peri].ctrl &=~ CLOCKS_CLK_PERI_CTRL_ENABLE_BITS;
		clocks_hw->clk[clk_sys].ctrl = (clocks_hw->clk[clk_sys].ctrl &~ CLOCKS_CLK_SYS_CTRL_SRC_BITS)| (CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLK_REF << CLOCKS_CLK_SYS_CTRL_SRC_LSB);
		clocks_hw->clk[clk_peri].ctrl = (clocks_hw->clk[clk_peri].ctrl &~ (CLOCKS_CLK_PERI_CTRL_KILL_BITS | CLOCKS_CLK_PERI_CTRL_AUXSRC_BITS)) | CLOCKS_CLK_PERI_CTRL_ENABLE_BITS | (CLOCKS_CLK_PERI_CTRL_AUXSRC_VALUE_CLK_SYS << CLOCKS_CLK_PERI_CTRL_AUXSRC_LSB);
		
		//start XOSC (by stopping it first...)
		//this is safe because everything is running from ROSC at this point
		xosc_hw->ctrl = (xosc_hw->ctrl &~ XOSC_CTRL_ENABLE_BITS) | (XOSC_CTRL_ENABLE_VALUE_DISABLE << XOSC_CTRL_ENABLE_LSB);
		while (xosc_hw->status & XOSC_STATUS_ENABLED_BITS);
		xosc_hw->startup = (xosc_hw->startup &~ XOSC_STARTUP_DELAY_BITS) | (8191 << XOSC_STARTUP_DELAY_LSB);
		xosc_hw->ctrl = (xosc_hw->ctrl &~ (XOSC_CTRL_FREQ_RANGE_BITS | XOSC_CTRL_ENABLE_BITS)) | (XOSC_CTRL_ENABLE_VALUE_ENABLE << XOSC_CTRL_ENABLE_LSB) | (XOSC_CTRL_FREQ_RANGE_VALUE_1_15MHZ << XOSC_CTRL_FREQ_RANGE_LSB);
		while ((xosc_hw->status & (XOSC_STATUS_STABLE_BITS | XOSC_STATUS_ENABLED_BITS)) != (XOSC_STATUS_STABLE_BITS | XOSC_STATUS_ENABLED_BITS));
		
		//tell refclock to use XOSC
		clocks_hw->clk[clk_ref].ctrl = (clocks_hw->clk[clk_ref].ctrl &~ CLOCKS_CLK_REF_CTRL_SRC_BITS) | (CLOCKS_CLK_REF_CTRL_SRC_VALUE_XOSC_CLKSRC << CLOCKS_CLK_REF_CTRL_SRC_LSB);
		
		//reset units
		//the listed units are put into reset and then released; the pad and IO banks are only released, never asserted here
		//NOTE(review): RESETS_RESET_PIO1_BITS appears twice in the mask below - harmless for an OR, but check whether another unit was meant
		unitsToReset = RESETS_RESET_PWM_BITS | RESETS_RESET_RTC_BITS | RESETS_RESET_PIO0_BITS | RESETS_RESET_PIO1_BITS | RESETS_RESET_PIO1_BITS | RESETS_RESET_DMA_BITS | RESETS_RESET_PLL_SYS_BITS | RESETS_RESET_TIMER_BITS | RESETS_RESET_I2C1_BITS;
		unitsToUnreset = unitsToReset | RESETS_RESET_PADS_BANK0_BITS | RESETS_RESET_IO_BANK0_BITS;
		//the repeated writes below are deliberate according to the original author's note; the reason is not visible in this file
		resets_hw->reset |= unitsToReset;		//this is correct...
		resets_hw->reset |= unitsToReset;
		resets_hw->reset &=~ unitsToUnreset;
		resets_hw->reset &=~ unitsToUnreset;
		resets_hw->reset &=~ unitsToUnreset;
		while ((resets_hw->reset_done & unitsToReset) != unitsToReset);
			
		//many MHz please
		//feedback divider is CPU_CLOCK_RATE(in MHz) / 2, post dividers are 6 and 1
		logt("pll setup\n");
		pll_sys_hw->pwr |= (PLL_PWR_VCOPD_BITS | PLL_PWR_POSTDIVPD_BITS | PLL_PWR_PD_BITS);		//power down VCO, post dividers and PLL while reconfiguring
		pll_sys_hw->fbdiv_int = (pll_sys_hw->fbdiv_int &~ PLL_FBDIV_INT_BITS) | ((CPU_CLOCK_RATE / 1000000 / 2) << PLL_FBDIV_INT_LSB);
		pll_sys_hw->prim = (pll_sys_hw->prim &~ (PLL_PRIM_POSTDIV1_BITS | PLL_PRIM_POSTDIV2_BITS)) | (6 << PLL_PRIM_POSTDIV1_LSB) | (1 << PLL_PRIM_POSTDIV2_LSB);
		pll_sys_hw->pwr &=~ (PLL_PWR_VCOPD_BITS | PLL_PWR_POSTDIVPD_BITS | PLL_PWR_PD_BITS);		//power everything back up (dividers on)
		while (!(pll_sys_hw->cs & PLL_CS_LOCK_BITS));
		pll_sys_hw->cs &=~ PLL_CS_BYPASS_BITS;
		logt("pll is up\n");
	
		//switch sys.AUX to pll
		clocks_hw->clk[clk_sys].ctrl = (clocks_hw->clk[clk_sys].ctrl &~ CLOCKS_CLK_SYS_CTRL_AUXSRC_BITS) | (CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_CLKSRC_PLL_SYS << CLOCKS_CLK_SYS_CTRL_AUXSRC_LSB);
		
		//switch sys to AUX and wait for it
		//NOTE(review): the wait uses the CLK_REF "selected" field macros on the clk_sys register - presumably the fields are laid out identically; confirm
		clocks_hw->clk[clk_sys].ctrl = (clocks_hw->clk[clk_sys].ctrl &~ CLOCKS_CLK_SYS_CTRL_SRC_BITS) | (CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLKSRC_CLK_SYS_AUX << CLOCKS_CLK_SYS_CTRL_SRC_LSB);
		while (((clocks_hw->clk[clk_sys].selected & CLOCKS_CLK_REF_SELECTED_BITS) >> CLOCKS_CLK_REF_SELECTED_LSB) != (1 << CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLKSRC_CLK_SYS_AUX));
		
		//disable ROSC (nothing uses it anymore) and wait for it to stop reporting "stable"
		rosc_hw->ctrl = (rosc_hw->ctrl &~ ROSC_CTRL_ENABLE_BITS) | (ROSC_CTRL_ENABLE_VALUE_DISABLE << ROSC_CTRL_ENABLE_LSB);
		while (rosc_hw->status & ROSC_STATUS_STABLE_BITS);
		
		//DMA units misbehave if they are starved of bus cycles, so make them high priority (and both cores low priority)
		bus_ctrl_hw->priority = (bus_ctrl_hw->priority &~ (BUSCTRL_BUS_PRIORITY_PROC1_BITS | BUSCTRL_BUS_PRIORITY_PROC0_BITS)) | BUSCTRL_BUS_PRIORITY_DMA_R_BITS | BUSCTRL_BUS_PRIORITY_DMA_W_BITS;
		
		gpiosConfig();
		
		//XIP cache: powered up, enabled, and set to raise an error on bad writes
		xip_ctrl_hw->ctrl = (xip_ctrl_hw->ctrl &~ XIP_CTRL_POWER_DOWN_BITS) | XIP_CTRL_ERR_BADWRITE_BITS | XIP_CTRL_EN_BITS;
	}
	else if (stage == STAGE_SOMEWHAT_EARLY_INIT) {
		
		rp2040rtInit();
	}
	else if (stage == STAGE_INIT_SET_VTOR) {
		
		//point the core at our vector table
		SCB->VTOR = (uint32_t)__ISR_VECTORS;
	}
	else if (stage == STAGE_SETUP_HEAPS) {
		
		//both static regions are usable for stacks, DMA and code execution, and are flagged as fast
		kheapRegisterHeap(HAL_STATIC_MEM_BASE, HAL_STATIC_MEM_SIZE, MEM_USABLE_AS_STACK | MEM_USABLE_FOR_DMA | MEM_USABLE_FOR_EXEC | MEM_FAST);
		kheapRegisterHeap(HAL_STATIC_MEM2_BASE, HAL_STATIC_MEM2_SIZE, MEM_USABLE_AS_STACK | MEM_USABLE_FOR_DMA | MEM_USABLE_FOR_EXEC | MEM_FAST);
	}
	else if (stage == STAGE_INIT_INTERRUPTS) {
		
		uint_fast16_t i;
		
		const struct MachInitDataInterrupts *info = (const struct MachInitDataInterrupts*)data;
		uint32_t mediumPrio = (info->lowestAllowablePrio + info->highestAllowablePrio) / 2;
		
		//set all HW ints to medium prio
		//NOTE(review): the "<=" bound visits CPU_NUM_IRQS + 1 interrupt numbers; if CPU_NUM_IRQS is a count (not the last index) this touches one IRQ too many - confirm
		for (i = (unsigned)TIMER0_IRQn; i <= (unsigned)TIMER0_IRQn + CPU_NUM_IRQS; i++) {
			
			NVIC_SetPriority(i, mediumPrio);
		}
	
		//scheduler timer interrupt is high prio too so nobody else can interrupt it (and more importantly - it will not interrupt syscalls and vice versa)
		NVIC_SetPriority(TIMER0_IRQn, info->schedulingTimerPrio);
		
		i2cIoInit();
		prvFlashSleep();
		prvGenerateSnum();
		
	}
	else if (stage == STAGE_INIT_MPU) {
		
		#ifdef __MPU_PRESENT
		
			uint32_t i;
		
			//ROMRAM region (read-only for everyone; writes to it are expected to fault)
			mpuRegCfg(0, ROMRAM_BASE, MPU_PERM_U_RO_S_RO | MPU_MEM_TYPE_RAM | MPU_FLAG_ENABLED | ROMRAM_SZ_MPU);
			
			//dynamic ram and other ram
			mpuRegCfg(1, 0x20000000, MPU_PERM_U_RW_S_RW | MPU_MEM_TYPE_RAM | MPU_FLAG_ENABLED | MPU_REGION_SZ_512KB);
			
			//IOPORT & SCB (0xD0000000 + 0x20000000, device, nx)
			//a 2GB region at 0x80000000 with every subregion except the 5th and 6th disabled
			mpuRegCfg(2, 0x80000000, MPU_PERM_U_XX_S_RW | MPU_MEM_TYPE_DEVICE | MPU_PERM_NX | MPU_FLAG_ENABLED | MPU_REGION_SZ_2GB | MPU_SRD_0th | MPU_SRD_1st | MPU_SRD_2nd | MPU_SRD_3rd | MPU_SRD_4th | MPU_SRD_7th);
			
			//Periphs (0x40000000 + 0x20000000, device)
			mpuRegCfg(3, 0x40000000, MPU_PERM_U_RW_S_RW | MPU_MEM_TYPE_DEVICE | MPU_PERM_NX | MPU_FLAG_ENABLED | MPU_REGION_SZ_512MB);
			
			//USB ram we use as ram
			mpuRegCfg(4, 0x50100000, MPU_PERM_U_RW_S_RW | MPU_MEM_TYPE_RAM | MPU_FLAG_ENABLED | MPU_REGION_SZ_4KB);
			
			//rp2040 ROM (for using rom funcs)
			mpuRegCfg(5, 0x00000000, MPU_PERM_U_RO_S_RO | MPU_MEM_TYPE_ROM | MPU_FLAG_ENABLED | MPU_REGION_SZ_16KB);
			
			//allow access to XIPCTRL and SSI since spi flash code needs them. They are at 0x14000000 and 0x18000000
			//only the 2nd and 4th subregions of this 256MB region are left enabled
			//NOTE(review): the base 0x18000000 is not aligned to the 256MB region size; this presumably relies on the MPU ignoring the low base-address bits - confirm
			mpuRegCfg(6, 0x18000000, MPU_PERM_U_RW_S_RW | MPU_MEM_TYPE_DEVICE | MPU_PERM_NX | MPU_FLAG_ENABLED | MPU_REGION_SZ_256MB | MPU_SRD_0th | MPU_SRD_1st | MPU_SRD_3rd | MPU_SRD_5th | MPU_SRD_6th | MPU_SRD_7th);
			
			//disable the other region(s)
			for (i = 7; i < (MPU->TYPE & MPU_TYPE_DREGION_Msk) >> MPU_TYPE_DREGION_Pos; i++)
				mpuRegCfg(i, 0, 0);
			
			//storage is left writeable while the MPU is being switched on
			machSetStorageAreaWriteable(true);
			
			//mpu on
			MPU->CTRL = MPU_CTRL_ENABLE_Msk;	//HFNMIENA must be zero for ROMRAM. this would break the debug version of m0FaultDispatch. Luckily that is not an option with ROMRAM anyways.
			
			logi("mpu on now\n");
			
			
			//from here on the storage area is protected
			machSetStorageAreaWriteable(false);
			
			logi("device has a 0x%08x-byte dynamic heap\n", CPU_DYN_RAM_SIZE);

			//xxx: our romram code expects ram to be linear (no page wrapping. this makes vilsion a YES and APMEMORY a no)
			

		#else
		
			#error "MPU needed for ROMRAM"
		
		#endif
	}
	else if (stage == STAGE_INIT_IN_THREAD) {
		
		//resource allocators: each setup routine below claims DMA channels / PIO1 state machines / PIO1 instruction
		//slots by advancing these "first free" counters; the totals are read from the hardware
		uint8_t firstFreeDmaCh = 0, firstFreeSmInPio1 = 0, firstFreeSmInstrInPio1 = 0;
		uint8_t nPioInstrs = (pio1_hw->dbg_cfginfo & PIO_DBG_CFGINFO_IMEM_SIZE_BITS) >> PIO_DBG_CFGINFO_IMEM_SIZE_LSB;
		uint8_t nPioSms = (pio1_hw->dbg_cfginfo & PIO_DBG_CFGINFO_SM_COUNT_BITS) >> PIO_DBG_CFGINFO_SM_COUNT_LSB;
		uint8_t nDmaCh = ((dma_hw->n_channels & DMA_N_CHANNELS_BITS) >> DMA_N_CHANNELS_LSB);
		
		logi("initing comms\n");
		i2cIoPalmReset();	//teehee
		palmcardCommsInit(&firstFreeSmInPio1, &firstFreeSmInstrInPio1, &firstFreeDmaCh, nDmaCh, nPioSms, nPioInstrs);
		
		//init other hardwares
		if (!palmcardSdioSetup(&firstFreeSmInPio1, &firstFreeSmInstrInPio1, &firstFreeDmaCh, nDmaCh, nPioSms, nPioInstrs))
			fatal("Failed to init hw: %s\n", "SDIO");
		
		if (!palmcardIrSetup(&firstFreeSmInPio1, &firstFreeSmInstrInPio1, &firstFreeDmaCh, nDmaCh, nPioSms, nPioInstrs))
			fatal("Failed to init hw: %s\n", "IrDA");
		
		logi("After misc HW init, free DMA chs: %u, free PIO instrs: %u, free PIO SMs: %u\n", nDmaCh - firstFreeDmaCh, nPioInstrs - firstFreeSmInstrInPio1, nPioSms - firstFreeSmInPio1);
		
		//make the raw SPI flash access routine available through the rePalm function table
		if (!ralSetRePalmTabFunc(REPALM_FUNC_SPI_FLASH_OP, &palmcardFlashSpiAccess))
			fatal("Failed to export spi access\n");
	}
	else if (stage == STAGE_INIT_POST_RAL) {
		
	#ifdef USE_XRAM_FOR_DYNHEAP
		//hook four memory-manager entry points: the word at r9 - 8 is read as a pointer to a table of function
		//pointers; for each hooked entry (given as a byte offset into that table) the original is saved and replaced
		void ***r9;
		
		asm("mov %0, r9": "=r"(r9));
		
		mOpMemChunkNew = r9[-8 / 4][0x4BC / 4];
		r9[-8 / 4][0x4BC / 4] = pMemChunkNew;
		
		mOpMemPtrNew = r9[-8 / 4][0x584 / 4];
		r9[-8 / 4][0x584 / 4] = pMemPtrNew;
		
		mOpMemChunkFree = r9[-8 / 4][0x4B8 / 4];
		r9[-8 / 4][0x4B8 / 4] = pMemChunkFree;
		
		mOpMemPtrResize = r9[-8 / 4][0x594 / 4];
		r9[-8 / 4][0x594 / 4] = pMemPtrResize;
	#endif
	
	
		//boot splash is still in (hard reset has been confirmed and performed, if needed). This is the time to wait for machine ID since we are sure nothing queries it before now, and things may after
		//(busy-waits until the comms layer stops reporting the "invalid" marker bit in the hardware flags)
		while (palmcardCommsGetHwFlags() & PCC_HW_INVALID_RESERVED_BIT);
		logi("Machine ID received\n");
	}
}

//Supplies ROM tokens that this hardware synthesizes itself. Only 'snum' (the serial number) is provided.
//  name  - four-character token code
//  dataP - if not NULL, receives a pointer to the token data (mSnum, not NUL terminated)
//  szP   - if not NULL, receives the token size in bytes
//Returns true if the token was provided, false for any other token (outputs are then left untouched).
bool hwMaybeGetRomToken(uint32_t name, const void **dataP, uint16_t *szP)
{
	if (name != CREATE_4CC('s','n','u','m'))
		return false;
	
	if (dataP)
		*dataP = mSnum;
	
	if (szP)
		*szP = sizeof(mSnum);
	
	return true;
}

//Reports the hardware "misc flags" words.
//  miscFlagsP    - if not NULL, receives the base flags; the backlight flag is included only when the
//                  host-reported hardware flags say a backlight exists
//  extMiscFlagsP - if not NULL, receives the extended flags
//Calls fatal() if the host has not yet supplied valid hardware flags.
void hwGetMiscFlags(uint16_t *miscFlagsP, uint16_t *extMiscFlagsP)
{
	uint_fast16_t hostHwFlags = palmcardCommsGetHwFlags();
	
	if (hostHwFlags & PCC_HW_INVALID_RESERVED_BIT)
		fatal("HW Flags are invalid: 0x%04x\n", hostHwFlags);
	
	if (miscFlagsP) {
		
		uint16_t flags = hwrMiscFlagHasMiscFlagExt | hwrMiscFlagHasCradleDetect | hwrMiscFlagNoRTCBug | hwrMiscFlagHasMbdIrDA;
		
		//report backlight if we have it
		if (hostHwFlags & PCC_HW_HAS_BACKLIGHT)
			flags |= hwrMiscFlagHasBacklight;
		
		*miscFlagsP = flags;
	}
	
	if (extMiscFlagsP)
		*extMiscFlagsP = hwrMiscFlagExt115KIrOK;
}

//Returns the clock rate feeding the given device:
//  CpuClockRate, TimerClockRate - CPU_CLOCK_RATE
//  SdioUnitClockRate            - half of CPU_CLOCK_RATE
//  anything else                - -1 (unknown device)
int32_t cpuGetClockRate(enum ClockRateDevice dev)
{
	if (dev == CpuClockRate || dev == TimerClockRate)
		return CPU_CLOCK_RATE;
	
	if (dev == SdioUnitClockRate)
		return CPU_CLOCK_RATE / 2;
	
	return -1;
}

//Power control for board peripherals. Only PWR_SEL_SD_CARD is supported.
//  selector - which power rail/peripheral to control
//  newValP  - points to the requested state (nonzero = on). Must not be NULL for PWR_SEL_SD_CARD: there is
//             no way to query the state without setting it, so a NULL request is rejected.
//  oldValP  - if not NULL, receives whatever i2cIoSdCardEnable() returned (stored as before)
//Returns true if the request was handled, false for an unknown selector or a NULL newValP.
bool hwPwrCtl(uint32_t selector, const uint32_t *newValP, uint32_t *oldValP)
{
	switch (selector) {
		
		case PWR_SEL_SD_CARD: {
			
			uint32_t prevVal;
			
			//nothing to apply - refuse instead of dereferencing NULL
			if (!newValP)
				return false;
			
			prevVal = i2cIoSdCardEnable(!!*newValP);
			
			//the caller may not care about the previous state
			if (oldValP)
				*oldValP = prevVal;
			
			return true;
		}
		
		default:
			return false;
	}
}

//Puts the SoC clock tree into its lowest state: the system clock is moved back to the reference clock,
//the system PLL is bypassed and fully powered down, and the crystal oscillator is told to go dormant.
//NOTE(review): nothing visible in this file re-locks the PLL or switches the system clock back after
//wake-up - confirm where (or whether) that is handled.
static void machPrvSocSleep(void)
{
	//switch sys to REF and wait for it
	clocks_hw->clk[clk_sys].ctrl = (clocks_hw->clk[clk_sys].ctrl &~ CLOCKS_CLK_SYS_CTRL_SRC_BITS) | (CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLK_REF << CLOCKS_CLK_SYS_CTRL_SRC_LSB);
	while (((clocks_hw->clk[clk_sys].selected & CLOCKS_CLK_REF_SELECTED_BITS) >> CLOCKS_CLK_REF_SELECTED_LSB) != (1 << CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLK_REF));
	
	//shut the PLL down (bypass first, then power down every part of it)
	pll_sys_hw->cs |= PLL_CS_BYPASS_BITS;
	pll_sys_hw->pwr |= PLL_PWR_VCOPD_BITS | PLL_PWR_POSTDIVPD_BITS | PLL_PWR_PD_BITS | PLL_PWR_DSMPD_BITS;
	
	//go to dormant mode
	xosc_hw->dormant = XOSC_DORMANT_VALUE_DORMANT;
}

//Sleep entry point. Tells the host we are going to sleep, drops the SoC clocks, then waits via
//SysTaskDelay(10000) in place of a real wake-up condition ("pretending to sleep"), and finally
//reports a general wake reason. The wake-up side is not implemented yet.
void machSleep(void)
{
	logi("sending sleep signal!\n");
	palmcardCommsSleep();
	logi("slowing\n");
	machPrvSocSleep();
	logi("pretending to sleep\n");
	SysTaskDelay(10000);
	logi("waking up\n");
	dalModifyWakeFlags(DAL_WAKE_FLAG_GENERAL, 0);
	//nothing yet
}

//Spins until at least "ticks" timer ticks have elapsed since entry.
//The elapsed time is computed as an unsigned difference against the entry timestamp.
static void machBusyWaitDelay(uint64_t ticks)
{
	const uint64_t entryTime = timerGetTime();
	
	for (;;) {
		
		if (timerGetTime() - entryTime >= ticks)
			break;
	}
}

//Busy-waits for the given number of milliseconds.
//The conversion to timer ticks is done in 64 bits so it cannot overflow.
void machBusyWaitDelayMsec(uint32_t msec)
{
	uint64_t ticks = msec;
	
	ticks *= TIMER_TICKS_PER_MSEC;
	machBusyWaitDelay(ticks);
}

//Busy-waits for at least the given number of microseconds.
//The multiplication is done (in 64 bits) BEFORE the division by 1000, and the result is rounded up, so the
//delay is never shorter than requested even if TIMER_TICKS_PER_MSEC is not a multiple of 1000 (dividing
//first would truncate the tick rate, down to zero for timers slower than 1MHz).
//When TIMER_TICKS_PER_MSEC is a multiple of 1000 the result is identical to the previous computation.
void machBusyWaitDelayUsec(uint32_t usec)
{
	machBusyWaitDelay(((uint64_t)usec * TIMER_TICKS_PER_MSEC + 999) / 1000);
}

//Resets the device through the NVIC system reset request.
//  doHardReset - when true, the entire storage RAM is zeroed first: the storage area is made writeable
//                and interrupts are masked so nothing else runs while it is being wiped.
void deviceReset(bool doHardReset)
{
	if (doHardReset) {
		
		void *storageStart = (void*)CPU_STORAGE_RAM_BASE;
		
		machSetStorageAreaWriteable(true);
		asm volatile("cpsid i");
		memset(storageStart, 0, CPU_STORAGE_RAM_SIZE);
	}
	
	NVIC_SystemReset();
}

//Bit-banged full-duplex SPI transfer to the flash chip over the QSPI pins. For every byte, the byte at *tx is
//shifted out MSB first while a byte is shifted in and stored to *rx (tx and rx may point to the same buffer).
//Placed in the ".ramcode" section because XIP is switched off for the duration of the transfer.
//Sequence: wait for the SSI to go idle, disable XIP, hand the four low QSPI pins to function 5,
//call i2cIoRamAccessEnable(0), clock the bytes, call i2cIoRamAccessEnable(1), return the pins to function 0, re-enable XIP.
//  r0 = tx, r1 = rx, r2 = nBytes on entry (the function is naked, so the asm is the entire function)
//REQUIREMENTS visible from the code:
//  - nBytes must be nonzero: the byte loop transfers one byte BEFORE comparing tx against &tx[nBytes]
//  - the caller is expected to have interrupts off and the QSPI output-enables set up (see palmcardFlashSpiAccess)
//NOTE(review): presumably i2cIoRamAccessEnable must itself be reachable without XIP - confirm it is placed in RAM
//NOTE(review): GCC documents only basic asm as safe inside naked functions; this uses a clobber list - confirm with the toolchain in use
static void __attribute__((section(".ramcode"),noinline,naked)) palmcardFlashSpiAccessGuts(const uint8_t *tx, uint8_t *rx, uint32_t nBytes)
{
	//we REALLY want this to be small, so we do this in asm. C code follows
	//getting SSI unit to cooperate is not easy. taking over its gpios is...

	//two-level stringification so that the base-address macros are expanded before being pasted into the asm text
	#define STR2(x)	#x
	#define STR(x)	STR2(x)

	asm volatile(
		"	push	{r4-r7, lr}									\n\t"
		"	adds	r6, r0, r2									\n\t"	//r6 = &tx[nBytes]
		
		"	movs	r4, " STR(XIP_SSI_BASE) " >> 24				\n\t"	//r4 = SSI base (only the top byte of the address is nonzero)
		"	lsls	r4, r4, #24									\n\t"
		"1:														\n\t"
		"	ldr		r3, [r4, #0x28]								\n\t"	//ssi_hw->sr
		"	lsrs	r3, r3, #1									\n\t"	//check SSI_SR_BUSY_BITS
		"	bcs		1b											\n\t"	//spin while busy
		
		"	movs	r4, " STR(XIP_CTRL_BASE) " >> 24			\n\t"
		"	lsls	r4, r4, #24									\n\t"
		"	ldr		r3, [r4, #0x00]								\n\t"	//xip_ctrl_hw->ctrl &=~ XIP_CTRL_EN_BITS
		"	lsrs	r3, r3, #1									\n\t"
		"	lsls	r3, r3, #1									\n\t"
		"	str		r3, [r4, #0x00]								\n\t"
		
		"	dsb		sy											\n\t"
		
		"	movs	r5, #5										\n\t"	//function select 5 for the QSPI pins
		"	bl		cfgPins										\n\t"
		
		"	movs	r4, " STR(SIO_BASE) " >> 24					\n\t"	//r4 = SIO base for the rest of the transfer
		"	lsls	r4, r4, #24									\n\t"
		
		"	push	{r0-r3}										\n\t"	//keep tx/rx pointers across the call
		"	movs	r0, #0										\n\t"
		"	bl		i2cIoRamAccessEnable						\n\t"
		"	pop		{r0-r3}										\n\t"
		
		"	movs	r7, #1										\n\t"	//r7 = clock pin mask (bit 0 of the "hi" gpio bank)
		"bigloop:												\n\t"	//one iteration per byte
		"	ldrb	r2, [r0]									\n\t"	//r2 = next byte to send
		"	adds	r0, r0, #1									\n\t"
		"	movs	r3, #8										\n\t"	//r3 = bits left in this byte
		"smallloop:												\n\t"	//[2] is highest pin to be out, so this is safe
		"	lsrs	r5, r2, #7									\n\t"	//current tx bit (plus already-received bits above it) ...
		"	lsls	r5, r5, #2									\n\t"	//... moved to the data-out pin position (bit 2)
		"	str		r5, [r4, #0x30]								\n\t"	//write the "hi" bank output value: data bit set up, clock low
		"	str		r7, [r4, #0x34]								\n\t"	//clock high
		"	ldr		r5, [r4, #0x08]								\n\t"	//sample the "hi" bank inputs
		"	lsrs	r5, r5, #4									\n\t"	//input bit 3 -> carry
		"	adcs	r2, r2										\n\t"	//r2 = r2 * 2 + received bit
		"	subs	r3, r3, #1									\n\t"	//sets the flags used by the bne below (str does not touch them)
		"	str		r7, [r4, #0x38]								\n\t"	//clock low
		"	bne		smallloop									\n\t"
		"	strb	r2, [r1]									\n\t"	//low 8 bits of r2 are now the received byte
		"	adds	r1, r1, #1									\n\t"
		"	cmp		r0, r6										\n\t"
		"	bne		bigloop										\n\t"
		
		"	movs	r0, #1										\n\t"
		"	bl		i2cIoRamAccessEnable						\n\t"
		
		"	movs	r5, #0										\n\t"	//function select 0: give the pins back
		"	bl		cfgPins										\n\t"
		
		"	movs	r4, " STR(XIP_CTRL_BASE) " >> 24			\n\t"
		"	lsls	r4, r4, #24									\n\t"
		"	ldr		r3, [r4, #0x00]								\n\t"	//xip_ctrl_hw->ctrl |= XIP_CTRL_EN_BITS
		"	adds	r3, r3, #1									\n\t"	//(adding 1 is enough: bit 0 was cleared above)
		"	str		r3, [r4, #0x00]								\n\t"
		
		"	pop		{r4-r7, pc}									\n\t"
		
		"cfgPins:												\n\t"	//clobbers r2, r3, r4, expects param in r5
		"	ldr		r4, =" STR(IO_QSPI_BASE) "					\n\t"
		"	movs	r2, #4 + 3 * 8								\n\t"	//offset of ioqspi_hw->io[3].ctrl
		"1:														\n\t"
		"	ldr		r3, [r4, r2]								\n\t"
		"	lsrs	r3, r3, #5									\n\t"	//clear the low 5 bits (function select) ...
		"	lsls	r3, r3, #5									\n\t"
		"	adds	r3, r3, r5									\n\t"	//... and put the requested function in
		"	str		r3, [r4, r2]								\n\t"
		"	subs	r2, #8										\n\t"	//go to prev entry
		"	bpl		1b											\n\t"	//stops once the offset goes negative, i.e. after io[0]
		"	bx		lr											\n\t"
	:::"cc", "memory","r0","r1","r2","r3","r12","lr"
	);
		
		

//Reference C version of the routine above. It is not compiled and is not an exact transcription:
//for example it toggles PIN_RAMnFLASH directly where the asm calls i2cIoRamAccessEnable().
/*
	uint_fast8_t i;
	
	while(ssi_hw->sr & SSI_SR_BUSY_BITS);
	xip_ctrl_hw->ctrl &=~ XIP_CTRL_EN_BITS;
	asm volatile("dsb sy");
	
	for (i = 0; i < 4; i++)
		ioqspi_hw->io[i].ctrl = (ioqspi_hw->io[i].ctrl &~ 31) | 5;
		
	sio_hw->gpio_clr = 1 << PIN_RAMnFLASH;
	
	while (nBytes--) {
		
		uint_fast8_t i, d = *tx++;
		
		for (i = 0; i < 8; i++){
			if (d & 0x80)
				sio_hw->gpio_hi_set = 1 << 2;
			else
				sio_hw->gpio_hi_clr = 1 << 2;
			asm volatile("dsb sy");
			sio_hw->gpio_hi_set = 1 << 0;
			asm volatile("dsb sy");
			sio_hw->gpio_hi_clr = 1 << 0;
			d = d * 2 + ((sio_hw->gpio_hi_in >> 3) & 1);
		}
		*rx++ = d;
	}
	
	for (i = 0; i < 4; i++)
		ioqspi_hw->io[i].ctrl = (ioqspi_hw->io[i].ctrl &~ 31) | 0;
	
	//RAM mode
	sio_hw->gpio_set = 1 << PIN_RAMnFLASH;
	
	xip_ctrl_hw->ctrl = (xip_ctrl_hw->ctrl &~ XIP_CTRL_POWER_DOWN_BITS) | XIP_CTRL_EN_BITS;
*/
}


//Performs one raw full-duplex SPI transaction with the flash chip.
//  txIn   - nBytes to send
//  rxIn   - receives nBytes (may be the same buffer as txIn)
//  nBytes - transfer length; zero is a no-op
//External memory (ROMRAM) is not accessible while the transfer runs, so when USE_XRAM_FOR_DYNHEAP is
//defined, any buffer that lives there is transparently replaced with a bounce buffer from the kernel heap
//(failure to allocate one is fatal). Interrupts are off for the duration of the transfer itself.
static void palmcardFlashSpiAccess(const uint8_t *txIn, uint8_t *rxIn, uint32_t nBytes)
{
	irq_state_t prevState;
	const uint8_t *tx = txIn;
	uint8_t *rx = rxIn;
	uint8_t *txBounce = NULL, *rxBounce = NULL;
	
	//the low-level loop always transfers a byte before checking for the end, so a zero-length
	//request must never reach it (it would run through the entire address space)
	if (!nBytes)
		return;
	
	//neither read nor write data can be in external memory
	
	#ifdef USE_XRAM_FOR_DYNHEAP
		
		if (((uintptr_t)txIn) >= ROMRAM_BASE && (((uintptr_t)txIn) - ROMRAM_BASE) < ROMRAM_SIZE) {
			
			txBounce = kheapAllocEx(nBytes, MEM_NO_OS_HEAP);
			
			if (!txBounce)
				fatal("%s: cannot alloc %cX", __func__, 'T');
			
			memcpy(txBounce, txIn, nBytes);
			tx = txBounce;
		}
		if (((uintptr_t)rxIn) >= ROMRAM_BASE && (((uintptr_t)rxIn) - ROMRAM_BASE) < ROMRAM_SIZE) {
			
			rxBounce = kheapAllocEx(nBytes, MEM_NO_OS_HEAP);
			
			if (!rxBounce)
				fatal("%s: cannot alloc %cX", __func__, 'R');
			
			rx = rxBounce;
		}
		
	#endif
	
	prevState = irqsAllOff();
	
	//always safe to do: the three low "hi" bank pins are outputs, the fourth is an input
	sio_hw->gpio_hi_oe_set = 0b0111;
	sio_hw->gpio_hi_oe_clr = 0b1000;
	
	palmcardFlashSpiAccessGuts(tx, rx, nBytes);

	//irqs are back
	irqsRestoreState(prevState);
	
	//hand received data back to the caller's buffer and release the bounce buffers
	if (rxBounce) {
		memcpy(rxIn, rxBounce, nBytes);
		kheapFree(rxBounce);
	}
	if (txBounce)
		kheapFree(txBounce);
}

//Dispatcher for machine-specific calls made by emulated 68k code.
//Only the SPI flash access call is supported; its three 32-bit arguments are read from the 68k stack at
//offsets 2 (tx pointer), 6 (rx pointer) and 10 (byte count).
//Returns errNone when the call was handled, sysErrNotAllowed for any other call number.
//ret68kP is not written by this function.
Err machinePaceDispatch(EmulStateRef ref, uint16_t call, Err *ret68kP)
{
	//void palmcardFlashSpiAccess(const uint8_t *tx, uint8_t *rx, uint32_t nBytes)
	if (call == sysLibTrapCustom + REPALM_FUNC_SPI_FLASH_OP) {
		
		void *txPtr = (void*)PceReadInt32From68KStack(ref, 2);
		void *rxPtr = (void*)PceReadInt32From68KStack(ref, 6);
		uint32_t numBytes = PceReadInt32From68KStack(ref, 10);
		
		palmcardFlashSpiAccess(txPtr, rxPtr, numBytes);
		return errNone;
	}
	
	return sysErrNotAllowed;
}



