#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include "parsePrcPdb.h"
#include "util.h"



/*
 * Header at the very start of the replacement patch blob that performPatch()
 * reads from stdin. The patch code itself starts immediately after this header.
 *
 * Every field is an offset counted in halfwords (2-byte units) from the first
 * byte of that code; performPatch() converts each one to a byte offset with
 * "2 * field". All fields except entryPt name a 4-byte placeholder that gets
 * overwritten with a BL/BLX to the matching handler found in the original PACE.
 */
struct PacePatch {			//each uint16 is a count of halfwords from start of the code
	uint16_t entryPt;		//entry point of the new emulator
	uint16_t line1010;		//line1010 handler placeholder (a BL/BLX is written there)
	uint16_t line1111;		//line1111 handler placeholder (a BL/BLX is written there)
	uint16_t trap0;			//TRAP 0 handler placeholder
	uint16_t trap8;			//TRAP 8 handler placeholder
	uint16_t trapF;			//TRAP F handler placeholder
	uint16_t illegalInstr;	//illegal instr handler placeholder
	uint16_t unimplInstr;	//unimpl instr handler placeholder
	uint16_t div0;			//div 0 handler placeholder
	uint16_t tBit;			//trace bit handler placeholder
	//code follows
};
	




/*
 * Tell whether byte offset "ofst" falls inside a region that is "sz" bytes long.
 * "base" is accepted only so the call sites read uniformly; it is not consulted.
 * Returns true when 0 <= ofst < sz.
 */
static bool prvOfstIsInRange(const void* base, uint32_t sz, uint32_t ofst)
{
	(void)base;

	if (ofst >= sz)
		return false;

	return true;
}

/*
 * Tell whether "ptr" addresses a byte inside the "sz"-byte region that starts
 * at "base". Pointers below "base" and pointers at or beyond base + sz are
 * both rejected.
 */
static bool prvPtrIsInRange(const void* base, uint32_t sz, const void* ptr)
{
	const uintptr_t start = (uintptr_t)base;
	const uintptr_t addr = (uintptr_t)ptr;

	//test "addr >= start" first so the subtraction below cannot wrap
	return addr >= start && addr - start < sz;
}

/*
 * Given the offset of some ARM code, work out where a call to it really ends up.
 *
 * If the code at "codeStartOfst" is the three-instruction PC-relative thunk
 *     LDR R12, [PC, #imm] ; ADD R12, R12, PC (or ADD R12, PC, R12) ; BX R12
 * the literal is fetched and the final destination is stored in *outputP.
 * Otherwise the code is taken to be the destination itself and *outputP is set
 * to "codeStartOfst".
 *
 *  ptr, sz        the buffer being analysed and its size in bytes
 *  codeStartOfst  byte offset of the code inside the buffer (must be word aligned)
 *  outputP        receives the resolved byte offset on success
 *
 * Returns false (leaving *outputP untouched) if the offset is outside the
 * buffer or misaligned, or if a thunk is recognised but its literal or its
 * destination falls outside the buffer. Nothing outside [ptr, ptr + sz) is read.
 */
static bool prvResolvePotentialArmThunk(const void* ptr, uint32_t sz, uint32_t codeStartOfst, uint32_t *outputP)
{
	const uint32_t *code;
	uint32_t t;
	
	if (codeStartOfst >= sz)
		return false;
	
	if (codeStartOfst & 3) {
		fprintf(stderr, "   code offset 0x%08x is not arm\n", codeStartOfst);
		return false;
	}
	
	//a thunk is three instructions long. With fewer than that left in the buffer
	// this cannot be one, and we must not read past the end just to find out
	if (sz - codeStartOfst < 3 * sizeof(uint32_t))
		goto not_thunk;
	
	code = (const uint32_t*)((uintptr_t)ptr + codeStartOfst);

	if ((code[0] & 0xFFFFF003) != 0xE59FC000)					//LDR R12, [PC, +#ofst_divisible_by_4]
		goto not_thunk;
	
	if (code[1] != 0xE08CC00F && code[1] != 0xE08FC00C)			//ADD R12,R12,PC or ADD R12,PC,R12
		goto not_thunk;
	
	if (code[2] != 0xE12FFF1C)									//BX R12
		goto not_thunk;
	
	//locate the literal: PC reads as "instruction address + 8" in ARM mode
	t = (code[0] & 0xFFF) + codeStartOfst + 8;
	if (t < codeStartOfst)										//offset arithmetic wrapped around
		return false;
	if ((t & 3) || t >= sz || sz - t < sizeof(uint32_t))		//is the whole literal in range?
		return false;
	
	//the literal is relative to the PC value seen by the ADD (its address + 8 == thunk start + 12).
	// Unsigned wraparound here is intended: it is how negative displacements are applied
	t = *(const uint32_t*)((uintptr_t)ptr + t);
	t += codeStartOfst + 12;
	
	if (t >= sz)
		return false;
	
	*outputP = t;
	return true;

not_thunk:
	*outputP = codeStartOfst;
	return true;
}

/*
 * Decode the ARM call instruction at "call" and report where it goes.
 *
 *  ptr, sz      the buffer being analysed and its size in bytes
 *  call         address of the instruction word to decode (inside the buffer)
 *  targetOfstP  receives the destination as a byte offset from "ptr". For BLX
 *               the low bit is set to mark the destination as Thumb code.
 *
 * Two encodings are accepted:
 *   - BLX <imm>: destination = PC + imm24 * 4 + H * 2, flagged as Thumb
 *   - BL  <imm> (always-condition only): destination = PC + imm24 * 4, further
 *     resolved through prvResolvePotentialArmThunk()
 * Returns false for any other instruction, or when the destination is not
 * inside the buffer. Note that for an out-of-range BLX, *targetOfstP has
 * already been written when false is returned.
 */
static bool paceGetCallTarget(const void* ptr, uint32_t sz, const int32_t *call, uint32_t *targetOfstP)
{
	const uint32_t instr = (uint32_t)*call;
	const uint32_t curOfst = (uintptr_t)call - (uintptr_t)ptr;
	//sign-extended imm24, times 4. The left shift is done on the unsigned value:
	// left-shifting a negative signed int is undefined behaviour
	const uint32_t branchOfst = (uint32_t)(((int32_t)(instr << 8)) >> 6);
	
	if ((instr >> 25) == 0x7D) {			//BLX
		
		//bit 24 ("H") supplies one extra halfword; +1 marks the target as thumb
		*targetOfstP = curOfst + 8 + branchOfst + ((instr & 0x01000000) >> 23) + 1;
		return prvOfstIsInRange(ptr, sz, *targetOfstP);
	}
	
	if ((instr >> 24) == 0xEB) {			//BL	call means it must then switch to thumb there or it is arm all around
		
		return prvResolvePotentialArmThunk(ptr, sz, curOfst + 8 + branchOfst, targetOfstP);
	}
	
	return false;
}

/*
 * Scan straight-line ARM code starting at "codeStartOfst" for the first call
 * that paceGetCallTarget() can decode, and store its destination in *outputP.
 *
 * The scan gives up (returning false) as soon as it meets any other B/BL or a
 * BX/BLX-register instruction, because control flow is no longer linear after
 * that, or when the end of the buffer is reached.
 *
 *  ptr, sz        the buffer being analysed and its size in bytes
 *  codeStartOfst  byte offset to start scanning at (must be word aligned)
 *  outputP        receives the call destination on success
 */
static bool paceFindSingleCall(const void* ptr, uint32_t sz, uint32_t codeStartOfst, uint32_t *outputP)
{
	const uint32_t *code = (const uint32_t*)ptr;
	uint32_t i;
	
	if (codeStartOfst & 3) {
		fprintf(stderr, "   code offset 0x%08x is not arm\n", codeStartOfst);
		return false;
	}
	
	//only look at words that lie entirely inside the buffer (sz need not be a multiple of 4)
	for (i = codeStartOfst; i < sz && sz - i >= sizeof(uint32_t); i += 4) {
		
		uint32_t instr = code[i / 4];
		
		if (paceGetCallTarget(ptr, sz, (const int32_t*)&code[i / 4], outputP))
			return true;
		
		if ((instr & 0x0F000000) == 0x0A000000)	//any kind of branch means we're disqualified
			break;
		
		if ((instr & 0x0FFFFFD0) == 0x012FFF10)	//so do BX/BLX
			break;
	}
	
	fprintf(stderr, "   call not found\n");
	return false;
}

/*
 * Look up entry "idx" of the top-level dispatch table and find the first call
 * made by the handler it refers to.
 *
 * Table entries are byte displacements relative to the table itself, so the
 * handler's buffer offset is "entry + (table - ptr)". The entry is logged to
 * stderr before the search. Result and failure semantics are those of
 * paceFindSingleCall().
 */
static bool paceFindSingleCallTopLevel(const void* ptr, uint32_t sz, const uint32_t *table, uint32_t idx, uint32_t *outputP)
{
	const uint32_t entry = table[idx];
	uint32_t handlerOfst;
	
	fprintf(stderr, "table[%u] = 0x%08x\n", idx, entry);
	
	handlerOfst = entry + (uintptr_t)table - (uintptr_t)ptr;
	
	return paceFindSingleCall(ptr, sz, handlerOfst, outputP);
}

/*
 * Walk two levels of the emulator's dispatch tables and return the location of
 * the second-level handler, without inspecting the handler's own code.
 *
 *  ptr, sz    the buffer being analysed and its size in bytes
 *  mainTable  top-level dispatch table (entries are byte displacements from it)
 *  idx1       index into the top-level table
 *  idx2       index into the table embedded in the first-level handler
 *  outputP    receives the second-level handler's byte offset from "ptr"
 *
 * The first-level handler must begin with the expected four-instruction
 * dispatch sequence, directly followed by its own displacement table.
 * Returns false, after logging to stderr, if a pointer leaves the buffer or
 * the sequence does not match. Only the first byte of the returned handler is
 * known to be in range; callers must check however much they go on to read.
 */
static bool paceFindSecondLevelHandlerRaw(const void* ptr, uint32_t sz, const uint32_t *mainTable, uint32_t idx1, uint32_t idx2, uint32_t *outputP)
{
	const uint32_t *p, *instrs = mainTable;
	
	//find illegal instruction handler
	// 1. find first level handler for instrs with top 4 bits as idx1
	p = instrs + instrs[idx1] / 4;	//get pointer to first major handler
	//besides the handler prologue, the table entry p[4 + idx2] that is read below
	// must lie completely inside the buffer (check its last byte)
	if (!prvPtrIsInRange(ptr, sz, p) || !prvPtrIsInRange(ptr, sz, p + 7) ||
			!prvPtrIsInRange(ptr, sz, (const uint8_t*)(p + 5 + idx2) - 1)) {
		fprintf(stderr, "First major handler [%u] pointer not valid\n", idx1);
		return false;
	}
	if ((p[0] & 0xFFFFF07F) != 0xE1A00008 || (p[1] & 0xFFFFF07F) != 0xE1A00020 || p[2] != 0xE79F0100 || p[3] != 0xE08FF000) {
		fprintf(stderr, "First major handler code not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", p[0], p[1], p[2], p[3]);
		return false;
	}
	
	fprintf(stderr, "handler[%u] = 0x%08x\n", idx1, (unsigned)((char*)p - (char*)ptr));
	fprintf(stderr, "&handler[%u,%u] = 0x%08x\n", idx1, idx2, p[4 + idx2]);
	// 2. find instr handler for instrs with bits 6..1 as idx2
	//    (the embedded table starts at p[4]; displacements are relative to p + 5)
	p = p + 5 + p[4 + idx2] / 4;
	if (!prvPtrIsInRange(ptr, sz, p)) {
		fprintf(stderr, "Second major handler [%u,%u] pointer not valid\n", idx1, idx2);
		return false;
	}
	
	*outputP = (uintptr_t)p - (uintptr_t)ptr;
	return true;
}

/*
 * Find the routine that a second-level instruction handler hands control to.
 *
 * The handler selected by [idx1, idx2] must start with the fixed seven-word
 * "exit" sequence; the word right after it is decoded as a branch, and the
 * branch destination (seen through any ARM thunk) is stored in *outputP.
 *
 *  ptr, sz    the buffer being analysed and its size in bytes
 *  mainTable  top-level dispatch table
 *  idx1,idx2  first- and second-level table indices
 *  outputP    receives the destination's byte offset from "ptr"
 *
 * Returns false, after logging to stderr, when any step fails. No memory
 * outside the buffer is read, including on the error paths.
 */
static bool paceFindSingleCallSecondLevel(const void* ptr, uint32_t sz, const uint32_t *mainTable, uint32_t idx1, uint32_t idx2, uint32_t *outputP)
{
	static const uint8_t exitPattern[] = {
		0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2, 0x44, 0xA0, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5,
		0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5, 0x00, 0x00, 0x9D, 0xE5,
	};
	const uint32_t *p, *target;
	uint32_t ofst, targetOfst;
	
	
	if (!paceFindSecondLevelHandlerRaw(ptr, sz, mainTable, idx1, idx2, &ofst))
		return false;
	
	//we read the pattern (7 words) plus the branch that follows it (p[7])
	if (ofst >= sz || sz - ofst < sizeof(exitPattern) + sizeof(uint32_t)) {
		fprintf(stderr, "Second major handler [%u,%u] pointer not valid\n", idx1, idx2);
		return false;
	}
	
	p = (const uint32_t*)(ofst + (uintptr_t)ptr);
	if (memcmp(p, exitPattern, sizeof(exitPattern))) {
		fprintf(stderr, "Second major handler [%u,%u] not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", idx1, idx2, p[0], p[1], p[2], p[3]);
		return false;
	}
	fprintf(stderr, "handler[%u,%u] = 0x%08x\n", idx1, idx2, (unsigned)((char*)p - (char*)ptr));
	// 3. find the target where we branch to
	//    (branch sits at p[7]; its PC is two words further on, hence p + 9)
	target = p + 9 + (((int32_t)(p[7] << 8)) >> 8);
	if (!prvPtrIsInRange(ptr, sz, target)) {
		//the target is outside the buffer, so there is nothing we may safely print
		fprintf(stderr, "Final handler pointer not valid\n");
		return false;
	}
	
	targetOfst = (uintptr_t)target - (uintptr_t)ptr;
	if (!prvResolvePotentialArmThunk(ptr, sz, targetOfst, outputP)) {
		if (sz - targetOfst >= 4 * sizeof(uint32_t))
			fprintf(stderr, "Final handler not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", target[0], target[1], target[2], target[3]);
		else
			fprintf(stderr, "Final handler not valid\n");
		return false;
	}
	
	return true;
}

/*
 * Walk three levels of dispatch tables and find the first call made by the
 * third-level handler.
 *
 *  ptr, sz             the buffer being analysed and its size in bytes
 *  mainTable           top-level dispatch table
 *  idx1, idx2, idx3    indices into the first-, second- and third-level tables
 *  usesAltExitPattern  selects which fixed instruction sequence the third-level
 *                      handler must start with (the longer one is the Trap F form)
 *  outputP             receives the call destination's byte offset from "ptr"
 *
 * Each level's dispatch code is verified against the expected instructions.
 * Returns false, after logging to stderr, if any pointer leaves the buffer or
 * any sequence does not match; otherwise the result of paceFindSingleCall()
 * run on the third-level handler.
 */
static bool paceFindSingleCallThirdLevel(const void* ptr, uint32_t sz, const uint32_t *mainTable, uint32_t idx1, uint32_t idx2, uint32_t idx3, bool usesAltExitPattern, uint32_t *outputP)
{
	const uint32_t *p, *instrs = mainTable;
	const uint8_t *pattern;
	size_t patternLen;
	static const uint8_t exitPattern[] = {		//used for traps 0 and 8
		0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2, 0x44, 0xA0, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5,
		0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5, 0x00, 0x00, 0x9D, 0xE5,
	};
	
	static const uint8_t altExitPattern[] = {	//used for Trap F
		0x07, 0xC4, 0xA0, 0xE1, 0xB2, 0x70, 0xDA, 0xE0, 0x2C, 0xC8, 0x8C, 0xE1, 0xFF, 0xC8, 0xCC, 0xE3,
		0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2, 0x44, 0xA0, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5,
		0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5, 0x00, 0x00, 0x9D, 0xE5, 0x0C, 0x10, 0xA0, 0xE1,
	};
	
	// 1. find first level handler for instrs with top 4 bits as idx1
	p = instrs + instrs[idx1] / 4;	//get pointer to first major handler
	//the table entry p[4 + idx2] read below must lie completely inside the buffer too
	if (!prvPtrIsInRange(ptr, sz, p) || !prvPtrIsInRange(ptr, sz, p + 7) ||
			!prvPtrIsInRange(ptr, sz, (const uint8_t*)(p + 5 + idx2) - 1)) {
		fprintf(stderr, "First major handler [%u] pointer not valid\n", idx1);
		return false;
	}
	if (p[0] != 0xE1A00A08 || p[1] != 0xE1A00D20 || p[2] != 0xE79F0100 || p[3] != 0xE08FF000) {
		fprintf(stderr, "First major handler code not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", p[0], p[1], p[2], p[3]);
		return false;
	}
	
	fprintf(stderr, "handler[%u] = 0x%08x\n", idx1, (unsigned)((char*)p - (char*)ptr));
	fprintf(stderr, "&handler[%u,%u] = 0x%08x\n", idx1, idx2, p[4 + idx2]);
	// 2. find instr handler for instrs with bits 6..1 as idx2
	//    (three dispatch instructions followed by a 64-entry table: 67 words)
	p = p + 5 + p[4 + idx2] / 4;
	if (!prvPtrIsInRange(ptr, sz, p) || !prvPtrIsInRange(ptr, sz, p + 67)) {
		fprintf(stderr, "Second major handler [%u,%u] pointer not valid\n", idx1, idx2);
		return false;
	}
	if (p[0] != 0xE208003F || p[1] != 0xE79F0100 || p[2] != 0xE08FF000) {
		fprintf(stderr, "Third major handler [%u,%u] not valid: {0x%08x 0x%08x 0x%08x}\n", idx1, idx2, p[0], p[1], p[2]);
		return false;
	}
	
	fprintf(stderr, "handler[%u,%u] = 0x%08x\n", idx1, idx2, (unsigned)((char*)p - (char*)ptr));
	fprintf(stderr, "&handler[%u,%u,%u] = 0x%08x\n", idx1, idx2, idx3, p[3 + idx3]);
	// 3. find instr handler for instrs with bits as idx3
	p = p + 4 + p[3 + idx3] / 4;
	fprintf(stderr, "handler[%u,%u,%u] = 0x%08x\n", idx1, idx2, idx3, (unsigned)((char*)p - (char*)ptr));
	
	if (usesAltExitPattern) {
		pattern = altExitPattern;
		patternLen = sizeof(altExitPattern);
	}
	else {
		pattern = exitPattern;
		patternLen = sizeof(exitPattern);
	}
	
	//the whole pattern must fit in the buffer before we may compare (or print) it
	if (!prvPtrIsInRange(ptr, sz, p) || sz - ((uintptr_t)p - (uintptr_t)ptr) < patternLen) {
		fprintf(stderr, "Third major handler [%u,%u,%u] pointer not valid\n", idx1, idx2, idx3);
		return false;
	}
	if (memcmp(p, pattern, patternLen)) {
		fprintf(stderr, "Third major handler [%u,%u,%u] not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", idx1, idx2, idx3, p[0], p[1], p[2], p[3]);
		return false;
	}
	
	return paceFindSingleCall(ptr, sz, (uintptr_t)p - (uintptr_t)ptr, outputP);
}

/*
 * Locate the divide-by-zero handler.
 *
 * There is no table entry for it, so it is found indirectly: the DIVU handler
 * (dispatch entry [8,26]) is matched against a known instruction sequence, the
 * secondary routine it calls is located, and the first call made by that
 * routine is taken to be the divide-by-zero handler.
 *
 *  ptr, sz    the buffer being analysed and its size in bytes
 *  mainTable  top-level dispatch table
 *  outputP    receives the handler's byte offset from "ptr"; the low bit is set
 *             when the Thumb decoding path produced it
 *
 * Returns false if any stage does not match (most stages log to stderr).
 */
static bool paceFindDivZeroHandler(const void* ptr, uint32_t sz, const uint32_t *mainTable, uint32_t *outputP)
{

/*
			//for div0:
			[8,26] -> handle_divu
			bl			fetch_word_for_ea
			[...]
			bl			secondary_udiv_handler
			->>
				first call
*/

	static const uint8_t udivCall1[] = {
		0x09, 0x28, 0xA0, 0xE1, 0x22, 0x28, 0xA0, 0xE1,
		0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2,
		0x44, 0xA0, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5,
		0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5,
		0x00, 0x00, 0x9D, 0xE5, 0x08, 0x1A, 0xA0, 0xE1,
		0xA1, 0x1E, 0xA0, 0xE1,
	};
	static const uint8_t udivCall2[] = {
		0x00, 0x00, 0x9D, 0xE5, 0x44, 0xA0, 0x90, 0xE5,
		0x48, 0x40, 0x90, 0xE5, 0x04, 0x50, 0x80, 0xE2,
		0x24, 0x60, 0x80, 0xE2, 0xB2, 0x70, 0xDA, 0xE0
	};
	//words of the DIVU handler we examine: p[0], p[1], udivCall1 (11), p[13], udivCall2 (6)
	enum { UDIV_HANDLER_WORDS = 2 + sizeof(udivCall1) / 4 + 1 + sizeof(udivCall2) / 4 };
		
	const uint32_t *p;
	uint32_t i, ofst;
	
	if (!paceFindSecondLevelHandlerRaw(ptr, sz, mainTable, 8, 26, &ofst))
		return false;
	
	//everything compared below must actually be inside the buffer
	if (ofst >= sz || sz - ofst < UDIV_HANDLER_WORDS * sizeof(uint32_t)) {
		fprintf(stderr, "Udiv handler does not fit in the resource\n");
		return false;
	}
	
	p = (const uint32_t*)(ofst + (uintptr_t)ptr);
	if (p[0] != 0xE208C03F || (p[1] & 0xFF000000) != 0xEB000000) {
		fprintf(stderr, "Udiv handler/1 not valid: {0x%08x 0x%08x}\n", p[0], p[1]);
		return false;
	}
	if (memcmp(p + 2, udivCall1, sizeof(udivCall1))) {
		fprintf(stderr, "Udiv handler/2 not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", p[2], p[3], p[4], p[5]);
		return false;
	}
	if ((p[13] >> 25) != 0x7D && (p[13] >> 24) != 0xEB) {			//must be a bl or a blx
		fprintf(stderr, "Udiv handler/3 not valid: {0x%08x}\n", p[13]);
		return false;
	}
	if (memcmp(p + 14, udivCall2, sizeof(udivCall2))) {
		fprintf(stderr, "Udiv handler/4 not valid: {0x%08x 0x%08x 0x%08x 0x%08x}\n", p[14], p[15], p[16], p[17]);
		return false;
	}
	
	if (!paceGetCallTarget(ptr, sz, (const int32_t*)&p[13], &ofst)) {
		fprintf(stderr, "Udiv secondary handler not found\n");
		return false;
	}

	fprintf(stderr, "secondary handler is 0x%08x\n", ofst);
	
	//at this point, the first call it makes is the one we need
	if (ofst & 1) {		//thumb
		
		const uint16_t *instr;
		uint32_t t;
		
		ofst &=~ 1;
		instr = (const uint16_t*)(((uintptr_t)ptr) + ofst);
		//look at no more than 32 bytes, and never at a halfword pair that crosses the end
		for (i = ofst; i < ofst + 32 && i + 4 <= sz; i += 2, instr++) {
			
			switch (instr[0] >> 11) {
				case 0x1D:	//0b11101: BLX suffix or undefined instr. either way cannot come first!
				case 0x1F:	//0b11111: BL suffix - cannot come first either
					return false;
				case 0x1E:	//0b11110: BL/BLX prefix - long instr probably follows - investigate further
					break;
				default:		//one-halfword instrs
					continue;
			}
			//we are commited to this first half so it is ok to trash "i"
			//prefix holds the sign-extended upper offset bits (imm11 << 12); shift as unsigned to avoid UB
			t = i + 4 + (uint32_t)(((int32_t)((uint32_t)instr[0] << 21)) >> 9);
			switch (instr[1] >> 11) {
				case 0x1D:	//0b11101: BLX suffix or undefined instr
					if (instr[1] & 1)	//undef
						return false;
					//BLX imm
					t += (instr[1] & 0x07FF) * 2;
					t &=~ 3;
					//NOTE(review): a thumb BLX lands in ARM code, yet the result is tagged
					// with the low bit like a thumb target. Kept as is - confirm intent
					t++;
					break;
				
				case 0x1F:	//0b11111: BL suffix
					//BL imm
					t += (instr[1] & 0x07FF) * 2 + 1;
					break;
				
				default:		//invalid as second halfword following a BL/BLX prefix
					return false;
			}
			
			if (!prvOfstIsInRange(ptr, sz, t))
				return false;
			
			*outputP = t;
			return true;
		}
	}
	else {				//arm
		
		const uint32_t *instr = (const uint32_t*)(((uintptr_t)ptr) + ofst);
		//look at no more than 16 instructions for a BL (any condition code)
		for (i = ofst; i < ofst + 64 && i + 4 <= sz; i += 4, instr++) {
			
			if ((*instr & 0x0F000000) == 0x0B000000) {
				
				//destination = PC (instr + 8) + sign-extended imm24 * 4
				i += 8 + (uint32_t)(((int32_t)(*instr << 8)) >> 6);
				return prvResolvePotentialArmThunk(ptr, sz, i, outputP);
			}
		}
	}
	return false;
}

/*
 * Write a 32-bit Thumb call instruction at "writeOfst" that calls "toOfst".
 *
 *  buf        start of the buffer being patched
 *  writeOfst  byte offset of the 4-byte slot to overwrite (halfword aligned)
 *  toOfst     byte offset of the destination; low bit set means the destination
 *             is Thumb code (a BL is emitted), clear means ARM code (a BLX is
 *             emitted)
 *
 * For BLX the hardware computes the destination from Align(PC, 4) and requires
 * the encoded offset to be a multiple of 4, so the displacement is measured
 * from the word-aligned PC in that case and a destination that is not word
 * aligned is refused.
 *
 * Returns false (after logging to stderr, with nothing written) when the
 * destination cannot be encoded.
 */
static bool performJumpInsertion(void* buf, uint32_t writeOfst, uint32_t toOfst)
{
	const bool toThumb = (toOfst & 1) != 0;
	uint16_t* dst = (uint16_t*)(((uintptr_t)buf) + writeOfst);
	uint32_t pc = writeOfst + 4;		//in thumb mode PC reads as instruction address + 4
	uint32_t S, J1, J2, imm10, imm11;
	int32_t offset;						//displacement in halfwords
	
	if (!toThumb) {
		
		if (toOfst & 2) {
			fprintf(stderr, "ARM target 0x%08x is not word aligned\n", toOfst);
			return false;
		}
		pc &=~ 3u;						//BLX: offset is relative to Align(PC, 4)
	}
	
	offset = (int32_t)((toOfst &~ 1u) - pc) / 2;
	
	//the encoding holds a 24-bit signed halfword count
	if ((offset >> 23) != 0 && (offset >> 23) != -1) {
		fprintf(stderr, "offset too long: %d\n", offset);
		return false;
	}
	
	//split into the fields of the T1/T2 BL/BLX encoding: S:I1:I2:imm10:imm11, with J = NOT(I XOR S)
	S = (offset >> 23) & 1;
	J1 = ((offset >> 22) & 1) ^ S ^ 1;
	J2 = ((offset >> 21) & 1) ^ S ^ 1;
	imm10 = (offset >> 11) & 0x3FF;
	imm11 = offset & 0x7FF;
	
	*dst++ = 0xf000 + (S << 10) + imm10;
	*dst++ = 0xc000 + (J1 << 13) + (J2 << 11) + imm11 + (toThumb ? 0x1000 : 0x0000);	//bit 12 set: BL, clear: BLX
	
	return true;
}

//Fixed layout of the original emulator. All values are in bytes, measured from
// the first byte of the emulator's init pattern (see patchPace).

//where the trace-bit exception handler code is expected
#define TRACE_LOC_IN_EMU		0x5D4C
//where the call instruction that leads to the trace bit handler is decoded
#define TRACE_JUMPLOC_IN_EMU	0x5D98
//total size of the emulator; the end pattern is expected in its last bytes
#define FULL_EMU_SIZE			0x5E08

/*
 * Count the unconditional ARM branches (B) or calls (BL) in "buf" whose
 * destination is exactly "to".
 *
 *  buf          resource data to scan, treated as an array of 32-bit words
 *  to           destination being searched for
 *  lookForBL    true to match BL (top byte 0xEB), false to match B (top byte 0xEA)
 *  firstFoundP  set to the address of the first match; untouched if none
 *
 * Returns the number of matching instructions. Callers want exactly one.
 */
static uint32_t findArmBranchOrCall(struct Buffer *buf, uint32_t *to, bool lookForBL, uint32_t **firstFoundP)
{
	uint32_t *words = (uint32_t*)buf->data;
	const uint_fast8_t wantedTopByte = lookForBL ? 0xeb : 0xea;
	uint32_t matches = 0, idx = 0;
	
	while (idx < buf->sz / sizeof(uint32_t)) {
		
		uint32_t *here = words + idx++;
		const uint32_t instr = *here;
		int32_t wordOfst;
		
		if ((instr >> 24) == wantedTopByte) {
			
			//sign-extend imm24; the destination is counted in words from PC (instruction + 2 words)
			wordOfst = ((int32_t)(instr << 8)) >> 8;
			
			if (here + 2 + wordOfst == to) {
				
				if (matches == 0)
					*firstFoundP = here;
				
				matches++;
			}
		}
	}
	
	return matches;
}

/*
 * Count the 32-bit Thumb calls in "buf" whose destination is exactly "to".
 *
 *  buf           resource data to scan, treated as an array of halfwords
 *  to            destination being searched for
 *  withExchange  true to match BLX <imm> (switches to ARM), false to match BL
 *  firstFoundP   set to the address of the first match; untouched if none
 *
 * For BLX the destination is computed from Align(PC, 4), as the hardware does,
 * so call sites that are only halfword aligned are decoded correctly. Offsets
 * inside the buffer are assumed to have the same alignment the code has when
 * it runs.
 *
 * Returns the number of matching instructions. Callers want exactly one.
 */
static uint32_t findThumbCall(struct Buffer *buf, uint16_t *to, bool withExchange, uint16_t **firstFoundP)	//find a BL or a BLX
{
	uint16_t *buf16 = (uint16_t*)buf->data;
	const uint32_t numHalfwords = buf->sz / sizeof(uint16_t);
	const uint16_t wantedSuffix = withExchange ? 0xc000 : 0xd000;
	uint32_t i, numResults = 0;
	
	if (numHalfwords < 2)		//no room for even one two-halfword instruction
		return 0;
	
	for (i = 0; i < numHalfwords - 1; i++) {
		
		uint32_t s, imm10, imm11, j1, j2, i1, i2, pc;
		int32_t fullOfst;
		
		if ((buf16[i] & 0xf800) != 0xf000 || (buf16[i + 1] & 0xd000) != wantedSuffix)
			continue;
		
		//reassemble the byte offset S:I1:I2:imm10:imm11:0, where I = NOT(J XOR S)
		s = (buf16[i] >> 10) & 1;
		imm10 = buf16[i] & 0x03ff;
		imm11 = buf16[i + 1] & 0x7ff;
		j1 = (buf16[i + 1] >> 13) & 1;
		j2 = (buf16[i + 1] >> 11) & 1;
		i1 = (j1 ^ s ^ 1);
		i2 = (j2 ^ s ^ 1);
		fullOfst = (int32_t)((imm11 << 1) + (imm10 << 12) + (i2 << 22) + (i1 << 23) + (s ? 0xff000000 : 0x00000000));
		
		pc = i * 2 + 4;			//thumb PC is the instruction address + 4
		if (withExchange)
			pc &=~ 3u;			//BLX measures from Align(PC, 4)
		
		if ((uintptr_t)buf16 + pc + (uintptr_t)(intptr_t)fullOfst != (uintptr_t)to)
			continue;
		
		if (!numResults)
			*firstFoundP = buf16 + i;
		
		numResults++;
	}
	
	return numResults;
}

/*
 * Find the single Thumb instruction that enters the emulator, so that it can be
 * redirected to the replacement code.
 *
 *  buf            resource data to scan
 *  emuStart       first instruction of the original emulator
 *  replacedNameP  on success, set to a short name for the kind of entry found
 *
 * Exactly one of the following must reference "emuStart": an ARM B, an ARM BL
 * or a Thumb BLX. A BLX is returned directly. An ARM B must be preceded (by 2
 * or 4 bytes, but not both) by a Thumb "BX PC", and that stub must itself be
 * the destination of exactly one Thumb BL, which is what gets returned.
 *
 * Returns NULL, after explaining why on stderr, whenever the direct patch is
 * not possible; the caller then falls back to the slower method.
 */
static uint16_t* findCallSite(struct Buffer *buf, void *emuStart, char **replacedNameP)
{
	uint32_t *armBranch = NULL, *armCall = NULL;
	uint16_t *blx = NULL, *bxPc, *caller;
	unsigned total;
	bool farStub, nearStub;
	
	total = findArmBranchOrCall(buf, emuStart, false, &armBranch);
	total += findArmBranchOrCall(buf, emuStart, true, &armCall);
	total += findThumbCall(buf, emuStart, true, &blx);
	
	if (total != 1) {
		
		fprintf(stderr, " Expected one call to the emulator, found %u. Will use the safer but slower patch method\n", total);
		return NULL;
	}
	
	if (armCall) {
		
		fprintf(stderr, " Emulator is called in ARM mode. Will have to use the slower patch method\n");
		return NULL;
	}
	
	if (blx) {		//a direct BLX can simply be replaced
		
		*replacedNameP = "BLX";
		return blx;
	}
	
	//what remains is the lone ARM branch
	*replacedNameP = "ARM BL";
	
	//The branch is word aligned. The "BX PC" that leads to it works from either
	// 4 or 2 bytes before it. One of the two is fine, both or neither is not
	bxPc = (uint16_t*)(armBranch - 1);
	farStub = bxPc[0] == 0x4778;		//BX PC, filler halfword, arm branch
	nearStub = bxPc[1] == 0x4778;		//BX PC, arm branch
	
	if (farStub == nearStub) {
		
		fprintf(stderr, " ARM branch not preceded by a \"BX PC\". Will use the safer but slower patch method\n");
		return NULL;
	}
	
	if (nearStub)
		bxPc++;
	
	//the "BX PC" stub must have exactly one thumb caller; that is what we patch
	if (findThumbCall(buf, bxPc, false, &caller) != 1) {
		
		fprintf(stderr, " Thumb call not found. Will use the safer but slower patch method\n");
		return NULL;
	}
	
	return caller;
}

static bool performPatch(struct Buffer *buf, uint32_t* emuStart, uint32_t line1010handler, uint32_t line1111handler, uint32_t trap0handler, uint32_t trap8handler, uint32_t trapOsCallHandler, uint32_t illegalInstrHandler, uint32_t notImplInstrHandler, uint32_t div0handler, uint32_t traceBitHandler)
{
	uint32_t codeBase, extraSize = 0;
	static uint8_t newCode[65536];
	char *replacedInstrName;
	uint16_t *thumbCall;
	int L, newLen;
	struct PacePatch *pp = (struct PacePatch*)newCode;
	
	for (L = 0; L < sizeof(newCode); L++) {
		int ch;
		
		if ((ch = getchar()) == EOF)
			break;
		newCode[L] = ch;
	}
	fprintf(stderr, "read a %u-byte patch\n", L);
	
	//fill emulator with 0xFF
	memset(emuStart, 0xFF, FULL_EMU_SIZE);
	
	//There will always be only one entry to PACE emulator. However, there are three ways that it can be entered. Only one will be used in a particular PACE.prc.
	//1. PACE.prc compiled in thumb mode, it will be an ARM "B", preceded by an optional 2 bytes of DNK, preceded by a thumb "BX PC". A single thumb "BL" will refrence it
	//2. PACE.prc compiled in thumb mode, it will have a BLX to the emulator
	//3. PACE.prc compiled in ARM mode will simply have one "BL" calling the emulator. In this case we'll need to add a switch to thumb mode.
	thumbCall = findCallSite(buf, emuStart, &replacedInstrName);
	if (!thumbCall)
		extraSize = 16 /* size of our jump to thumb */;

	//see if we can insert the code
	if (L < FULL_EMU_SIZE - extraSize /* size of our jump to thumb */) {
		
		codeBase = extraSize + (uintptr_t)emuStart - (uintptr_t)buf->data;
		fprintf(stderr, "  -> inserting in place\n");
	}
	else {
		
		codeBase = buf->sz;
		
		//resize res
		buf->data = realloc(buf->data, buf->sz += L - sizeof(struct PacePatch));
		if (!buf->data)
			return false;
		
		fprintf(stderr, "  -> inserting at end\n");
	}
	
	//write a transition to thumb if needed
	if (!thumbCall) {
		
		*emuStart++ = 0xE59FC004;
		*emuStart++ = 0xE08CC00F;
		*emuStart++ = 0xE12FFF1C;
		*emuStart = codeBase + 2 * pp->entryPt + 1 - ((uintptr_t)emuStart - (uintptr_t)buf->data);
	}
	else {		//replace the actual thumb call with a BL to the new place
		
		uint32_t ofst = ((uint16_t*)emuStart) + pp->entryPt - (thumbCall + 2);
		uint32_t s, i1, i2, j1, j2;
		//PACE is small enough that we are guaranteed to be in range, skip the check
		
		thumbCall[1] = ofst & 0x7ff;
		ofst >>= 11;
		thumbCall[0] = ofst & 0x3ff;
		ofst >>= 10;
		
		i2 = ofst & 1;
		ofst >>= 1;
		i1 = ofst & 1;
		ofst >>= 1;
		s = ofst & 1;
		
		j1 = i1 ^ s ^ 1;
		j2 = i2 ^ s ^ 1;
		
		thumbCall[1] |= (j2 << 11) + (j1 << 13) + 0xd000;
		thumbCall[0] |= (s << 10) + 0xf000;
		
		fprintf(stderr, " Replacing %s to pace in thumb code directly\n", replacedInstrName);
	}
	
	//copy in new code
	memcpy(buf->data + codeBase, pp + 1, L - sizeof(struct PacePatch));
	
	//write in jumps
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->line1010, line1010handler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->line1111, line1111handler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->trap0, trap0handler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->trap8, trap8handler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->trapF, trapOsCallHandler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->illegalInstr, illegalInstrHandler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->unimplInstr, notImplInstrHandler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->div0, div0handler))
		return false;
	
	if (!performJumpInsertion(buf->data, codeBase + 2 * pp->tBit, traceBitHandler))
		return false;
	
	
	fprintf(stderr, "Patch complete!\n");

	return true;
}

/*
 * Locate the PACE emulator inside the resource "buf", discover the addresses of
 * all the original routines the replacement needs, and apply the patch.
 *
 * The emulator is recognised by a fixed instruction sequence at its start, must
 * be exactly FULL_EMU_SIZE bytes long and end with a second fixed sequence, and
 * must have the trace-bit handler at its known offset. The dispatch table
 * follows the start sequence directly. Progress and the reason for any failure
 * are written to stderr.
 *
 * Returns true only if everything was found and performPatch() succeeded.
 */
static bool patchPace(struct Buffer *buf)
{
	static const uint8_t initPattern[] = {
		0xF1, 0x4F, 0x2D, 0xE9, 0x00, 0x00, 0x9D, 0xE5, 0x44, 0xA0, 0x90, 0xE5, 0x48, 0x40, 0x90, 0xE5,
		0x04, 0x50, 0x80, 0xE2, 0x24, 0x60, 0x80, 0xE2, 0xB2, 0x70, 0xDA, 0xE0, 0x44, 0xB0, 0x8F, 0xE2,
		0x01, 0x00, 0x00, 0xEA, 0xC0, 0x0C, 0x14, 0xE3, 0x47, 0x17, 0x00, 0x1A, 0x07, 0x84, 0xA0, 0xE1,
		0x27, 0x84, 0x88, 0xE1, 0xF0, 0x0C, 0x08, 0xE2, 0x20, 0x05, 0x9B, 0xE7, 0xB2, 0x70, 0xDA, 0xE0,
		0x00, 0xF0, 0x8B, 0xE0, 0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2, 0x44, 0xA0, 0x80, 0xE5,
		0x48, 0x40, 0x80, 0xE5, 0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5, 0x00, 0x00, 0xA0, 0xE3,
		0xF2, 0x4F, 0xBD, 0xE8, 0x1E, 0xFF, 0x2F, 0xE1,
	};
	
	static const uint8_t traceExceptionHandler[] = {
		0x80, 0x0C, 0x14, 0xE3, 0x09, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x9D, 0xE5, 0x02, 0x10, 0x4A, 0xE2,
		0x44, 0x10, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5, 0x07, 0x84, 0xA0, 0xE1, 0x27, 0x84, 0x88, 0xE1,
		0xF0, 0x0C, 0x08, 0xE2, 0x20, 0x05, 0x9B, 0xE7, 0xB2, 0x70, 0xDA, 0xE0, 0x00, 0xF0, 0x8B, 0xE0,
		0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2, 0x44, 0xA0, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5,
		0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5, 0x00, 0x00, 0x9D, 0xE5
	};
	
	static const uint8_t endPattern[] = {
		0x00, 0x00, 0x9D, 0xE5, 0x02, 0xA0, 0x4A, 0xE2, 0x44, 0xA0, 0x80, 0xE5, 0x48, 0x40, 0x80, 0xE5,
		0x00, 0x80, 0x80, 0xE5, 0x18, 0x90, 0x9D, 0xE5, 0x00, 0x00, 0x9D, 0xE5
	};
	
	uint32_t sz = buf->sz, i, traceBitHandler, line1010handler, line1111handler, illegalInstrHandler, notImplInstrHandler, trap0handler, trap8handler, trapOsCallHandler, div0handler;
	uint32_t *emuStart, *instrs, *words = (uint32_t*)buf->data;
	void* ptr = words;
	
	for (i = 0; i + sizeof(initPattern) < sz; i += 4) {
		
		if (memcmp(words + i / 4, initPattern, sizeof(initPattern)))
			continue;
		
		//only the first match is considered: every path below returns
		emuStart = words + i / 4;
		instrs = emuStart + sizeof(initPattern) / 4;		//dispatch table follows the init code
		
		fprintf(stderr, " emulator start found at offset 0x%08x\n", i);
		
		//the fixed-offset checks below read up to the emulator's last byte
		if (sz - i < FULL_EMU_SIZE) {
			fprintf(stderr, "emulator extends past the end of the resource\n");
			return false;
		}
		
		//verify end
		if (memcmp(emuStart + (FULL_EMU_SIZE - sizeof(endPattern)) / 4, endPattern, sizeof(endPattern))) {
			fprintf(stderr, "end pattern not found at expected offset\n");
			return false;
		}
		
		fprintf(stderr, "  emulator size %u (0x0%x) bytes\n", FULL_EMU_SIZE, FULL_EMU_SIZE);
		
		//get trace bit handler
		if (memcmp(emuStart + TRACE_LOC_IN_EMU / 4, traceExceptionHandler, sizeof(traceExceptionHandler))) {
			fprintf(stderr, "trace bit handler not found at expected offset\n");
			return false;
		}
		if (!paceGetCallTarget(ptr, sz, (const int32_t*)(emuStart + TRACE_JUMPLOC_IN_EMU / 4), &traceBitHandler))
			return false;
		fprintf(stderr, "  found trace bit handler at offset 0x%08x\n", traceBitHandler);
		
		//find line 1010 handler
		if (!paceFindSingleCallTopLevel(ptr, sz, instrs, 10, &line1010handler))
			return false;
		fprintf(stderr, "  found line 1010 handler at offset 0x%08x\n", line1010handler);
		
		//find line 1111 handler
		if (!paceFindSingleCallTopLevel(ptr, sz, instrs, 15, &line1111handler))
			return false;
		fprintf(stderr, "  found line 1111 handler at offset 0x%08x\n", line1111handler);
		
		//find illegal instruction handler
		if (!paceFindSingleCallSecondLevel(ptr, sz, instrs, 0, 3, &illegalInstrHandler))
			return false;
		fprintf(stderr, "  found illegal instr handler at offset 0x%08x\n", illegalInstrHandler);
		
		//find unimpl instruction handler
		if (!paceFindSingleCallSecondLevel(ptr, sz, instrs, 0, 58, &notImplInstrHandler))
			return false;
		fprintf(stderr, "  found unimpl instr handler at offset 0x%08x\n", notImplInstrHandler);

		//find trap 0 instruction handler
		if (!paceFindSingleCallThirdLevel(ptr, sz, instrs, 4, 57, 0, false, &trap0handler))
			return false;
		fprintf(stderr, "  found TRAP0 handler at offset 0x%08x\n", trap0handler);

		//find trap 8 instruction handler
		if (!paceFindSingleCallThirdLevel(ptr, sz, instrs, 4, 57, 8, false, &trap8handler))
			return false;
		fprintf(stderr, "  found TRAP8 handler at offset 0x%08x\n", trap8handler);

		//find trap 15 instruction handler (SysTraps)
		if (!paceFindSingleCallThirdLevel(ptr, sz, instrs, 4, 57, 15, true, &trapOsCallHandler))
			return false;
		fprintf(stderr, "  found SysTrap handler at offset 0x%08x\n", trapOsCallHandler);

		//find div-by-zero handler
		if (!paceFindDivZeroHandler(ptr, sz, instrs, &div0handler))
			return false;
		fprintf(stderr, "  found DIV0 handler at offset 0x%08x\n", div0handler);
		
		//we found everything - patch it!
		return performPatch(buf, emuStart, line1010handler, line1111handler, trap0handler, trap8handler, trapOsCallHandler, illegalInstrHandler, notImplInstrHandler, div0handler, traceBitHandler);
	}
	
	fprintf(stderr, " emulator not found!!\n");
	return false;
}

/*
 * Entry point: "<prog> pace.prc pace_patched.prc < pacepatch.bin".
 *
 * Loads the input database, checks that it is a resource database of type
 * 'libr' and creator 'a68k', patches its first ('amdc', 0) resource with the
 * patch supplied on stdin and writes the result to the output file.
 *
 * Exit codes: 0 success, -1 bad usage, -2 not a PACE prc, -3 code resource
 * smaller than 1024 bytes, -4 patching failed, -5 output could not be
 * written, -7 no ('amdc', 0) resource present.
 */
int main(int argc, char** argv)
{
	struct PalmRes *target = NULL;
	struct PalmDb *db;
	int idx;
	
	if (argc != 3) {
		fprintf(stderr, "USAGE: %s pace.prc pace_patched.prc < pacepatch.bin\n", argv[0]);
		return -1;
	}

	db = parsePrcPdb(argv[1]);
	if (!db || !(db->attributes & PALM_DB_ATTR_RES_DB) || db->type != 'libr' || db->creator != 'a68k') {
		fprintf(stderr, "Not a valid pace prc '%s'\n", argv[1]);
		return -2;
	}
		
	fprintf(stderr, "patching '%s'\n", argv[1]);
	
	//the first ('amdc', 0) resource holds the code we are after
	for (idx = 0; idx < db->numChildren && !target; idx++) {
		
		struct PalmRes *candidate = &db->res[idx];
		
		if (candidate->type == 'amdc' && candidate->id == 0)
			target = candidate;
	}
	
	if (!target) {
		fprintf(stderr, "Patching failed\n");
		freePrcPdb(db);
		return -7;
	}
	
	if (target->buf.sz < 1024) {
		fprintf(stderr, "File '%s' has ('amdc', 0) resource with unexpected size of %u\n", argv[1], target->buf.sz);
		return -3;
	}
	
	if (!patchPace(&target->buf))
		return -4;
	
	if (!writePrcPdb(db, argv[2])) {
		fprintf(stderr, "Failed to write output file '%s'\n", argv[2]);
		return -5;
	}

	freePrcPdb(db);
	return 0;
}