mirror of
https://github.com/bsnes-emu/bsnes.git
synced 2025-02-23 14:42:33 +01:00
byuu says: Changelog: - added 30 new PAL games to icarus (courtesy of Mikerochip) - new version of libco no longer requires mprotect nor W|X permissions - nall: default C compiler to -std=c11 instead of -std=c99 - nall: use `-fno-strict-aliasing` during compilation - updated nall/certificates (hopefully for the last time) - updated nall/http to newer coding conventions - nall: improve handling of range() function I didn't really work on higan at all, this is mostly just a release because lots of other things have changed. The most interesting is `-fno-strict-aliasing` ... basically, it joins `-fwrapv` as being "stop the GCC developers from doing *really* evil shit that could lead to security vulnerabilities or instabilities." For the most part, it's a ~2% speed penalty for higan. Except for the Sega Genesis, where it's a ~10% speedup. I have no idea how that's possible, but clearly something's going very wrong with strict aliasing on the Genesis core. So ... it is what it is. If you need the performance for the non-Genesis cores, you can turn it off in your builds. But I'm getting quite sick of C++'s "surprises" and clever compiler developers, so I'm keeping it on in all of my software going forward.
161 lines
5.9 KiB
C
161 lines
5.9 KiB
C
/*
|
|
libco.amd64 (2016-09-14)
|
|
author: byuu
|
|
license: public domain
|
|
*/
|
|
|
|
#define LIBCO_C
|
|
#include "libco.h"
|
|
#include "settings.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
static thread_local long long co_active_buffer[64];
|
|
static thread_local cothread_t co_active_handle = 0;
|
|
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
|
|
|
#ifdef LIBCO_MPROTECT
|
|
alignas(4096)
|
|
#else
|
|
section(text)
|
|
#endif
|
|
#ifdef _WIN32
|
|
/* ABI: Win64 */
|
|
static const unsigned char co_swap_function[4096] = {
|
|
0x48, 0x89, 0x22, /* mov [rdx],rsp */
|
|
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
|
|
0x58, /* pop rax */
|
|
0x48, 0x89, 0x6a, 0x08, /* mov [rdx+ 8],rbp */
|
|
0x48, 0x89, 0x72, 0x10, /* mov [rdx+16],rsi */
|
|
0x48, 0x89, 0x7a, 0x18, /* mov [rdx+24],rdi */
|
|
0x48, 0x89, 0x5a, 0x20, /* mov [rdx+32],rbx */
|
|
0x4c, 0x89, 0x62, 0x28, /* mov [rdx+40],r12 */
|
|
0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+48],r13 */
|
|
0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */
|
|
0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */
|
|
#if !defined(LIBCO_NO_SSE)
|
|
0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */
|
|
0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */
|
|
0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */
|
|
0x48, 0x83, 0xc2, 0x70, /* add rdx,112 */
|
|
0x44, 0x0f, 0x29, 0x4a, 0x10, /* movaps [rdx+ 16],xmm9 */
|
|
0x44, 0x0f, 0x29, 0x52, 0x20, /* movaps [rdx+ 32],xmm10 */
|
|
0x44, 0x0f, 0x29, 0x5a, 0x30, /* movaps [rdx+ 48],xmm11 */
|
|
0x44, 0x0f, 0x29, 0x62, 0x40, /* movaps [rdx+ 64],xmm12 */
|
|
0x44, 0x0f, 0x29, 0x6a, 0x50, /* movaps [rdx+ 80],xmm13 */
|
|
0x44, 0x0f, 0x29, 0x72, 0x60, /* movaps [rdx+ 96],xmm14 */
|
|
0x44, 0x0f, 0x29, 0x7a, 0x70, /* movaps [rdx+112],xmm15 */
|
|
#endif
|
|
0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+ 8] */
|
|
0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+16] */
|
|
0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+24] */
|
|
0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+32] */
|
|
0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+40] */
|
|
0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */
|
|
0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */
|
|
0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */
|
|
#if !defined(LIBCO_NO_SSE)
|
|
0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */
|
|
0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */
|
|
0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */
|
|
0x48, 0x83, 0xc1, 0x70, /* add rcx,112 */
|
|
0x44, 0x0f, 0x28, 0x49, 0x10, /* movaps xmm9, [rcx+ 16] */
|
|
0x44, 0x0f, 0x28, 0x51, 0x20, /* movaps xmm10,[rcx+ 32] */
|
|
0x44, 0x0f, 0x28, 0x59, 0x30, /* movaps xmm11,[rcx+ 48] */
|
|
0x44, 0x0f, 0x28, 0x61, 0x40, /* movaps xmm12,[rcx+ 64] */
|
|
0x44, 0x0f, 0x28, 0x69, 0x50, /* movaps xmm13,[rcx+ 80] */
|
|
0x44, 0x0f, 0x28, 0x71, 0x60, /* movaps xmm14,[rcx+ 96] */
|
|
0x44, 0x0f, 0x28, 0x79, 0x70, /* movaps xmm15,[rcx+112] */
|
|
#endif
|
|
0xff, 0xe0, /* jmp rax */
|
|
};
|
|
|
|
#include <windows.h>
|
|
|
|
static void co_init() {
|
|
#ifdef LIBCO_MPROTECT
|
|
DWORD old_privileges;
|
|
VirtualProtect((void*)co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READ, &old_privileges);
|
|
#endif
|
|
}
|
|
#else
|
|
/* ABI: SystemV */
|
|
static const unsigned char co_swap_function[4096] = {
|
|
0x48, 0x89, 0x26, /* mov [rsi],rsp */
|
|
0x48, 0x8b, 0x27, /* mov rsp,[rdi] */
|
|
0x58, /* pop rax */
|
|
0x48, 0x89, 0x6e, 0x08, /* mov [rsi+ 8],rbp */
|
|
0x48, 0x89, 0x5e, 0x10, /* mov [rsi+16],rbx */
|
|
0x4c, 0x89, 0x66, 0x18, /* mov [rsi+24],r12 */
|
|
0x4c, 0x89, 0x6e, 0x20, /* mov [rsi+32],r13 */
|
|
0x4c, 0x89, 0x76, 0x28, /* mov [rsi+40],r14 */
|
|
0x4c, 0x89, 0x7e, 0x30, /* mov [rsi+48],r15 */
|
|
0x48, 0x8b, 0x6f, 0x08, /* mov rbp,[rdi+ 8] */
|
|
0x48, 0x8b, 0x5f, 0x10, /* mov rbx,[rdi+16] */
|
|
0x4c, 0x8b, 0x67, 0x18, /* mov r12,[rdi+24] */
|
|
0x4c, 0x8b, 0x6f, 0x20, /* mov r13,[rdi+32] */
|
|
0x4c, 0x8b, 0x77, 0x28, /* mov r14,[rdi+40] */
|
|
0x4c, 0x8b, 0x7f, 0x30, /* mov r15,[rdi+48] */
|
|
0xff, 0xe0, /* jmp rax */
|
|
};
|
|
|
|
#include <unistd.h>
|
|
#include <sys/mman.h>
|
|
|
|
static void co_init() {
|
|
#ifdef LIBCO_MPROTECT
|
|
unsigned long long addr = (unsigned long long)co_swap_function;
|
|
unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE));
|
|
unsigned long long size = (addr - base) + sizeof co_swap_function;
|
|
mprotect((void*)base, size, PROT_READ | PROT_EXEC);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
static void crash() {
|
|
assert(0); /* called only if cothread_t entrypoint returns */
|
|
}
|
|
|
|
cothread_t co_active() {
|
|
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
|
return co_active_handle;
|
|
}
|
|
|
|
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
|
cothread_t handle;
|
|
if(!co_swap) {
|
|
co_init();
|
|
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
|
}
|
|
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
|
size += 512; /* allocate additional space for storage */
|
|
size &= ~15; /* align stack to 16-byte boundary */
|
|
|
|
if(handle = (cothread_t)malloc(size)) {
|
|
long long *p = (long long*)((char*)handle + size); /* seek to top of stack */
|
|
*--p = (long long)crash; /* crash if entrypoint returns */
|
|
*--p = (long long)entrypoint; /* start of function */
|
|
*(long long*)handle = (long long)p; /* stack pointer */
|
|
}
|
|
|
|
return handle;
|
|
}
|
|
|
|
void co_delete(cothread_t handle) {
|
|
free(handle);
|
|
}
|
|
|
|
void co_switch(cothread_t handle) {
|
|
register cothread_t co_previous_handle = co_active_handle;
|
|
co_swap(co_active_handle = handle, co_previous_handle);
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|