From b934ce41d9c4a66737da809151bdd85b6883b82c Mon Sep 17 00:00:00 2001 From: byuu Date: Sat, 22 Dec 2007 18:26:54 +0000 Subject: [PATCH 01/43] Update to bsnes v027 release. This version replaces libui with miu -- a new GUI wrapper library, and cleans up large portions of the source code. Unfortunately, the GUI rewrite took far, far longer than I ever imagined. As a result, no work has gone into the core emulation for this version. But with the GUI rewrite out of the way, that should change in the near future. And thanks to the new UI library, I can now begin work on adding a cross-platform debugger to bsnes, at long last. Changelog: - Major source code cleanup (lib/, ui/miu/, ui/vai/) - Cheat code editor was broken in v0.026, this is now fixed - Cheat code file format simplified for human readability - Makefile install target improvements [belegdol] - libui replaced with miu GUI library - Custom video / audio / input drivers replaced with vai HW library - ppc and ppc64 libco targets added [Vas Crabb] - x86 and x86-64 libco targets now work on OS X [Lucas Newman] --- libco.ppc.s | 478 +++++++++++++++++++++++++++++++++++++++++ libco.ppc64.s | 513 +++++++++++++++++++++++++++++++++++++++++++++ libco.ucontext.cpp | 80 +++++++ libco.win.cpp | 66 ++++++ libco.x86-64.asm | 146 +++++++++++++ libco.x86.asm | 155 ++++++++++++++ 6 files changed, 1438 insertions(+) create mode 100644 libco.ppc.s create mode 100644 libco.ppc64.s create mode 100644 libco.ucontext.cpp create mode 100644 libco.win.cpp create mode 100644 libco.x86-64.asm create mode 100644 libco.x86.asm diff --git a/libco.ppc.s b/libco.ppc.s new file mode 100644 index 00000000..d7f6b758 --- /dev/null +++ b/libco.ppc.s @@ -0,0 +1,478 @@ +;***** +;libco.ppc (2007-11-29) +;author: Vas Crabb +;license: public domain +; +;cross-platform PowerPC implementation of libco +;special thanks to byuu for writing the original version +; +;[ABI compatibility] +;- gcc; mac os x; ppc +; +;[nonvolatile registers] +;- GPR1, GPR13 - 
GPR31
+;- FPR14 - FPR31
+;- V20 - V31
+;- VRSAVE, CR2 - CR4
+;
+;[volatile registers]
+;- GPR0, GPR2 - GPR12
+;- FPR0 - FPR13
+;- V0 - V19
+;- LR, CTR, XER, CR0, CR1, CR5 - CR7
+;*****
+
+
+;Declare some target-specific stuff
+
+ .section __TEXT,__text,regular,pure_instructions
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .machine ppc
+
+
+;Constants
+
+ .cstring
+ .align 2
+
+_sysctl_altivec:
+ .ascii "hw.optional.altivec\0"
+
+
+;Declare space for variables
+
+.lcomm _co_environ,4,2 ;bit 0 (mask 0x80000000) = initialised, bit 1 (mask 0x40000000) = have Altivec/VMX
+.lcomm _co_primary_buffer,1024,2 ;buffer (will be zeroed by loader)
+
+ .data
+ .align 2
+
+_co_active_context:
+ .long _co_primary_buffer
+
+
+ .text
+ .align 2
+
+
+;Declare exported names
+
+.globl _co_active
+.globl _co_create
+.globl _co_delete
+.globl _co_switch
+
+
+;*****
+;extern "C" cothread_t co_active();
+;return = GPR3
+;*****
+
+_co_active:
+ mflr r0 ;GPR0 = return address
+ bcl 20,31,L_co_active$spb ;LR = address of L_co_active$spb (PIC base)
+L_co_active$spb:
+ mflr r2 ;GPR2 set for position-independence
+ addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3
+ lwz r3,lo16(_co_active_context-L_co_active$spb)(r3)
+ mtlr r0 ;LR = return address
+ blr ;return
+
+
+;*****
+;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)());
+;GPR3 = heapsize
+;GPR4 = coentry
+;return = GPR3
+;*****
+
+_co_create:
+ mflr r0 ;GPR0 = return address
+ stmw r30,-8(r1) ;save GPR30 and GPR31
+ stw r0,8(r1) ;save return address
+ stwu r1,-(2*4+16+24)(r1) ;allocate frame: 2*4 for saved GPR30/GPR31 + 16 locals/parameters + 24 linkage
+
+;create heap space (stack + register storage)
+ addi r31,r3,1024-24 ;subtract space for linkage
+ mr r30,r4 ;GPR30 = coentry
+ addi r3,r3,1024 ;allocate extra memory for contextual info
+ bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024) -- NOTE(review): result is not checked for NULL
+ add r4,r3,r31 ;GPR4 points to top-of-stack
+ rlwinm r5,r4,0,0,27 ;force 16-byte alignment
+
+;store thread entry point + registers, so that first call to co_switch will execute coentry
+ stw r30,8(r5) ;store entry point
+ addi r6,0,2+19+18*2+12*4+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE
+ addi r0,0,0
+ addi r7,0,4 ;start at 4(GPR5)
+ mtctr r6
+L_co_create$clear_loop:
+ stwx r0,r5,r7 ;clear a word
+ addi r7,r7,-4 ;step offset down one word (the loop clears downwards)
+ bdnz L_co_create$clear_loop ;loop
+ stwu r5,-448(r5) ;store top of stack as back chain; GPR5 = initial frame, 448 bytes below
+
+;initialize context memory heap and return
+ stw r5,0(r3) ;*cothread_t = stack heap pointer (GPR1)
+ lwz r1,0(r1) ;deallocate stack frame
+ lwz r8,8(r1) ;fetch return address
+ lmw r30,-8(r1) ;restore GPR30 and GPR31
+ mtlr r8 ;return address in LR
+ blr ;return
+
+
+;*****
+;extern "C" void co_delete(cothread_t cothread);
+;GPR3 = cothread
+;*****
+
+_co_delete:
+ b L_free$stub ;tail call free(GPR3)
+
+
+;*****
+;extern "C" void co_switch(cothread_t cothread);
+;GPR3 = cothread
+;*****
+;
+;Frame looks like:
+;
+;Old New Value
+; 8(r1) 456(r1) Saved LR
+; 4(r1) 452(r1) Saved CR
+; 0(r1) 448(r1) Old GPR1
+; -4(r1) 444(r1) Saved GPR31
+; -8(r1) 440(r1) Saved GPR30
+;... ... ...
+; -72(r1) 376(r1) Saved GPR14
+; -76(r1) 372(r1) Saved GPR13
+; -80(r1) 368(r1) Saved VRSAVE
+; -84(r1) 364(r1) +++
+; -88(r1) 360(r1) Saved FPR31
+; -92(r1) 356(r1) +++
+; -96(r1) 352(r1) Saved FPR30
+;... ... ...
+;-212(r1) 236(r1) +++
+;-216(r1) 232(r1) Saved FPR15
+;-220(r1) 228(r1) +++
+;-224(r1) 224(r1) Saved FPR14
+;-228(r1) 220(r1) +++ value
+;-232(r1) 216(r1) +++ len
+;-236(r1) 212(r1) +++
+;-240(r1) 208(r1) Saved VR31
+;-244(r1) 204(r1) +++
+;-248(r1) 200(r1) +++
+;-252(r1) 196(r1) +++
+;-256(r1) 192(r1) Saved VR30
+;... ... ...
+;-388(r1) 60(r1) +++
+;-392(r1) 56(r1) +++
+;-396(r1) 52(r1) +++
+;-400(r1) 48(r1) Saved VR21
+;-404(r1) 44(r1) +++
+;-408(r1) 40(r1) +++ Param 5 (GPR7)
+;-412(r1) 36(r1) +++ Param 4 (GPR6)
+;-416(r1) 32(r1) Saved VR20 Param 3 (GPR5)
+;-420(r1) 28(r1) - Param 2 (GPR4)
+;-424(r1) 24(r1) - Param 1 (GPR3)
+;-428(r1) 20(r1) - Reserved
+;-432(r1) 16(r1) - Reserved
+;-436(r1) 12(r1) - Reserved
+;-440(r1) 8(r1) - New LR
+;-444(r1) 4(r1) - New CR
+;-448(r1) 0(r1) Saved GPR1
+
+
+_co_switch:
+ stmw r13,-76(r1) ;save preserved GPRs
+ stfd f14,-224(r1) ;save preserved FPRs
+ stfd f15,-216(r1)
+ stfd f16,-208(r1)
+ stfd f17,-200(r1)
+ stfd f18,-192(r1)
+ stfd f19,-184(r1)
+ stfd f20,-176(r1)
+ stfd f21,-168(r1)
+ stfd f22,-160(r1)
+ stfd f23,-152(r1)
+ stfd f24,-144(r1)
+ stfd f25,-136(r1)
+ stfd f26,-128(r1)
+ stfd f27,-120(r1)
+ stfd f28,-112(r1)
+ stfd f29,-104(r1)
+ stfd f30,-96(r1)
+ stfd f31,-88(r1)
+ mflr r0 ;save return address
+ stw r0,8(r1)
+ mfcr r2 ;save condition codes
+ stw r2,4(r1)
+ stwu r1,-448(r1) ;create stack frame (save 19 GPRs, 18 FPRs, 12 VRs, VRSAVE)
+
+ mr r30,r3 ;save new context pointer
+ bcl 20,31,L_co_switch$spb ;get PIC base (used to address _co_environ and _co_active_context)
+L_co_switch$spb:
+ mflr r31 ;GPR31 = PIC base
+
+ addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags
+ lwz r8,lo16(_co_environ-L_co_switch$spb)(r29)
+ andis. r9,r8,0x8000 ;is it initialised?
+ bne+ L_co_switch$initialised
+
+ addi r0,0,4 ;len = sizeof(int)
+ stw r0,216(r1)
+ addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec"
+ addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb)
+ addi r4,r1,220 ;GPR4 = &value
+ addi r5,r1,216 ;GPR5 = &len
+ addi r6,0,0 ;newp = 0
+ addi r7,0,0 ;newlen = 0
+ bl L_sysctlbyname$stub ;call sysctlbyname
+ lwz r2,220(r1) ;fetch result
+ addis r8,0,0x8000 ;set initialised bit
+ cmpwi cr5,r3,0 ;assume error means not present
+ cmpwi cr6,r2,0 ;test result
+ blt- cr5,L_co_switch$store_environ
+ beq cr6,L_co_switch$store_environ
+ oris r8,r8,0x4000 ;set the flag to say we have it!
+L_co_switch$store_environ:
+ stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags
+L_co_switch$initialised:
+
+ andis. r10,r8,0x4000 ;do we have Altivec/VMX?
+ beq L_co_switch$save_no_vmx
+ mfspr r11,256 ;save VRSAVE
+ andi. r0,r11,0x0FFF ;short-circuit if none of the VR20-VR31 bits is set
+ stw r11,368(r1)
+ beq L_co_switch$save_no_vmx
+ andi. r0,r11,0x0800 ;check bit 20
+ addi r2,0,32 ;starting index
+ beq L_co_switch$save_skip_vr20
+ stvx v20,r1,r2 ;save VR20
+L_co_switch$save_skip_vr20:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0400 ;check bit 21
+ beq L_co_switch$save_skip_vr21
+ stvx v21,r1,r2 ;save VR21
+L_co_switch$save_skip_vr21:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0200 ;check bit 22
+ beq L_co_switch$save_skip_vr22
+ stvx v22,r1,r2 ;save VR22
+L_co_switch$save_skip_vr22:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0100 ;check bit 23
+ beq L_co_switch$save_skip_vr23
+ stvx v23,r1,r2 ;save VR23
+L_co_switch$save_skip_vr23:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0080 ;check bit 24
+ beq L_co_switch$save_skip_vr24
+ stvx v24,r1,r2 ;save VR24
+L_co_switch$save_skip_vr24:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0040 ;check bit 25
+ beq L_co_switch$save_skip_vr25
+ stvx v25,r1,r2 ;save VR25
+L_co_switch$save_skip_vr25:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0020 ;check bit 26
+ beq L_co_switch$save_skip_vr26
+ stvx v26,r1,r2 ;save VR26
+L_co_switch$save_skip_vr26:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0010 ;check bit 27
+ beq L_co_switch$save_skip_vr27
+ stvx v27,r1,r2 ;save VR27
+L_co_switch$save_skip_vr27:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0008 ;check bit 28
+ beq L_co_switch$save_skip_vr28
+ stvx v28,r1,r2 ;save VR28
+L_co_switch$save_skip_vr28:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0004 ;check bit 29
+ beq L_co_switch$save_skip_vr29
+ stvx v29,r1,r2 ;save VR29
+L_co_switch$save_skip_vr29:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0002 ;check bit 30
+ beq L_co_switch$save_skip_vr30
+ stvx v30,r1,r2 ;save VR30
+L_co_switch$save_skip_vr30:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0001 ;check bit 31
+ beq L_co_switch$save_skip_vr31
+ stvx v31,r1,r2 ;save VR31
+L_co_switch$save_skip_vr31:
+L_co_switch$save_no_vmx:
+
+ addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context
+ lwz r5,lo16(_co_active_context-L_co_switch$spb)(r4)
+ stw r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context
+ stw r1,0(r5) ;save current stack pointer
+ lwz r1,0(r30) ;get new stack pointer
+
+ andis. r10,r8,0x4000 ;do we have Altivec/VMX?
+ beq L_co_switch$restore_no_vmx
+ lwz r11,368(r1) ;restore VRSAVE
+ andi. r0,r11,0x0FFF ;short-circuit if none of the VR20-VR31 bits is set
+ mtspr 256,r11
+ beq L_co_switch$restore_no_vmx
+ andi. r0,r11,0x0800 ;check bit 20
+ addi r2,0,32 ;starting index
+ beq L_co_switch$restore_skip_vr20
+ lvx v20,r1,r2 ;restore VR20
+L_co_switch$restore_skip_vr20:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0400 ;check bit 21
+ beq L_co_switch$restore_skip_vr21
+ lvx v21,r1,r2 ;restore VR21
+L_co_switch$restore_skip_vr21:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0200 ;check bit 22
+ beq L_co_switch$restore_skip_vr22
+ lvx v22,r1,r2 ;restore VR22
+L_co_switch$restore_skip_vr22:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0100 ;check bit 23
+ beq L_co_switch$restore_skip_vr23
+ lvx v23,r1,r2 ;restore VR23
+L_co_switch$restore_skip_vr23:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0080 ;check bit 24
+ beq L_co_switch$restore_skip_vr24
+ lvx v24,r1,r2 ;restore VR24
+L_co_switch$restore_skip_vr24:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0040 ;check bit 25
+ beq L_co_switch$restore_skip_vr25
+ lvx v25,r1,r2 ;restore VR25
+L_co_switch$restore_skip_vr25:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0020 ;check bit 26
+ beq L_co_switch$restore_skip_vr26
+ lvx v26,r1,r2 ;restore VR26
+L_co_switch$restore_skip_vr26:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0010 ;check bit 27
+ beq L_co_switch$restore_skip_vr27
+ lvx v27,r1,r2 ;restore VR27
+L_co_switch$restore_skip_vr27:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0008 ;check bit 28
+ beq L_co_switch$restore_skip_vr28
+ lvx v28,r1,r2 ;restore VR28
+L_co_switch$restore_skip_vr28:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0004 ;check bit 29
+ beq L_co_switch$restore_skip_vr29
+ lvx v29,r1,r2 ;restore VR29
+L_co_switch$restore_skip_vr29:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0002 ;check bit 30
+ beq L_co_switch$restore_skip_vr30
+ lvx v30,r1,r2 ;restore VR30
+L_co_switch$restore_skip_vr30:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0001 ;check bit 31
+ beq L_co_switch$restore_skip_vr31
+ lvx v31,r1,r2 ;restore VR31
+L_co_switch$restore_skip_vr31:
+L_co_switch$restore_no_vmx:
+
+ lwz r1,0(r1) ;deallocate stack frame
+ lwz r6,8(r1) ;return address in GPR6
+ lwz r7,4(r1) ;condition codes in GPR7
+ addi r0,0,0 ;make thread main crash if it returns
+ lmw r13,-76(r1) ;restore preserved GPRs
+ lfd f14,-224(r1) ;restore preserved FPRs
+ lfd f15,-216(r1)
+ lfd f16,-208(r1)
+ lfd f17,-200(r1)
+ lfd f18,-192(r1)
+ lfd f19,-184(r1)
+ lfd f20,-176(r1)
+ lfd f21,-168(r1)
+ lfd f22,-160(r1)
+ lfd f23,-152(r1)
+ lfd f24,-144(r1)
+ lfd f25,-136(r1)
+ lfd f26,-128(r1)
+ lfd f27,-120(r1)
+ lfd f28,-112(r1)
+ lfd f29,-104(r1)
+ lfd f30,-96(r1)
+ lfd f31,-88(r1)
+ mtlr r0 ;LR = 0 (GPR0 was cleared above)
+ mtctr r6 ;return address goes in CTR, since LR holds 0
+ mtcrf 32,r7 ;restore preserved condition codes
+ mtcrf 16,r7
+ mtcrf 8,r7
+ bctr ;return through CTR
+
+
+
+;Import external functions
+
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 5
+L_malloc$stub:
+ .indirect_symbol _malloc
+ mflr r0
+ bcl 20,31,L_malloc$spb
+L_malloc$spb:
+ mflr r11
+ addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb)
+ mtlr r0
+ lwzu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11)
+ mtctr r12
+ bctr
+ .lazy_symbol_pointer
+L_malloc$lazy_ptr:
+ .indirect_symbol _malloc
+ .long dyld_stub_binding_helper
+
+
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 5
+L_free$stub:
+ .indirect_symbol _free
+ mflr r0
+ bcl 20,31,L_free$spb
+L_free$spb:
+ mflr r11
+ addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb)
+ mtlr r0
+ lwzu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11)
+ mtctr r12
+ bctr
+ .lazy_symbol_pointer
+L_free$lazy_ptr:
+ .indirect_symbol _free
+ .long dyld_stub_binding_helper
+
+
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 5
+L_sysctlbyname$stub:
+ .indirect_symbol _sysctlbyname
+ mflr r0
+ bcl 20,31,L_sysctlbyname$spb
+L_sysctlbyname$spb:
+ mflr r11
+ addis r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)
+ mtlr r0
+ lwzu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11)
+ mtctr r12
+ bctr
+ .lazy_symbol_pointer
+L_sysctlbyname$lazy_ptr:
+ .indirect_symbol _sysctlbyname
+ .long dyld_stub_binding_helper
+
+
+;This needs to be here!
+
+ .subsections_via_symbols
+
diff --git a/libco.ppc64.s b/libco.ppc64.s
new file mode 100644
index 00000000..2fb048d7
--- /dev/null
+++ b/libco.ppc64.s
@@ -0,0 +1,513 @@
+;*****
+;libco.ppc64 (2007-12-05)
+;author: Vas Crabb
+;license: public domain
+;
+;cross-platform 64-bit PowerPC implementation of libco
+;special thanks to byuu for writing the original version
+;
+;[ABI compatibility]
+;- gcc; mac os x; ppc64
+;
+;[nonvolatile registers]
+;- GPR1, GPR13 - GPR31
+;- FPR14 - FPR31
+;- V20 - V31
+;- VRSAVE, CR2 - CR4
+;
+;[volatile registers]
+;- GPR0, GPR2 - GPR12
+;- FPR0 - FPR13
+;- V0 - V19
+;- LR, CTR, XER, CR0, CR1, CR5 - CR7
+;*****
+
+
+;Declare some target-specific stuff
+
+ .section __TEXT,__text,regular,pure_instructions
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .machine ppc64
+
+
+;Constants
+
+ .cstring
+ .align 3
+
+_sysctl_altivec:
+ .ascii "hw.optional.altivec\0"
+
+
+;Declare space for variables
+
+.lcomm _co_environ,4,2 ;bit 0 (mask 0x80000000) = initialised, bit 1 (mask 0x40000000) = have Altivec/VMX
+.lcomm _co_primary_buffer,1024,3 ;buffer (will be zeroed by loader)
+
+ .data
+ .align 3
+
+_co_active_context:
+ .quad _co_primary_buffer
+
+
+ .text
+ .align 2
+
+
+;Declare exported names
+
+.globl _co_active
+.globl _co_create
+.globl _co_delete
+.globl _co_switch
+
+
+;*****
+;extern "C" cothread_t co_active();
+;return = GPR3
+;*****
+
+_co_active:
+ mflr r0 ;GPR0 = return address
+ bcl 20,31,L_co_active$spb ;LR = address of L_co_active$spb (PIC base)
+L_co_active$spb:
+ mflr r2 ;GPR2 set for position-independence
+ addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3
+ ld r3,lo16(_co_active_context-L_co_active$spb)(r3)
+ mtlr r0 ;LR = return address
+ blr ;return
+
+
+;*****
+;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)());
+;GPR3 = heapsize
+;GPR4 = coentry
+;return = GPR3
+;*****
+
+_co_create:
+ mflr r0 ;GPR0 = return address
+ std r30,-16(r1) ;save GPR30 and GPR31
+ std r31,-8(r1)
+ std r0,16(r1) ;save return address
+ stdu r1,-(2*8+16+48)(r1) ;allocate frame: 2*8 for saved GPR30/GPR31 + 16 locals/parameters + 48 linkage
+
+;create heap space (stack + register storage)
+ addi r31,r3,1024-48 ;subtract space for linkage
+ mr r30,r4 ;GPR30 = coentry
+ addi r3,r3,1024 ;allocate extra memory for contextual info
+ bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024) -- NOTE(review): result is not checked for NULL
+ add r4,r3,r31 ;GPR4 points to top-of-stack
+ rldicr r5,r4,0,59 ;force 16-byte alignment
+
+;store thread entry point + registers, so that first call to co_switch will execute coentry
+ std r30,16(r5) ;store entry point
+ addi r6,0,2+19+18+12*2+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE
+ addi r0,0,0
+ addi r7,0,8 ;start at 8(GPR5)
+ mtctr r6
+L_co_create$clear_loop:
+ stdx r0,r5,r7 ;clear a double
+ addi r7,r7,-8 ;step offset down one doubleword (the loop clears downwards)
+ bdnz L_co_create$clear_loop ;loop
+ stdu r5,-544(r5) ;store top of stack as back chain; GPR5 = initial frame, 544 bytes below
+
+;initialize context memory heap and return
+ addis r9,0,0x8000 ;GPR13 not set (system TLS)
+ std r5,0(r3) ;*cothread_t = stack heap pointer (GPR1)
+ stw r9,8(r3) ;this is a flag word
+ ld r1,0(r1) ;deallocate stack frame
+ ld r8,16(r1) ;fetch return address
+ ld r30,-16(r1) ;restore GPR30 and GPR31
+ ld r31,-8(r1)
+ mtlr r8 ;return address in LR
+ blr ;return
+
+
+;*****
+;extern "C" void co_delete(cothread_t cothread);
+;GPR3 = cothread
+;*****
+
+_co_delete:
+ b L_free$stub ;tail call free(GPR3)
+
+
+;*****
+;extern "C" void co_switch(cothread_t cothread);
+;GPR3 = cothread
+;*****
+;
+;Frame looks like:
+;
+;Old New Value
+; 16(r1) 560(r1) Saved LR
+; 8(r1) 552(r1) Saved CR
+; 0(r1) 544(r1) Old GPR1
+; -8(r1) 536(r1) Saved GPR31
+; -16(r1) 528(r1) Saved GPR30
+;... ... ...
+;-144(r1) 400(r1) Saved GPR14
+;-152(r1) 392(r1) Saved GPR13
+;-160(r1) 384(r1) Saved FPR31
+;-168(r1) 376(r1) Saved FPR30
+;... ... ...
+;-288(r1) 256(r1) Saved FPR15
+;-296(r1) 248(r1) Saved FPR14
+;-304(r1) 240(r1) Saved VRSAVE
+;-312(r1) 232(r1) +++ value
+;-320(r1) 224(r1) Saved VR31 len
+;-328(r1) 216(r1) +++
+;-336(r1) 208(r1) Saved VR30
+;... ... ...
+;-456(r1) 88(r1) +++
+;-464(r1) 80(r1) Saved VR22 Param 5 (GPR7)
+;-472(r1) 72(r1) +++ Param 4 (GPR6)
+;-480(r1) 64(r1) Saved VR21 Param 3 (GPR5)
+;-488(r1) 56(r1) +++ Param 2 (GPR4)
+;-496(r1) 48(r1) Saved VR20 Param 1 (GPR3)
+;-504(r1) 40(r1) - Reserved
+;-512(r1) 32(r1) - Reserved
+;-520(r1) 24(r1) - Reserved
+;-528(r1) 16(r1) - New LR
+;-536(r1) 8(r1) - New CR
+;-544(r1) 0(r1) Saved GPR1
+
+
+_co_switch:
+ std r13,-152(r1) ;save preserved GPRs
+ std r14,-144(r1)
+ std r15,-136(r1)
+ std r16,-128(r1)
+ std r17,-120(r1)
+ std r18,-112(r1)
+ std r19,-104(r1)
+ std r20,-96(r1)
+ std r21,-88(r1)
+ std r22,-80(r1)
+ std r23,-72(r1)
+ std r24,-64(r1)
+ std r25,-56(r1)
+ std r26,-48(r1)
+ std r27,-40(r1)
+ std r28,-32(r1)
+ std r29,-24(r1)
+ std r30,-16(r1)
+ std r31,-8(r1)
+ mflr r0 ;save return address
+ std r0,16(r1)
+ mfcr r2 ;save condition codes
+ stw r2,8(r1)
+ stdu r1,-544(r1) ;create stack frame (save 19 GPRs, 18 FPRs, 12 VRs, VRSAVE)
+ stfd f14,248(r1) ;save preserved FPRs
+ stfd f15,256(r1)
+ stfd f16,264(r1)
+ stfd f17,272(r1)
+ stfd f18,280(r1)
+ stfd f19,288(r1)
+ stfd f20,296(r1)
+ stfd f21,304(r1)
+ stfd f22,312(r1)
+ stfd f23,320(r1)
+ stfd f24,328(r1)
+ stfd f25,336(r1)
+ stfd f26,344(r1)
+ stfd f27,352(r1)
+ stfd f28,360(r1)
+ stfd f29,368(r1)
+ stfd f30,376(r1)
+ stfd f31,384(r1)
+
+ mr r30,r3 ;save new context pointer
+ bcl 20,31,L_co_switch$spb ;get PIC base (used to address _co_environ and _co_active_context)
+L_co_switch$spb:
+ mflr r31 ;GPR31 = PIC base
+
+ addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags
+ lwz r8,lo16(_co_environ-L_co_switch$spb)(r29)
+ andis. r9,r8,0x8000 ;is it initialised?
+ bne+ L_co_switch$initialised
+
+ addi r0,0,4 ;len = sizeof(int)
+ std r0,224(r1)
+ addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec"
+ addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb)
+ addi r4,r1,232 ;GPR4 = &value
+ addi r5,r1,224 ;GPR5 = &len
+ addi r6,0,0 ;newp = 0
+ addi r7,0,0 ;newlen = 0
+ bl L_sysctlbyname$stub ;call sysctlbyname
+ lwz r2,232(r1) ;fetch result
+ addis r8,0,0x8000 ;set initialised bit
+ cmpdi cr5,r3,0 ;assume error means not present
+ cmpwi cr6,r2,0 ;test result
+ blt- cr5,L_co_switch$store_environ
+ beq cr6,L_co_switch$store_environ
+ oris r8,r8,0x4000 ;set the flag to say we have it!
+L_co_switch$store_environ:
+ stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags
+L_co_switch$initialised:
+
+ andis. r10,r8,0x4000 ;do we have Altivec/VMX?
+ beq L_co_switch$save_no_vmx
+ mfspr r11,256 ;save VRSAVE
+ andi. r0,r11,0x0FFF ;short-circuit if none of the VR20-VR31 bits is set
+ stw r11,240(r1)
+ beq L_co_switch$save_no_vmx
+ andi. r0,r11,0x0800 ;check bit 20
+ addi r2,0,48 ;starting index
+ beq L_co_switch$save_skip_vr20
+ stvx v20,r1,r2 ;save VR20
+L_co_switch$save_skip_vr20:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0400 ;check bit 21
+ beq L_co_switch$save_skip_vr21
+ stvx v21,r1,r2 ;save VR21
+L_co_switch$save_skip_vr21:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0200 ;check bit 22
+ beq L_co_switch$save_skip_vr22
+ stvx v22,r1,r2 ;save VR22
+L_co_switch$save_skip_vr22:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0100 ;check bit 23
+ beq L_co_switch$save_skip_vr23
+ stvx v23,r1,r2 ;save VR23
+L_co_switch$save_skip_vr23:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0080 ;check bit 24
+ beq L_co_switch$save_skip_vr24
+ stvx v24,r1,r2 ;save VR24
+L_co_switch$save_skip_vr24:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0040 ;check bit 25
+ beq L_co_switch$save_skip_vr25
+ stvx v25,r1,r2 ;save VR25
+L_co_switch$save_skip_vr25:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0020 ;check bit 26
+ beq L_co_switch$save_skip_vr26
+ stvx v26,r1,r2 ;save VR26
+L_co_switch$save_skip_vr26:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0010 ;check bit 27
+ beq L_co_switch$save_skip_vr27
+ stvx v27,r1,r2 ;save VR27
+L_co_switch$save_skip_vr27:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0008 ;check bit 28
+ beq L_co_switch$save_skip_vr28
+ stvx v28,r1,r2 ;save VR28
+L_co_switch$save_skip_vr28:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0004 ;check bit 29
+ beq L_co_switch$save_skip_vr29
+ stvx v29,r1,r2 ;save VR29
+L_co_switch$save_skip_vr29:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0002 ;check bit 30
+ beq L_co_switch$save_skip_vr30
+ stvx v30,r1,r2 ;save VR30
+L_co_switch$save_skip_vr30:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0001 ;check bit 31
+ beq L_co_switch$save_skip_vr31
+ stvx v31,r1,r2 ;save VR31
+L_co_switch$save_skip_vr31:
+L_co_switch$save_no_vmx:
+
+ addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context
+ ld r5,lo16(_co_active_context-L_co_switch$spb)(r4)
+ std r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context
+ std r1,0(r5) ;save current stack pointer
+ ld r1,0(r30) ;get new stack pointer
+ lwz r12,8(r30) ;have we already set GPR13 (system TLS)?
+ andis. r0,r12,0x8000
+ beq+ L_co_switch$gpr13_set
+ std r13,392(r1) ;first switch to this context: seed its saved GPR13 slot with the current GPR13
+ xoris r12,r12,0x8000 ;clear the "GPR13 not set" flag
+ stw r12,8(r30)
+L_co_switch$gpr13_set:
+
+ andis. r10,r8,0x4000 ;do we have Altivec/VMX?
+ beq L_co_switch$restore_no_vmx
+ lwz r11,240(r1) ;restore VRSAVE
+ andi. r0,r11,0x0FFF ;short-circuit if none of the VR20-VR31 bits is set
+ mtspr 256,r11
+ beq L_co_switch$restore_no_vmx
+ andi. r0,r11,0x0800 ;check bit 20
+ addi r2,0,48 ;starting index
+ beq L_co_switch$restore_skip_vr20
+ lvx v20,r1,r2 ;restore VR20
+L_co_switch$restore_skip_vr20:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0400 ;check bit 21
+ beq L_co_switch$restore_skip_vr21
+ lvx v21,r1,r2 ;restore VR21
+L_co_switch$restore_skip_vr21:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0200 ;check bit 22
+ beq L_co_switch$restore_skip_vr22
+ lvx v22,r1,r2 ;restore VR22
+L_co_switch$restore_skip_vr22:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0100 ;check bit 23
+ beq L_co_switch$restore_skip_vr23
+ lvx v23,r1,r2 ;restore VR23
+L_co_switch$restore_skip_vr23:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0080 ;check bit 24
+ beq L_co_switch$restore_skip_vr24
+ lvx v24,r1,r2 ;restore VR24
+L_co_switch$restore_skip_vr24:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0040 ;check bit 25
+ beq L_co_switch$restore_skip_vr25
+ lvx v25,r1,r2 ;restore VR25
+L_co_switch$restore_skip_vr25:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0020 ;check bit 26
+ beq L_co_switch$restore_skip_vr26
+ lvx v26,r1,r2 ;restore VR26
+L_co_switch$restore_skip_vr26:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0010 ;check bit 27
+ beq L_co_switch$restore_skip_vr27
+ lvx v27,r1,r2 ;restore VR27
+L_co_switch$restore_skip_vr27:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0008 ;check bit 28
+ beq L_co_switch$restore_skip_vr28
+ lvx v28,r1,r2 ;restore VR28
+L_co_switch$restore_skip_vr28:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0004 ;check bit 29
+ beq L_co_switch$restore_skip_vr29
+ lvx v29,r1,r2 ;restore VR29
+L_co_switch$restore_skip_vr29:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0002 ;check bit 30
+ beq L_co_switch$restore_skip_vr30
+ lvx v30,r1,r2 ;restore VR30
+L_co_switch$restore_skip_vr30:
+ addi r2,r2,16 ;stride
+ andi. r0,r11,0x0001 ;check bit 31
+ beq L_co_switch$restore_skip_vr31
+ lvx v31,r1,r2 ;restore VR31
+L_co_switch$restore_skip_vr31:
+L_co_switch$restore_no_vmx:
+
+ lfd f14,248(r1) ;restore preserved FPRs
+ lfd f15,256(r1)
+ lfd f16,264(r1)
+ lfd f17,272(r1)
+ lfd f18,280(r1)
+ lfd f19,288(r1)
+ lfd f20,296(r1)
+ lfd f21,304(r1)
+ lfd f22,312(r1)
+ lfd f23,320(r1)
+ lfd f24,328(r1)
+ lfd f25,336(r1)
+ lfd f26,344(r1)
+ lfd f27,352(r1)
+ lfd f28,360(r1)
+ lfd f29,368(r1)
+ lfd f30,376(r1)
+ lfd f31,384(r1)
+ addi r0,0,0 ;make thread main crash if it returns
+ ld r1,0(r1) ;deallocate stack frame
+ ld r6,16(r1) ;return address in GPR6
+ lwz r7,8(r1) ;condition codes in GPR7
+ ld r13,-152(r1) ;restore preserved GPRs
+ ld r14,-144(r1)
+ ld r15,-136(r1)
+ ld r16,-128(r1)
+ ld r17,-120(r1)
+ ld r18,-112(r1)
+ ld r19,-104(r1)
+ ld r20,-96(r1)
+ ld r21,-88(r1)
+ ld r22,-80(r1)
+ ld r23,-72(r1)
+ ld r24,-64(r1)
+ ld r25,-56(r1)
+ ld r26,-48(r1)
+ ld r27,-40(r1)
+ ld r28,-32(r1)
+ ld r29,-24(r1)
+ ld r30,-16(r1)
+ ld r31,-8(r1)
+ mtlr r0 ;LR = 0 (GPR0 was cleared above)
+ mtctr r6 ;return address goes in CTR, since LR holds 0
+ mtcrf 32,r7 ;restore preserved condition codes
+ mtcrf 16,r7
+ mtcrf 8,r7
+ bctr ;return through CTR
+
+
+
+;Import external functions
+
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 5
+L_malloc$stub:
+ .indirect_symbol _malloc
+ mflr r0
+ bcl 20,31,L_malloc$spb
+L_malloc$spb:
+ mflr r11
+ addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb)
+ mtlr r0
+ ldu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11)
+ mtctr r12
+ bctr
+ .lazy_symbol_pointer
+L_malloc$lazy_ptr:
+ .indirect_symbol _malloc
+ .quad dyld_stub_binding_helper
+
+
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 5
+L_free$stub:
+ .indirect_symbol _free
+ mflr r0
+ bcl 20,31,L_free$spb
+L_free$spb:
+ mflr r11
+ addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb)
+ mtlr r0
+ ldu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11)
+ mtctr r12
+ bctr
+ .lazy_symbol_pointer
+L_free$lazy_ptr:
+ .indirect_symbol _free
+ .quad dyld_stub_binding_helper
+
+
+ .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 5
+L_sysctlbyname$stub:
+ .indirect_symbol _sysctlbyname
+ mflr r0
+ bcl 20,31,L_sysctlbyname$spb
+L_sysctlbyname$spb:
+ mflr r11
+ addis r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)
+ mtlr r0
+ ldu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11)
+ mtctr r12
+ bctr
+ .lazy_symbol_pointer
+L_sysctlbyname$lazy_ptr:
+ .indirect_symbol _sysctlbyname
+ .quad dyld_stub_binding_helper
+
+
+;This needs to be here!
+
+ .subsections_via_symbols
+
diff --git a/libco.ucontext.cpp b/libco.ucontext.cpp
new file mode 100644
index 00000000..da236326
--- /dev/null
+++ b/libco.ucontext.cpp
@@ -0,0 +1,80 @@
+/*
+ libco.ucontext (2007-09-08)
+ author: byuu
+ license: public domain
+*/
+
+#include //NOTE(review): header name is missing here -- presumably <stdlib.h> (malloc/free are used below); confirm
+#include //NOTE(review): header name is missing here -- presumably <ucontext.h> (getcontext/makecontext/swapcontext); confirm
+#include "../libco.h"
+
+//WARNING: the overhead of POSIX ucontext is very high,
+//averaging ~450x that of standard subroutine calls.
+//(tested on FreeBSD 6.2-RELEASE)
+//By contrast, on the same system, libco_x86's overhead
+//is ~7.25x standard subroutine calls; or fifty times faster.
+// +//This library only exists for two reasons: +//1 - as an initial test for the viability of a ucontext implementation +//2 - to demonstrate the power and speed of libco over existing implementations, +// such as pth (which defaults to wrapping ucontext on unix targets) +// +//Use this library only as a *last resort* + +struct cothread_struct { + ucontext_t cohandle; + void (*coentry)(); +}; + +cothread_t __co_active = 0, __co_primary = 0; +void co_entrypoint(cothread_t cothread); +void co_init(); + +/***** + * library functions + *****/ + +cothread_t co_active() { + if(__co_primary == 0)co_init(); + return __co_active; +} + +cothread_t co_create(unsigned int heapsize, void (*coentry)()) { + if(__co_primary == 0)co_init(); +cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); + thread->coentry = coentry; + getcontext(&thread->cohandle); + heapsize += 512; + thread->cohandle.uc_stack.ss_sp = (char*)malloc(heapsize); + thread->cohandle.uc_stack.ss_size = heapsize; + makecontext(&thread->cohandle, (void (*)())co_entrypoint, 1, thread); + return (cothread_t)thread; +} + +void co_delete(cothread_t cothread) { +cothread_struct *thread = (cothread_struct*)cothread; + free(thread->cohandle.uc_stack.ss_sp); + free(thread); +} + +void co_switch(cothread_t cothread) { +cothread_struct *active = (cothread_struct*)__co_active; +cothread_struct *swap = (cothread_struct*)cothread; + __co_active = cothread; + swapcontext(&active->cohandle, &swap->cohandle); +} + +/***** + * internal functions + *****/ + +void co_entrypoint(cothread_t cothread) { + ((cothread_struct*)cothread)->coentry(); +} + +void co_init() { +cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); + thread->coentry = 0; + getcontext(&thread->cohandle); + __co_active = __co_primary = (cothread_t)thread; +} diff --git a/libco.win.cpp b/libco.win.cpp new file mode 100644 index 00000000..f0d16626 --- /dev/null +++ b/libco.win.cpp @@ -0,0 +1,66 @@ +/* + libco.win 
(2007-09-08) + author: byuu + license: public domain +*/ + +#define WINVER 0x0400 +#define _WIN32_WINNT 0x0400 +#include +#include "../libco.h" + +struct cothread_struct { + void *cohandle; + void (*coentry)(); +}; + +cothread_t __co_active = 0, __co_primary = 0; + +void __stdcall co_entryproc(void*); +cothread_t co_init(); + +/***** + * library functions + *****/ + +cothread_t co_active() { + if(__co_primary == 0)co_init(); + return __co_active; +} + +cothread_t co_create(unsigned int heapsize, void (*coentry)()) { + if(__co_primary == 0)co_init(); +cothread_struct *s = (cothread_struct*)malloc(sizeof(cothread_struct)); + s->coentry = coentry; + s->cohandle = CreateFiber(heapsize + 512, co_entryproc, (void*)s); + return (cothread_t)s; +} + +void co_delete(cothread_t cothread) { +cothread_struct *s = (cothread_struct*)cothread; + DeleteFiber(s->cohandle); + free(cothread); +} + +void co_switch(cothread_t cothread) { + __co_active = cothread; +cothread_struct *s = (cothread_struct*)cothread; + SwitchToFiber(s->cohandle); +} + +/***** + * internal functions + *****/ + +void __stdcall co_entryproc(void *cothread) { + ((cothread_struct*)cothread)->coentry(); +} + +cothread_t co_init() { + ConvertThreadToFiber(0); +cothread_struct *s = (cothread_struct*)malloc(sizeof(cothread_struct)); + s->coentry = 0; + s->cohandle = GetCurrentFiber(); + __co_active = __co_primary = (cothread_t)s; + return __co_active; +} diff --git a/libco.x86-64.asm b/libco.x86-64.asm new file mode 100644 index 00000000..051987f0 --- /dev/null +++ b/libco.x86-64.asm @@ -0,0 +1,146 @@ +;***** +;libco.x86-64 (2007-12-11) +;author: byuu +;license: public domain +; +;cross-platform x86-64 implementation of libco +;thanks to Aaron Giles and Joel Yliluoma for various optimizations +;thanks to Lucas Newman and Vas Crabb for assistance with OS X support +; +;[ABI compatibility] +;- SystemV ( http://refspecs.freestandards.org/elf/x86_64-SysV-psABI.pdf ) +;- gcc; mac os x; x86-64 +;- gcc; linux; x86-64 +;- 
gcc; freebsd; x86-64 +; +;[nonvolatile registers] +;- rsp, rbp, rbx, r12, r13, r14, r15 +; +;[volatile registers] +;- rax, rcx, rdx, r8, r9, r10, r11, rdi, rsi +;- st0 - st7 +;- xmm0 - xmm15 +;***** + +;***** +;linker-specific name decorations +;***** + +%ifdef OSX +%define malloc _malloc +%define free _free + +%define co_active _co_active +%define co_create _co_create +%define co_delete _co_delete +%define co_switch _co_switch +%endif + +bits 64 + +section .bss + +align 8 +co_primary_buffer resb 512 + +section .data + +align 8 +co_active_context dq co_primary_buffer + +section .text + +extern malloc +extern free + +global co_active +global co_create +global co_delete +global co_switch + +;***** +;extern "C" cothread_t co_active(); +;return = rax +;***** + +align 16 +co_active: + mov rax,[co_active_context wrt rip] + ret + +;***** +;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)()); +;rdi = heapsize +;rsi = coentry +;return = rax +;***** + +align 16 +co_create: +;create heap space (stack + context) + add rdi,512 ;allocate extra memory for contextual info + + push rdi ;backup volatile registers before malloc call + push rsi + + sub rsp,8 ;SSE 16-byte stack alignment + call malloc ;rax = malloc(rdi) + add rsp,8 + + pop rsi ;restore volatile registers + pop rdi + + add rdi,rax ;set rdi to point to top of stack heap + and rdi,-16 ;force 16-byte alignment of stack heap + +;store thread entry point + registers, so that first call to co_switch will execute coentry + mov qword[rdi-8],0 ;crash if entry point returns + mov qword[rdi-16],rsi ;entry point + mov qword[rdi-24],0 ;r15 + mov qword[rdi-32],0 ;r14 + mov qword[rdi-40],0 ;r13 + mov qword[rdi-48],0 ;r12 + mov qword[rdi-56],0 ;rbx + mov qword[rdi-64],0 ;rbp + sub rdi,64 + +;initialize context memory heap and return + mov [rax],rdi ;*cothread_t = stack heap pointer (rsp) + ret ;return allocated memory block as thread handle + +;***** +;extern "C" void co_delete(cothread_t cothread); +;rdi = 
cothread +;***** + +align 16 +co_delete: + jmp free ;free(rdi) + +;***** +;extern "C" void co_switch(cothread_t cothread); +;rdi = cothread +;***** + +align 16 +co_switch: + mov rax,[co_active_context wrt rip] ;backup current context + mov [co_active_context wrt rip],rdi ;set new active context + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + mov [rax],rsp + + mov rsp,[rdi] + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + + ret diff --git a/libco.x86.asm b/libco.x86.asm new file mode 100644 index 00000000..f151c4a8 --- /dev/null +++ b/libco.x86.asm @@ -0,0 +1,155 @@ +;***** +;libco.x86 (2007-12-11) +;author: byuu +;license: public domain +; +;cross-platform x86 implementation of libco +;thanks to Aaron Giles and Joel Yliluoma for various optimizations +;thanks to Lucas Newman and Vas Crabb for assistance with OS X support +; +;[ABI compatibility] +;- visual c++; windows; x86 +;- mingw; windows; x86 +;- gcc; mac os x; x86 +;- gcc; linux; x86 +;- gcc; freebsd; x86 +; +;[nonvolatile registers] +;- esp, ebp, edi, esi, ebx +; +;[volatile registers] +;- eax, ecx, edx +;- st0 - st7 +;- xmm0 - xmm15 +;***** + +;***** +;linker-specific name decorations +;***** + +%ifdef WIN +%define malloc _malloc +%define free _free + +%define co_active @co_active@0 +%define co_create @co_create@8 +%define co_delete @co_delete@4 +%define co_switch @co_switch@4 +%endif + +%ifdef OSX +%define malloc _malloc +%define free _free + +%define co_active _co_active +%define co_create _co_create +%define co_delete _co_delete +%define co_switch _co_switch +%endif + +bits 32 + +section .bss + +align 4 +co_primary_buffer resb 512 + +section .data + +align 4 +co_active_context dd co_primary_buffer + +section .text + +extern malloc +extern free + +global co_active +global co_create +global co_delete +global co_switch + +;***** +;extern "C" cothread_t fastcall co_active(); +;return = eax +;***** + +align 16 +co_active: + mov eax,[co_active_context] + ret + +;***** 
+;extern "C" cothread_t fastcall co_create(unsigned int heapsize, void (*coentry)()); +;ecx = heapsize +;edx = coentry +;return = eax +;***** + +align 16 +co_create: +;create heap space (stack + context) + add ecx,512 ;allocate extra memory for contextual info + + push ecx ;backup volatile registers before malloc call + push edx + + push ecx + call malloc ;eax = malloc(ecx) + add esp,4 + + pop edx ;restore volatile registers + pop ecx + + add ecx,eax ;set edx to point to top of stack heap + and ecx,-16 ;force 16-byte alignment of stack heap + +;store thread entry point + registers, so that first call to co_switch will execute coentry + mov dword[ecx-4],0 ;crash if entry point returns + mov dword[ecx-8],edx ;entry point + mov dword[ecx-12],0 ;ebp + mov dword[ecx-16],0 ;esi + mov dword[ecx-20],0 ;edi + mov dword[ecx-24],0 ;ebx + sub ecx,24 + +;initialize context memory heap and return + mov [eax],ecx ;*cothread_t = stack heap pointer (esp) + ret ;return allocated memory block as thread handle + +;***** +;extern "C" void fastcall co_delete(cothread_t cothread); +;ecx = cothread +;***** + +align 16 +co_delete: + sub esp,8 ;SSE 16-byte stack alignment + push ecx + call free ;free(ecx) + add esp,4+8 + ret + +;***** +;extern "C" void fastcall co_switch(cothread_t cothread); +;ecx = cothread +;***** + +align 16 +co_switch: + mov eax,[co_active_context] ;backup current context + mov [co_active_context],ecx ;set new active context + + push ebp + push esi + push edi + push ebx + mov [eax],esp + + mov esp,[ecx] + pop ebx + pop edi + pop esi + pop ebp + + ret From dc692754c3514cc264b9d1a15f071c2c7b0dce71 Mon Sep 17 00:00:00 2001 From: byuu Date: Mon, 4 Feb 2008 16:16:34 +0000 Subject: [PATCH 02/43] Update to bsnes v028 release. 
Changelog: - OpenGL (with hardware filter mode support) and SDL video drivers added to Linux port - OpenAL (with speed regulation disable support) and OSS audio drivers added to Linux port [Nach] - SDL input driver (with joypad support) added to Linux port - Emulator pause option added - Added option to select behavior of bsnes when idle: allow input, ignore input or pause emulator - Added support to remap common GUI actions to key/joypad presses on the "Input Configuration" screen - bsnes will now clamp the video output size when it is larger than the screen resolution - GUI library has been enhanced, and renamed to hiro - Fullscreen mode now always centers video, rather than approximates - Fullscreen mode now works correctly on Linux/Openbox - Extra layer of abstraction in src/ui has been removed, as GUI lib unifies all ports anyway - Video, audio and input drivers unified into standard library, named ruby - All custom headers have been merged into a new template library, named nall - Makefile rewritten, vastly improved. 
Allows quick toggling of compiled-in drivers - Makefile: all object files now placed in /src/obj, binary placed in / - libco greatly enhanced, no longer requires an assembler to build [byuu, blargg, Nach] - libco SJLJ driver added; bsnes should now build on any Unix-derivative now (Solaris, OS X, PS3, etc) [Nach] - Fixed register $213e.d4 PPU1 open bus behavior [zones] - Windows port will not activate screensaver while bsnes is running [Nightcrawler] - Visual C++ target no longer requires stdint.h - And lots more -- mostly code refactoring related --- fiber.c | 51 +++++++++++++ libco.ucontext.cpp | 80 -------------------- libco.win.cpp | 66 ----------------- libco.x86-64.asm | 146 ------------------------------------ libco.x86.asm | 155 --------------------------------------- libco.ppc.s => ppc.s | 0 libco.ppc64.s => ppc64.s | 0 sjlj.c | 102 ++++++++++++++++++++++++++ ucontext.c | 67 +++++++++++++++++ x86-64.c | 81 ++++++++++++++++++++ x86.c | 110 +++++++++++++++++++++++++++ 11 files changed, 411 insertions(+), 447 deletions(-) create mode 100644 fiber.c delete mode 100644 libco.ucontext.cpp delete mode 100644 libco.win.cpp delete mode 100644 libco.x86-64.asm delete mode 100644 libco.x86.asm rename libco.ppc.s => ppc.s (100%) rename libco.ppc64.s => ppc64.s (100%) create mode 100644 sjlj.c create mode 100644 ucontext.c create mode 100644 x86-64.c create mode 100644 x86.c diff --git a/fiber.c b/fiber.c new file mode 100644 index 00000000..d1b39586 --- /dev/null +++ b/fiber.c @@ -0,0 +1,51 @@ +/* + libco.win (2008-01-28) + authors: Nach, byuu + license: public domain +*/ + +#define LIBCO_C +#include "../libco.h" +#define WINVER 0x0400 +#define _WIN32_WINNT 0x0400 +#define WIN32_LEAN_AND_MEAN +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local cothread_t co_active_ = 0; + +static void __stdcall co_thunk(void *coentry) { + ((void (*)(void))coentry)(); +} + +cothread_t co_active() { + if(!co_active_) { + ConvertThreadToFiber(0); + co_active_ = 
GetCurrentFiber(); + } + return co_active_; +} + +cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { + if(!co_active_) { + ConvertThreadToFiber(0); + co_active_ = GetCurrentFiber(); + } + return (cothread_t)CreateFiber(heapsize, co_thunk, (void*)coentry); +} + +void co_delete(cothread_t cothread) { + DeleteFiber(cothread); +} + +void co_switch(cothread_t cothread) { + co_active_ = cothread; + SwitchToFiber(cothread); +} + +#ifdef __cplusplus +} +#endif diff --git a/libco.ucontext.cpp b/libco.ucontext.cpp deleted file mode 100644 index da236326..00000000 --- a/libco.ucontext.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - libco.ucontext (2007-09-08) - author: byuu - license: public domain -*/ - -#include -#include -#include "../libco.h" - -//WARNING: the overhead of POSIX ucontext is very high, -//averaging ~450x that of standard subroutine calls. -//(tested on FreeBSD 6.2-RELEASE) -//By contrast, on the same system, libco_x86's overhead -//is ~7.25x standard subroutine calls; or fifty times faster. 
-// -//This library only exists for two reasons: -//1 - as an initial test for the viability of a ucontext implementation -//2 - to demonstrate the power and speed of libco over existing implementations, -// such as pth (which defaults to wrapping ucontext on unix targets) -// -//Use this library only as a *last resort* - -struct cothread_struct { - ucontext_t cohandle; - void (*coentry)(); -}; - -cothread_t __co_active = 0, __co_primary = 0; -void co_entrypoint(cothread_t cothread); -void co_init(); - -/***** - * library functions - *****/ - -cothread_t co_active() { - if(__co_primary == 0)co_init(); - return __co_active; -} - -cothread_t co_create(unsigned int heapsize, void (*coentry)()) { - if(__co_primary == 0)co_init(); -cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); - thread->coentry = coentry; - getcontext(&thread->cohandle); - heapsize += 512; - thread->cohandle.uc_stack.ss_sp = (char*)malloc(heapsize); - thread->cohandle.uc_stack.ss_size = heapsize; - makecontext(&thread->cohandle, (void (*)())co_entrypoint, 1, thread); - return (cothread_t)thread; -} - -void co_delete(cothread_t cothread) { -cothread_struct *thread = (cothread_struct*)cothread; - free(thread->cohandle.uc_stack.ss_sp); - free(thread); -} - -void co_switch(cothread_t cothread) { -cothread_struct *active = (cothread_struct*)__co_active; -cothread_struct *swap = (cothread_struct*)cothread; - __co_active = cothread; - swapcontext(&active->cohandle, &swap->cohandle); -} - -/***** - * internal functions - *****/ - -void co_entrypoint(cothread_t cothread) { - ((cothread_struct*)cothread)->coentry(); -} - -void co_init() { -cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); - thread->coentry = 0; - getcontext(&thread->cohandle); - __co_active = __co_primary = (cothread_t)thread; -} diff --git a/libco.win.cpp b/libco.win.cpp deleted file mode 100644 index f0d16626..00000000 --- a/libco.win.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - libco.win 
(2007-09-08) - author: byuu - license: public domain -*/ - -#define WINVER 0x0400 -#define _WIN32_WINNT 0x0400 -#include -#include "../libco.h" - -struct cothread_struct { - void *cohandle; - void (*coentry)(); -}; - -cothread_t __co_active = 0, __co_primary = 0; - -void __stdcall co_entryproc(void*); -cothread_t co_init(); - -/***** - * library functions - *****/ - -cothread_t co_active() { - if(__co_primary == 0)co_init(); - return __co_active; -} - -cothread_t co_create(unsigned int heapsize, void (*coentry)()) { - if(__co_primary == 0)co_init(); -cothread_struct *s = (cothread_struct*)malloc(sizeof(cothread_struct)); - s->coentry = coentry; - s->cohandle = CreateFiber(heapsize + 512, co_entryproc, (void*)s); - return (cothread_t)s; -} - -void co_delete(cothread_t cothread) { -cothread_struct *s = (cothread_struct*)cothread; - DeleteFiber(s->cohandle); - free(cothread); -} - -void co_switch(cothread_t cothread) { - __co_active = cothread; -cothread_struct *s = (cothread_struct*)cothread; - SwitchToFiber(s->cohandle); -} - -/***** - * internal functions - *****/ - -void __stdcall co_entryproc(void *cothread) { - ((cothread_struct*)cothread)->coentry(); -} - -cothread_t co_init() { - ConvertThreadToFiber(0); -cothread_struct *s = (cothread_struct*)malloc(sizeof(cothread_struct)); - s->coentry = 0; - s->cohandle = GetCurrentFiber(); - __co_active = __co_primary = (cothread_t)s; - return __co_active; -} diff --git a/libco.x86-64.asm b/libco.x86-64.asm deleted file mode 100644 index 051987f0..00000000 --- a/libco.x86-64.asm +++ /dev/null @@ -1,146 +0,0 @@ -;***** -;libco.x86-64 (2007-12-11) -;author: byuu -;license: public domain -; -;cross-platform x86-64 implementation of libco -;thanks to Aaron Giles and Joel Yliluoma for various optimizations -;thanks to Lucas Newman and Vas Crabb for assistance with OS X support -; -;[ABI compatibility] -;- SystemV ( http://refspecs.freestandards.org/elf/x86_64-SysV-psABI.pdf ) -;- gcc; mac os x; x86-64 -;- gcc; linux; x86-64 
-;- gcc; freebsd; x86-64 -; -;[nonvolatile registers] -;- rsp, rbp, rbx, r12, r13, r14, r15 -; -;[volatile registers] -;- rax, rcx, rdx, r8, r9, r10, r11, rdi, rsi -;- st0 - st7 -;- xmm0 - xmm15 -;***** - -;***** -;linker-specific name decorations -;***** - -%ifdef OSX -%define malloc _malloc -%define free _free - -%define co_active _co_active -%define co_create _co_create -%define co_delete _co_delete -%define co_switch _co_switch -%endif - -bits 64 - -section .bss - -align 8 -co_primary_buffer resb 512 - -section .data - -align 8 -co_active_context dq co_primary_buffer - -section .text - -extern malloc -extern free - -global co_active -global co_create -global co_delete -global co_switch - -;***** -;extern "C" cothread_t co_active(); -;return = rax -;***** - -align 16 -co_active: - mov rax,[co_active_context wrt rip] - ret - -;***** -;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)()); -;rdi = heapsize -;rsi = coentry -;return = rax -;***** - -align 16 -co_create: -;create heap space (stack + context) - add rdi,512 ;allocate extra memory for contextual info - - push rdi ;backup volatile registers before malloc call - push rsi - - sub rsp,8 ;SSE 16-byte stack alignment - call malloc ;rax = malloc(rdi) - add rsp,8 - - pop rsi ;restore volatile registers - pop rdi - - add rdi,rax ;set rdi to point to top of stack heap - and rdi,-16 ;force 16-byte alignment of stack heap - -;store thread entry point + registers, so that first call to co_switch will execute coentry - mov qword[rdi-8],0 ;crash if entry point returns - mov qword[rdi-16],rsi ;entry point - mov qword[rdi-24],0 ;r15 - mov qword[rdi-32],0 ;r14 - mov qword[rdi-40],0 ;r13 - mov qword[rdi-48],0 ;r12 - mov qword[rdi-56],0 ;rbx - mov qword[rdi-64],0 ;rbp - sub rdi,64 - -;initialize context memory heap and return - mov [rax],rdi ;*cothread_t = stack heap pointer (rsp) - ret ;return allocated memory block as thread handle - -;***** -;extern "C" void co_delete(cothread_t cothread); -;rdi = 
cothread -;***** - -align 16 -co_delete: - jmp free ;free(rdi) - -;***** -;extern "C" void co_switch(cothread_t cothread); -;rdi = cothread -;***** - -align 16 -co_switch: - mov rax,[co_active_context wrt rip] ;backup current context - mov [co_active_context wrt rip],rdi ;set new active context - - push rbp - push rbx - push r12 - push r13 - push r14 - push r15 - mov [rax],rsp - - mov rsp,[rdi] - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp - - ret diff --git a/libco.x86.asm b/libco.x86.asm deleted file mode 100644 index f151c4a8..00000000 --- a/libco.x86.asm +++ /dev/null @@ -1,155 +0,0 @@ -;***** -;libco.x86 (2007-12-11) -;author: byuu -;license: public domain -; -;cross-platform x86 implementation of libco -;thanks to Aaron Giles and Joel Yliluoma for various optimizations -;thanks to Lucas Newman and Vas Crabb for assistance with OS X support -; -;[ABI compatibility] -;- visual c++; windows; x86 -;- mingw; windows; x86 -;- gcc; mac os x; x86 -;- gcc; linux; x86 -;- gcc; freebsd; x86 -; -;[nonvolatile registers] -;- esp, ebp, edi, esi, ebx -; -;[volatile registers] -;- eax, ecx, edx -;- st0 - st7 -;- xmm0 - xmm15 -;***** - -;***** -;linker-specific name decorations -;***** - -%ifdef WIN -%define malloc _malloc -%define free _free - -%define co_active @co_active@0 -%define co_create @co_create@8 -%define co_delete @co_delete@4 -%define co_switch @co_switch@4 -%endif - -%ifdef OSX -%define malloc _malloc -%define free _free - -%define co_active _co_active -%define co_create _co_create -%define co_delete _co_delete -%define co_switch _co_switch -%endif - -bits 32 - -section .bss - -align 4 -co_primary_buffer resb 512 - -section .data - -align 4 -co_active_context dd co_primary_buffer - -section .text - -extern malloc -extern free - -global co_active -global co_create -global co_delete -global co_switch - -;***** -;extern "C" cothread_t fastcall co_active(); -;return = eax -;***** - -align 16 -co_active: - mov eax,[co_active_context] - ret - -;***** 
-;extern "C" cothread_t fastcall co_create(unsigned int heapsize, void (*coentry)()); -;ecx = heapsize -;edx = coentry -;return = eax -;***** - -align 16 -co_create: -;create heap space (stack + context) - add ecx,512 ;allocate extra memory for contextual info - - push ecx ;backup volatile registers before malloc call - push edx - - push ecx - call malloc ;eax = malloc(ecx) - add esp,4 - - pop edx ;restore volatile registers - pop ecx - - add ecx,eax ;set edx to point to top of stack heap - and ecx,-16 ;force 16-byte alignment of stack heap - -;store thread entry point + registers, so that first call to co_switch will execute coentry - mov dword[ecx-4],0 ;crash if entry point returns - mov dword[ecx-8],edx ;entry point - mov dword[ecx-12],0 ;ebp - mov dword[ecx-16],0 ;esi - mov dword[ecx-20],0 ;edi - mov dword[ecx-24],0 ;ebx - sub ecx,24 - -;initialize context memory heap and return - mov [eax],ecx ;*cothread_t = stack heap pointer (esp) - ret ;return allocated memory block as thread handle - -;***** -;extern "C" void fastcall co_delete(cothread_t cothread); -;ecx = cothread -;***** - -align 16 -co_delete: - sub esp,8 ;SSE 16-byte stack alignment - push ecx - call free ;free(ecx) - add esp,4+8 - ret - -;***** -;extern "C" void fastcall co_switch(cothread_t cothread); -;ecx = cothread -;***** - -align 16 -co_switch: - mov eax,[co_active_context] ;backup current context - mov [co_active_context],ecx ;set new active context - - push ebp - push esi - push edi - push ebx - mov [eax],esp - - mov esp,[ecx] - pop ebx - pop edi - pop esi - pop ebp - - ret diff --git a/libco.ppc.s b/ppc.s similarity index 100% rename from libco.ppc.s rename to ppc.s diff --git a/libco.ppc64.s b/ppc64.s similarity index 100% rename from libco.ppc64.s rename to ppc64.s diff --git a/sjlj.c b/sjlj.c new file mode 100644 index 00000000..86b2ea26 --- /dev/null +++ b/sjlj.c @@ -0,0 +1,102 @@ +/* + libco.sjlj (2008-01-28) + author: Nach + license: public domain +*/ + +/* + * Note this was designed 
for UNIX systems. Based on ideas expressed in a paper + * by Ralf Engelschall. + * For SJLJ on other systems, one would want to rewrite springboard() and + * co_create() and hack the jmb_buf stack pointer. + */ + +#define LIBCO_C +#include "../libco.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + sigjmp_buf context; + void (*coentry)(void); + void *stack; +} cothread_struct; + +static thread_local cothread_struct co_primary; +static thread_local cothread_struct *creating, *co_running = 0; + +static void springboard(int ignored) { + if(sigsetjmp(creating->context, 0)) { + co_running->coentry(); + } +} + +cothread_t co_active() { + if(!co_running) co_running = &co_primary; + return (cothread_t)co_running; +} + +cothread_t co_create(unsigned int size, void (*coentry)(void)) { + if(!co_running) co_running = &co_primary; + + cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); + if(thread) { + struct sigaction handler; + struct sigaction old_handler; + + stack_t stack; + stack_t old_stack; + + thread->coentry = thread->stack = 0; + + stack.ss_flags = 0; + stack.ss_size = size; + thread->stack = stack.ss_sp = malloc(size); + if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) { + handler.sa_handler = springboard; + handler.sa_flags = SA_ONSTACK; + sigemptyset(&handler.sa_mask); + creating = thread; + + if(!sigaction(SIGUSR1, &handler, &old_handler)) { + if(!raise(SIGUSR1)) { + thread->coentry = coentry; + } + sigaltstack(&old_stack, 0); + sigaction(SIGUSR1, &old_handler, 0); + } + } + + if(thread->coentry != coentry) { + co_delete(thread); + thread = 0; + } + } + + return (cothread_t)thread; +} + +void co_delete(cothread_t cothread) { + if(cothread) { + if(((cothread_struct*)cothread)->stack) { + free(((cothread_struct*)cothread)->stack); + } + free(cothread); + } +} + +void co_switch(cothread_t cothread) { + if(!sigsetjmp(co_running->context, 0)) { + co_running = (cothread_struct*)cothread; + 
siglongjmp(co_running->context, 1); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/ucontext.c b/ucontext.c new file mode 100644 index 00000000..47766e64 --- /dev/null +++ b/ucontext.c @@ -0,0 +1,67 @@ +/* + libco.ucontext (2008-01-28) + author: Nach + license: public domain +*/ + +/* + * WARNING: the overhead of POSIX ucontext is very high, + * assembly versions of libco or libco_sjlj should be much faster + * + * This library only exists for two reasons: + * 1 - as an initial test for the viability of a ucontext implementation + * 2 - to demonstrate the power and speed of libco over existing implementations, + * such as pth (which defaults to wrapping ucontext on unix targets) + * + * Use this library only as a *last resort* + */ + +#define LIBCO_C +#include "../libco.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local ucontext_t co_primary; +static thread_local ucontext_t *co_running = 0; + +cothread_t co_active() { + if(!co_running) co_running = &co_primary; + return (cothread_t)co_running; +} + +cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { + if(!co_running) co_running = &co_primary; + ucontext_t *thread = (ucontext_t*)malloc(sizeof(ucontext_t)); + if(thread) { + if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = malloc(heapsize))) { + thread->uc_link = co_running; + thread->uc_stack.ss_size = heapsize; + makecontext(thread, coentry, 0); + } else { + co_delete((cothread_t)thread); + thread = 0; + } + } + return (cothread_t)thread; +} + +void co_delete(cothread_t cothread) { + if(cothread) { + if(((ucontext_t*)cothread)->uc_stack.ss_sp) { free(((ucontext_t*)cothread)->uc_stack.ss_sp); } + free(cothread); + } +} + +void co_switch(cothread_t cothread) { + ucontext_t *old_thread = co_running; + co_running = (ucontext_t*)cothread; + swapcontext(old_thread, co_running); +} + +#ifdef __cplusplus +} +#endif diff --git a/x86-64.c b/x86-64.c new file mode 100644 index 
00000000..e1e8c7f3 --- /dev/null +++ b/x86-64.c @@ -0,0 +1,81 @@ +/* + libco.x86-64 (2008-01-28) + author: byuu + license: public domain +*/ + +#define LIBCO_C +#include "../libco.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local long co_active_buffer[32]; +static thread_local cothread_t co_active_ = 0; + +static void crash() { + assert(0); /* called only if cothread_t entrypoint returns */ +} + +cothread_t co_active() { + if(!co_active_) co_active_ = &co_active_buffer; + return co_active_; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + assert(sizeof(long) == 8); + if(!co_active_) co_active_ = &co_active_buffer; + size += 128; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ + + if(handle = (cothread_t)calloc(size, 1)) { + long *p = (long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ + } + + return handle; +} + +void co_delete(cothread_t handle) { + free(handle); +} + +void co_switch(cothread_t to) { + register long stack = *(long*)to; /* stack[0] = "to" thread entry point */ + register cothread_t from = co_active_; + co_active_ = to; + + __asm__ __volatile__( + "movq %%rsp,(%1) \n\t" /* save old stack pointer */ + "movq (%0),%%rsp \n\t" /* load new stack pointer */ + "addq $8,%%rsp \n\t" /* "pop" return address off stack */ + + "movq %%rbp, 8(%1) \n\t" /* backup non-volatile registers */ + "movq %%rbx,16(%1) \n\t" + "movq %%r12,24(%1) \n\t" + "movq %%r13,32(%1) \n\t" + "movq %%r14,40(%1) \n\t" + "movq %%r15,48(%1) \n\t" + + "movq 8(%0),%%rbp \n\t" /* restore non-volatile registers */ + "movq 16(%0),%%rbx \n\t" + "movq 24(%0),%%r12 \n\t" + "movq 32(%0),%%r13 \n\t" + "movq 40(%0),%%r14 \n\t" + "movq 48(%0),%%r15 \n\t" + + "jmp *(%2) \n\t" /* jump into "to" thread */ + : /* 
no outputs */ + : "r" (to), "r" (from), "r" (stack) + ); +} + +#ifdef __cplusplus +} +#endif diff --git a/x86.c b/x86.c new file mode 100644 index 00000000..18af8ac4 --- /dev/null +++ b/x86.c @@ -0,0 +1,110 @@ +/* + libco.x86 (2008-01-28) + author: byuu + license: public domain +*/ + +#define LIBCO_C +#include "../libco.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local long co_active_buffer[32]; +static thread_local cothread_t co_active_ = 0; + +static void crash() { + assert(0); /* called only if cothread_t entrypoint returns */ +} + +cothread_t co_active() { + if(!co_active_) co_active_ = &co_active_buffer; + return co_active_; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + assert(sizeof(long) == 4); + if(!co_active_) co_active_ = &co_active_buffer; + size += 128; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ + + if(handle = (cothread_t)calloc(size, 1)) { + long *p = (long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ + } + + return handle; +} + +void co_delete(cothread_t handle) { + free(handle); +} + +#if defined(__GNUC__) + +void co_switch(cothread_t to) { + register long stack = *(long*)to; /* stack[0] = "to" thread entry point */ + register cothread_t from = co_active_; + co_active_ = to; + + __asm__ __volatile__( + "movl %%esp,(%1) \n\t" /* save old stack pointer */ + "movl (%0),%%esp \n\t" /* load new stack pointer */ + "addl $4,%%esp \n\t" /* "pop" return address off stack */ + + "movl %%ebp, 4(%1) \n\t" /* backup non-volatile registers */ + "movl %%esi, 8(%1) \n\t" + "movl %%edi,12(%1) \n\t" + "movl %%ebx,16(%1) \n\t" + + "movl 4(%0),%%ebp \n\t" /* restore non-volatile registers */ + "movl 8(%0),%%esi \n\t" + "movl 12(%0),%%edi \n\t" + "movl 16(%0),%%ebx 
\n\t" + + "jmp *(%2) \n\t" /* jump into "to" thread */ + : /* no outputs */ + : "r" (to), "r" (from), "r" (stack) + ); +} + +#elif defined(_MSC_VER) + +__declspec(naked) __declspec(noinline) +static void __fastcall co_swap(register cothread_t to, register cothread_t from) { + /* ecx = to, edx = from */ + __asm { + mov [edx],esp + mov esp,[ecx] + pop eax + + mov [edx+ 4],ebp + mov [edx+ 8],esi + mov [edx+12],edi + mov [edx+16],ebx + + mov ebp,[ecx+ 4] + mov esi,[ecx+ 8] + mov edi,[ecx+12] + mov ebx,[ecx+16] + + jmp eax + } +} + +void co_switch(cothread_t handle) { + register cothread_t co_prev_ = co_active_; + co_swap(co_active_ = handle, co_prev_); +} + +#endif + +#ifdef __cplusplus +} +#endif From b45ff0433e9e0578a6c71d3a42cb13518eb592b6 Mon Sep 17 00:00:00 2001 From: byuu Date: Tue, 18 Mar 2008 06:19:43 +0000 Subject: [PATCH 03/43] Update to bsnes v029 release. A new version of bsnes has been released. It contains a few minor emulation fixes, as well as user interface improvements. Behind the scenes, the source has been cleaned up more in preparation for running the CPU and PPU (video processor) separately from each other (eg with no enslavement.) This is required for implementing a clock cycle based PPU renderer. 
- Greatly improved invalid DMA transfer behavior, should be nearly perfect now - Major code cleanup -- most importantly, almost all PPU timing-related settings moved back to PPU, from CPU - Added option to auto-detect file type by inspecting file headers rather than file extensions - Rewrote video filter system to move it out of the emulation core -- HQ2x and Scale2x will work even in hires and interlace modes now, 50% scanline filter added - Re-added bsnes window icon - Added new controller graphic when assigning joypad keys [FitzRoy] - Redundant "Advanced" panel settings which can be configured via the GUI are no longer displayed - Improved speed regulation settings - XP and Vista themes will now apply to bsnes controls - Added "Path Settings" window to allow easy selection of default file directories - Tab key now mostly works throughout most of the GUI (needs improvement) - Main window will no longer disappear when setting a video multiplier which results in a window size larger than the current desktop resolution - Added two new advanced options: one to control GUI window opacity, and one to adjust the statusbar text --- fiber.c | 2 +- libco.c | 21 +++++++++++++++++++++ libco.h | 34 ++++++++++++++++++++++++++++++++++ sjlj.c | 2 +- ucontext.c | 2 +- x86-64.c | 2 +- x86.c | 2 +- 7 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 libco.c create mode 100644 libco.h diff --git a/fiber.c b/fiber.c index d1b39586..02ef5bc7 100644 --- a/fiber.c +++ b/fiber.c @@ -5,7 +5,7 @@ */ #define LIBCO_C -#include "../libco.h" +#include "libco.h" #define WINVER 0x0400 #define _WIN32_WINNT 0x0400 #define WIN32_LEAN_AND_MEAN diff --git a/libco.c b/libco.c new file mode 100644 index 00000000..604b37df --- /dev/null +++ b/libco.c @@ -0,0 +1,21 @@ +/* + libco + auto-selection module + license: public domain +*/ + +#if defined(__GNUC__) && defined(__i386__) + #include "x86.c" +#elif defined(__GNUC__) && defined(__amd64__) && !defined(__MINGW64__) + #include "x86-64.c"
+#elif defined(__MINGW64__) + #include "fiber.c" +#elif defined(__GNUC__) + #include "sjlj.c" +#elif defined(_MSC_VER) && defined(_M_IX86) + #include "x86.c" +#elif defined(_MSC_VER) && defined(_M_AMD64) + #include "fiber.c" +#else + #error "libco: unsupported processor, compiler or operating system" +#endif diff --git a/libco.h b/libco.h new file mode 100644 index 00000000..d8348c4e --- /dev/null +++ b/libco.h @@ -0,0 +1,34 @@ +/* + libco + version: 0.13 rc2 (2008-01-28) + license: public domain +*/ + +#ifndef LIBCO_H +#define LIBCO_H + +#ifdef LIBCO_C + #ifdef LIBCO_MP + #define thread_local __thread + #else + #define thread_local + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* cothread_t; + +cothread_t co_active(); +cothread_t co_create(unsigned int, void (*)(void)); +void co_delete(cothread_t); +void co_switch(cothread_t); + +#ifdef __cplusplus +} +#endif + +/* ifndef LIBCO_H */ +#endif diff --git a/sjlj.c b/sjlj.c index 86b2ea26..8b72b614 100644 --- a/sjlj.c +++ b/sjlj.c @@ -12,7 +12,7 @@ */ #define LIBCO_C -#include "../libco.h" +#include "libco.h" #include #include #include diff --git a/ucontext.c b/ucontext.c index 47766e64..17472f6b 100644 --- a/ucontext.c +++ b/ucontext.c @@ -17,7 +17,7 @@ */ #define LIBCO_C -#include "../libco.h" +#include "libco.h" #include #include diff --git a/x86-64.c b/x86-64.c index e1e8c7f3..2e2a1131 100644 --- a/x86-64.c +++ b/x86-64.c @@ -5,7 +5,7 @@ */ #define LIBCO_C -#include "../libco.h" +#include "libco.h" #include #include diff --git a/x86.c b/x86.c index 18af8ac4..3a5507ff 100644 --- a/x86.c +++ b/x86.c @@ -5,7 +5,7 @@ */ #define LIBCO_C -#include "../libco.h" +#include "libco.h" #include #include From e6e19a7c897574491e19a17edd33664ff4d49942 Mon Sep 17 00:00:00 2001 From: byuu Date: Sun, 18 Oct 2009 17:33:04 +0000 Subject: [PATCH 04/43] Update to bsnes v053 release. 
This release greatly polishes the user interface, adds a new cheat code search utility, adds the snesfilter library, and adds Qt-based GUI support to both snesfilter and snesreader. snesfilter gains 2xSaI, Super 2xSaI and Super Eagle support, plus full configuration for both the NTSC and scanline filters; and snesreader gains support for multi-file ROM archives (eg GoodMerge sets.) Statically linking Qt to bsnes, snesfilter and snesreader would be too prohibitive size-wise (~10MB or so.) I have to link dynamically so that all three can share the same Qt runtime, which gets all of bsnes and its modules to ~1MB (including the debugger build); and Qt itself to about ~2.5MB. However, there is some bad news. There's a serious bug in MinGW 4.4+, where it is not generating profile-guided input files (*.gcno files.) There is also a serious bug in Qt 4.5.2/Windows when using dynamic linking: the library is hanging indefinitely, forcing me to manually terminate the process upon exit. This prevents the creation of profile-guided output files (*.gcda files.) It would be tough enough to work around one, but facing both of these issues at once is too much. I'm afraid I have no choice but to disable profile-guided optimizations until these issues can be addressed. I did not know about these bugs until trying to build the official v053 release, so it's too late to revert to an all-in-one binary now. And I'm simply not willing to stop releasing new builds because of bugs in third-party software. As soon as I can work around this, I'll post a new optimized binary. In the meantime, despite the fact that this release is actually more optimized, please understand that the Windows binary will run approximately ~10% slower than previous releases. I recommend keeping v052 for now if you need the performance. Linux and OS X users are unaffected.
Changelog: - save RAM is initialized to 0xff again to work around Ken Griffey Jr Baseball issue - libco adds assembly-optimized targets for Win64 and PPC-ELF [the latter courtesy of Kernigh] - libco/x86 and libco/amd64 use pre-assembled blocks now, obviates need for custom compilation flags - added a new cheat code search utility to the tools menu - separated filters from main bsnes binary to libsnesfilter / snesfilter.dll - added 2xSaI, Super 2xSaI and Super Eagle filters [kode54] - added full configuration settings for NTSC and scanline filters (12+ new options) - further optimized HQ2x filter [blargg] - added Vsync support to the Mac OS X OpenGL driver - added folder creation button to custom file load dialog - fixed a few oddities with loading of "game folders" (see older news for an explanation on what this is) - updated to blargg's file_extractor v1.0.0 - added full support for multi-file archives (eg GoodMerge sets) - split multi-cart loading again (BS-X, Sufami Turbo, etc) as required for multi-file support - cleaned up handling of file placement detection for save files (.srm, .cht, etc) - file load dialog now remembers your previous folder path across runs even without a custom games folder assigned - windows now save their exact positioning and size across runs, they no longer forcibly center - menus now have radio button and check box icons where appropriate - debugger's hex editor now has a working scrollbar widget - added resize splitter to settings and tools windows - worked around Qt style sheet bug where subclassed widgets were not properly applying style properties --- amd64.c | 104 +++++++++++++++++ libco.c | 10 +- libco.h | 2 +- ppc-elf.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ x86-64.c | 81 -------------- x86.c | 113 ++++++++----------- 6 files changed, 484 insertions(+), 151 deletions(-) create mode 100644 amd64.c create mode 100644 ppc-elf.c delete mode 100644 x86-64.c diff --git a/amd64.c b/amd64.c new file mode 100644 
index 00000000..5f1cfca9 --- /dev/null +++ b/amd64.c @@ -0,0 +1,104 @@ +/* + libco.amd64 (2009-10-12) + author: byuu + license: public domain +*/ + +#define LIBCO_C +#include "libco.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local long long co_active_buffer[64]; +static thread_local cothread_t co_active_handle = 0; +static void (*co_swap)(cothread_t, cothread_t) = 0; + +#ifdef _WIN32 + //ABI: Win64 + static unsigned char co_swap_function[] = { + 0x48, 0x89, 0x22, 0x48, 0x8B, 0x21, 0x58, 0x48, 0x89, 0x6A, 0x08, 0x48, 0x89, 0x72, 0x10, 0x48, + 0x89, 0x7A, 0x18, 0x48, 0x89, 0x5A, 0x20, 0x4C, 0x89, 0x62, 0x28, 0x4C, 0x89, 0x6A, 0x30, 0x4C, + 0x89, 0x72, 0x38, 0x4C, 0x89, 0x7A, 0x40, 0x48, 0x81, 0xC2, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83, + 0xE2, 0xF0, 0x0F, 0x29, 0x32, 0x0F, 0x29, 0x7A, 0x10, 0x44, 0x0F, 0x29, 0x42, 0x20, 0x44, 0x0F, + 0x29, 0x4A, 0x30, 0x44, 0x0F, 0x29, 0x52, 0x40, 0x44, 0x0F, 0x29, 0x5A, 0x50, 0x44, 0x0F, 0x29, + 0x62, 0x60, 0x44, 0x0F, 0x29, 0x6A, 0x70, 0x44, 0x0F, 0x29, 0xB2, 0x80, 0x00, 0x00, 0x00, 0x44, + 0x0F, 0x29, 0xBA, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x69, 0x08, 0x48, 0x8B, 0x71, 0x10, 0x48, + 0x8B, 0x79, 0x18, 0x48, 0x8B, 0x59, 0x20, 0x4C, 0x8B, 0x61, 0x28, 0x4C, 0x8B, 0x69, 0x30, 0x4C, + 0x8B, 0x71, 0x38, 0x4C, 0x8B, 0x79, 0x40, 0x48, 0x81, 0xC1, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83, + 0xE1, 0xF0, 0x0F, 0x29, 0x31, 0x0F, 0x29, 0x79, 0x10, 0x44, 0x0F, 0x29, 0x41, 0x20, 0x44, 0x0F, + 0x29, 0x49, 0x30, 0x44, 0x0F, 0x29, 0x51, 0x40, 0x44, 0x0F, 0x29, 0x59, 0x50, 0x44, 0x0F, 0x29, + 0x61, 0x60, 0x44, 0x0F, 0x29, 0x69, 0x70, 0x44, 0x0F, 0x29, 0xB1, 0x80, 0x00, 0x00, 0x00, 0x44, + 0x0F, 0x29, 0xB9, 0x90, 0x00, 0x00, 0x00, 0xFF, 0xE0, + }; + + #include + + void co_init() { + DWORD old_privileges; + VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); + } +#else + //ABI: SystemV + static unsigned char co_swap_function[] = { + 0x48, 0x89, 0x26, 0x48, 0x8B, 0x27, 
0x58, 0x48, 0x89, 0x6E, 0x08, 0x48, 0x89, 0x5E, 0x10, 0x4C, + 0x89, 0x66, 0x18, 0x4C, 0x89, 0x6E, 0x20, 0x4C, 0x89, 0x76, 0x28, 0x4C, 0x89, 0x7E, 0x30, 0x48, + 0x8B, 0x6F, 0x08, 0x48, 0x8B, 0x5F, 0x10, 0x4C, 0x8B, 0x67, 0x18, 0x4C, 0x8B, 0x6F, 0x20, 0x4C, + 0x8B, 0x77, 0x28, 0x4C, 0x8B, 0x7F, 0x30, 0xFF, 0xE0, + }; + + #include + #include + + void co_init() { + unsigned long long addr = (unsigned long long)co_swap_function; + unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE)); + unsigned long long size = (addr - base) + sizeof co_swap_function; + mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + } +#endif + +static void crash() { + assert(0); /* called only if cothread_t entrypoint returns */ +} + +cothread_t co_active() { + if(!co_active_handle) co_active_handle = &co_active_buffer; + return co_active_handle; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + size += 512; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ + + if(handle = (cothread_t)malloc(size)) { + long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long long)crash; /* crash if entrypoint returns */ + *--p = (long long)entrypoint; /* start of function */ + *(long long*)handle = (long long)p; /* stack pointer */ + } + + return handle; +} + +void co_delete(cothread_t handle) { + free(handle); +} + +void co_switch(cothread_t handle) { + register cothread_t co_previous_handle = co_active_handle; + co_swap(co_active_handle = handle, co_previous_handle); +} + +#ifdef __cplusplus +} +#endif diff --git a/libco.c b/libco.c index 604b37df..dd020fe3 100644 --- a/libco.c +++ b/libco.c @@ -6,15 +6,17 @@ #if defined(__GNUC__) && defined(__i386__) #include "x86.c" -#elif defined(__GNUC__) && 
defined(__amd64__) && !defined(__MINGW64__) - #include "x86-64.c" -#elif defined(__MINGW64__) - #include "fiber.c" +#elif defined(__GNUC__) && defined(__amd64__) + #include "amd64.c" +#elif defined(__GNUC__) && defined(__powerpc__) && defined(__ELF__) + #include "ppc-elf.c" #elif defined(__GNUC__) #include "sjlj.c" #elif defined(_MSC_VER) && defined(_M_IX86) #include "x86.c" #elif defined(_MSC_VER) && defined(_M_AMD64) + #include "amd64.c" +#elif defined(_MSC_VER) #include "fiber.c" #else #error "libco: unsupported processor, compiler or operating system" diff --git a/libco.h b/libco.h index d8348c4e..b1b49a29 100644 --- a/libco.h +++ b/libco.h @@ -1,6 +1,6 @@ /* libco - version: 0.13 rc2 (2008-01-28) + version: 0.15 (2009-10-12) license: public domain */ diff --git a/ppc-elf.c b/ppc-elf.c new file mode 100644 index 00000000..5740f77f --- /dev/null +++ b/ppc-elf.c @@ -0,0 +1,325 @@ +/* + * libco.ppc-elf + * author: Kernigh + * license: public domain + * + * PowerPC 32-bit ELF implementation of libco (for compile with GCC), + * ported from PowerPC Mac OS X implementation (ppc.s) by Vas Crabb. + * This ELF version works for OpenBSD, and might also work for FreeBSD, + * NetBSD and Linux. + * + * Note 1: This implementation does not handle the AltiVec/VMX + * registers, because the ELF ABI does not mention them, + * and my OpenBSD system is not using them. + * + * Note 2: If you want position-independent code, then you must + * define __PIC__. gcc -fpic or -fPIC defines __PIC__, but + * gcc -fpie or -fPIE might not. If you want to use -fpie + * or -fPIE, then you might need a manual definition: + * gcc -fpie -D__PIC__=1 + * gcc -fPIE -D__PIC__=2 + * + * The ELF ABI is "System V Application Binary Interface, PowerPC + * Processor Supplement", which you can get from + * + * (PDF file, hosted by Linux Foundation). + * + * ELF and Mac OS X use similar conventions to allocate the registers, + * and to pass arguments and return values through registers. 
The main + * differences are that ELF has a slightly different stack format, that + * symbols are different (and without an extra underscore at the start), + * and that the assembly syntax is different. + * + * A function may destroy the values of volatile registers, but must + * preserve the values of nonvolatile registers. So the co_switch() + * function only saves the nonvolatile registers. + * + * [nonvolatile registers in ELF] + * %r1, %r14..%r31 + * %f14..%f31 + * %cr2..%cr4 in cr + * + * [volatile registers in ELF] + * %r0, %r3..%r10 + * %f0..%f13 + * %cr0, %cr1, %cr5..%cr7 in cr + * ctr, lr, xer + * + * lr (link register) is volatile, but it contains the return address, + * so co_switch must save lr. + * + * %r13 is the small data pointer. This is constant across threads, so + * co_switch() does not touch %r13. + * + * %r2 is a reserved register, so co_switch() does not touch %r2. Some + * systems might borrow an idea from the PowerPC Embedded ABI, and might + * use %r2 as a small read-only data pointer, which is constant across + * threads. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void * cothread_t; + +/* + * co_active_context is either in a global offset table (if we are + * compiling -fPIC or -fPIE) or has an absolute position. + */ +static void *co_main_stack_pointer; +static cothread_t co_active_context = &co_main_stack_pointer; + +extern cothread_t co_active() { + return co_active_context; +} + +/* + * Embedded assembly. + * + * We are not using the percent-sign substitution feature, + * so we must write "%r1", not "%%r1". + * + * We always write 'bl malloc@plt', not 'bl malloc'. The '@plt' + * is necessary in position-indepent code and seems to have no + * significant effect in fixed-position code. + * + * We never use the 'lmw' or 'stmw' instructions. The ELF ABI + * mentions that these instructions "are usually slower than + * a sequence of other instructions that have the same effect." 
+ * We instead use sequences of 'lwz' or 'stz' instructions. + */ +__asm__("\n" +"### embedded assembly \n" +".section \".text\" \n" +" .balign 4 \n" +" \n" +/* + * void co_switch(co_thread to %r3) + * + * Allocate our stack frame of 240 bytes: + * Old New Value + * 4(%r1) 244(%r1) return address, used by us + * 0(%r1) 240(%r1) frame pointer + * 232(%r1) %f31 + * 224(%r1) %f30 + * ... + * 96(%r1) %f14 + * 92(%r1) %r31 + * 88(%r1) %r30 + * ... + * 24(%r1) %r14 + * 20(%r1) condition register + * 8(%r1) padding of 12 bytes + * 4(%r1) return address, never used + * 0(%r1) frame pointer + * + * Save our registers in our stack frame. + * Save our stack pointer in 0(%r4). + * Switch to the stack of the other thread. + * Restore registers and return. + */ +" .globl co_switch \n" +" .type co_switch, @function \n" +"co_switch: \n" +" mflr %r0 # %r0 = return address \n" +" mfcr %r9 # %r9 = condition register \n" +" stwu %r1, -240(%r1) # allocate stack frame \n" +" \n" +" stw %r0, 244(%r1) # save return address \n" +" stfd %f31, 232(%r1) # save floating-point regs \n" +" stfd %f30, 224(%r1) \n" +" stfd %f29, 216(%r1) \n" +" stfd %f28, 208(%r1) \n" +" stfd %f27, 200(%r1) \n" +" stfd %f26, 192(%r1) \n" +" stfd %f25, 184(%r1) \n" +" stfd %f24, 176(%r1) \n" +" stfd %f23, 168(%r1) \n" +" stfd %f22, 160(%r1) \n" +" stfd %f21, 152(%r1) \n" +" stfd %f20, 144(%r1) \n" +" stfd %f19, 136(%r1) \n" +" stfd %f18, 128(%r1) \n" +" stfd %f17, 120(%r1) \n" +" stfd %f16, 112(%r1) \n" +" stfd %f16, 104(%r1) \n" +" stfd %f14, 96(%r1) \n" +" stw %r31, 92(%r1) # save general-purpose regs \n" +" stw %r30, 88(%r1) \n" +" stw %r29, 84(%r1) \n" +" stw %r28, 80(%r1) \n" +" stw %r27, 76(%r1) \n" +" stw %r26, 72(%r1) \n" +" stw %r25, 68(%r1) \n" +" stw %r24, 64(%r1) \n" +" stw %r23, 60(%r1) \n" +" stw %r22, 56(%r1) \n" +" stw %r21, 52(%r1) \n" +" stw %r20, 48(%r1) \n" +" stw %r19, 44(%r1) \n" +" stw %r18, 40(%r1) \n" +" stw %r17, 36(%r1) \n" +" stw %r16, 32(%r1) \n" +" stw %r15, 28(%r1) \n" +" stw %r14, 
24(%r1) \n" +" stw %r9, 20(%r1) # save condition reg \n" +" \n" +" # save current context, set new context \n" +" # %r4 = co_active_context \n" +" # co_active_context = %r3 \n" +#if __PIC__ == 2 +" # position-independent code, large model (-fPIC) \n" +" bl _GLOBAL_OFFSET_TABLE_@local-4 \n" +" mflr %r8 # %r8 = address of got \n" +" addis %r7, %r8, co_active_context@got@ha \n" +" lwz %r6, co_active_context@got@l(%r7) \n" +" lwz %r4, 0(%r6) \n" +" stw %r3, 0(%r6) \n" +#elif __PIC__ == 1 +" # position-independent code, small model (-fpic) \n" +" bl _GLOBAL_OFFSET_TABLE_@local-4 \n" +" mflr %r8 # %r8 = address of got \n" +" lwz %r7, co_active_context@got(%r8) \n" +" lwz %r4, 0(%r7) \n" +" stw %r3, 0(%r7) \n" +#else +" # fixed-position code \n" +" lis %r8, co_active_context@ha \n" +" lwz %r4, co_active_context@l(%r8) \n" +" stw %r3, co_active_context@l(%r8) \n" +#endif +" \n" +" # save current stack pointer \n" +" stw %r1, 0(%r4) \n" +" # get new stack pointer \n" +" lwz %r1, 0(%r3) \n" +" \n" +" lwz %r0, 244(%r1) # get return address \n" +" lfd %f31, 232(%r1) # restore floating-point regs \n" +" lfd %f30, 224(%r1) \n" +" lfd %f29, 216(%r1) \n" +" lfd %f28, 208(%r1) \n" +" lfd %f27, 200(%r1) \n" +" lfd %f26, 192(%r1) \n" +" lfd %f25, 184(%r1) \n" +" lfd %f24, 176(%r1) \n" +" lfd %f23, 168(%r1) \n" +" lfd %f22, 160(%r1) \n" +" lfd %f21, 152(%r1) \n" +" lfd %f20, 144(%r1) \n" +" lfd %f19, 136(%r1) \n" +" lfd %f18, 128(%r1) \n" +" lfd %f17, 120(%r1) \n" +" lfd %f16, 112(%r1) \n" +" lfd %f16, 104(%r1) \n" +" lfd %f14, 96(%r1) \n" +" lwz %r31, 92(%r1) # restore general-purpose regs \n" +" lwz %r30, 88(%r1) \n" +" lwz %r29, 84(%r1) \n" +" lwz %r28, 80(%r1) \n" +" lwz %r27, 76(%r1) \n" +" lwz %r26, 72(%r1) \n" +" lwz %r25, 68(%r1) \n" +" lwz %r24, 64(%r1) \n" +" lwz %r23, 60(%r1) \n" +" lwz %r22, 56(%r1) \n" +" lwz %r21, 52(%r1) \n" +" lwz %r20, 48(%r1) \n" +" lwz %r19, 44(%r1) \n" +" lwz %r18, 40(%r1) \n" +" lwz %r17, 36(%r1) \n" +" lwz %r16, 32(%r1) \n" +" lwz %r15, 28(%r1) 
\n" +" lwz %r14, 24(%r1) \n" +" lwz %r9, 20(%r1) # get condition reg \n" +" \n" +" addi %r1, %r1, 240 # free stack frame \n" +" mtlr %r0 # restore return address \n" +" mtcr %r9 # restore condition register \n" +" blr # return \n" +" .size co_switch, . - co_switch \n" +" \n" +/* + * cothread_t %r3 co_create(unsigned int stack_size %r3, + * void (*coentry %r4)()) + * + * Allocate a new stack, such that when you co_switch to that + * stack, then co_switch returns to coentry. + */ +" .globl co_create \n" +" .type co_create, @function \n" +"co_create: \n" +" mflr %r0 # %r0 = return address \n" +" stwu %r1, -16(%r1) # allocate my stack frame \n" +" stw %r0, 20(%r1) # save return address \n" +" stw %r31, 12(%r1) # save %r31 \n" +" stw %r30, 8(%r1) # save %r30 \n" +" \n" +" mr %r30, %r3 # %r30 = stack_size \n" +" mr %r31, %r4 # %r31 = coentry \n" +" \n" +" # Call malloc(stack_size %r3) to allocate stack; \n" +" # malloc() probably uses good alignment. \n" +" # \n" +" bl malloc@plt # returns %r3 = low end \n" +" cmpwi %r3, 0 # if returned NULL, \n" +" beq- 1f # then abort \n" +" \n" +" # we return %r3 = low end of stack \n" +" add %r4, %r3, %r30 # %r4 = high end of stack \n" +" \n" +" # uncomment if malloc() uses wrong alignment \n" +" #rlwinm %r4,%r4,0,0,27 # force 16-byte alignment \n" +" \n" + /* + * Allocate two stack frames: + * 16 bytes for stack frame with return address + * 240 bytes for co_switch stack frame + * + * Old New Value + * -8(%r4) 248(%r5) padding of 8 bytes + * -12(%r4) 244(%r5) return address = coentry + * -16(%r4) 240(%r5) frame pointer = NULL + * 232(%r5) %f31 = 0 + * ... 
+ * 20(%r5) condition register = 0 + * 0(%r5) frame pointer + */ +" li %r9, (240-20)/4+1 \n" +" addi %r5, %r4, -16 # allocate first stack frame \n" +" li %r0, 0 \n" +" stwu %r5, -240(%r5) # allocate second stack frame \n" +" li %r8, 20 \n" +" mtctr %r9 # loop %r9 times \n" +"2: # loop to store zero to 20(%r5) through 240(%r5) \n" +" stwx %r0, %r5, %r8 \n" +" addi %r8, %r8, 4 # index += 4 \n" +" bdnz+ 2b # ctr -= 1, branch if nonzero \n" +" \n" +" stw %r31, 244(%r5) # return address = coentry \n" +" stw %r5, 0(%r3) # save stack pointer \n" +" \n" +" lwz %r0, 20(%r1) # get return address \n" +" lwz %r31, 12(%r1) # restore %r31 \n" +" lwz %r30, 8(%r1) # restore %r30 \n" +" mtlr %r0 # restore return address \n" +" addi %r1, %r1, 16 # free stack frame \n" +" blr # return \n" +" \n" +"1: b abort@plt # branch 1f to abort \n" +" .size co_create, . - co_create \n" +" \n" +/* + * void co_delete(cothread_t) => void free(void *) + */ +" .globl co_delete \n" +" .type co_delete, @function \n" +"co_delete: \n" +" b free@plt \n" +" \n" +); + +#ifdef __cplusplus +} +#endif diff --git a/x86-64.c b/x86-64.c deleted file mode 100644 index 2e2a1131..00000000 --- a/x86-64.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - libco.x86-64 (2008-01-28) - author: byuu - license: public domain -*/ - -#define LIBCO_C -#include "libco.h" -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static thread_local long co_active_buffer[32]; -static thread_local cothread_t co_active_ = 0; - -static void crash() { - assert(0); /* called only if cothread_t entrypoint returns */ -} - -cothread_t co_active() { - if(!co_active_) co_active_ = &co_active_buffer; - return co_active_; -} - -cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - cothread_t handle; - assert(sizeof(long) == 8); - if(!co_active_) co_active_ = &co_active_buffer; - size += 128; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ - - if(handle = (cothread_t)calloc(size, 
1)) { - long *p = (long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long)crash; /* crash if entrypoint returns */ - *--p = (long)entrypoint; /* start of function */ - *(long*)handle = (long)p; /* stack pointer */ - } - - return handle; -} - -void co_delete(cothread_t handle) { - free(handle); -} - -void co_switch(cothread_t to) { - register long stack = *(long*)to; /* stack[0] = "to" thread entry point */ - register cothread_t from = co_active_; - co_active_ = to; - - __asm__ __volatile__( - "movq %%rsp,(%1) \n\t" /* save old stack pointer */ - "movq (%0),%%rsp \n\t" /* load new stack pointer */ - "addq $8,%%rsp \n\t" /* "pop" return address off stack */ - - "movq %%rbp, 8(%1) \n\t" /* backup non-volatile registers */ - "movq %%rbx,16(%1) \n\t" - "movq %%r12,24(%1) \n\t" - "movq %%r13,32(%1) \n\t" - "movq %%r14,40(%1) \n\t" - "movq %%r15,48(%1) \n\t" - - "movq 8(%0),%%rbp \n\t" /* restore non-volatile registers */ - "movq 16(%0),%%rbx \n\t" - "movq 24(%0),%%r12 \n\t" - "movq 32(%0),%%r13 \n\t" - "movq 40(%0),%%r14 \n\t" - "movq 48(%0),%%r15 \n\t" - - "jmp *(%2) \n\t" /* jump into "to" thread */ - : /* no outputs */ - : "r" (to), "r" (from), "r" (stack) - ); -} - -#ifdef __cplusplus -} -#endif diff --git a/x86.c b/x86.c index 3a5507ff..d8f820b0 100644 --- a/x86.c +++ b/x86.c @@ -1,5 +1,5 @@ /* - libco.x86 (2008-01-28) + libco.x86 (2009-10-12) author: byuu license: public domain */ @@ -13,26 +13,63 @@ extern "C" { #endif -static thread_local long co_active_buffer[32]; -static thread_local cothread_t co_active_ = 0; +#if defined(_MSC_VER) + #define fastcall __fastcall +#elif defined(__GNUC__) + #define fastcall __attribute__((fastcall)) +#else + #error "libco: please define fastcall macro" +#endif + +static thread_local long co_active_buffer[64]; +static thread_local cothread_t co_active_handle = 0; +static void (fastcall *co_swap)(cothread_t, cothread_t) = 0; + +//ABI: fastcall +static unsigned char co_swap_function[] = { + 0x89, 0x22, 0x8B, 0x21, 
0x58, 0x89, 0x6A, 0x04, 0x89, 0x72, 0x08, 0x89, 0x7A, 0x0C, 0x89, 0x5A, + 0x10, 0x8B, 0x69, 0x04, 0x8B, 0x71, 0x08, 0x8B, 0x79, 0x0C, 0x8B, 0x59, 0x10, 0xFF, 0xE0, +}; + +#ifdef _WIN32 + #include + + void co_init() { + DWORD old_privileges; + VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); + } +#else + #include + #include + + void co_init() { + unsigned long addr = (unsigned long)co_swap_function; + unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); + unsigned long size = (addr - base) + sizeof co_swap_function; + mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + } +#endif static void crash() { assert(0); /* called only if cothread_t entrypoint returns */ } cothread_t co_active() { - if(!co_active_) co_active_ = &co_active_buffer; - return co_active_; + if(!co_active_handle) co_active_handle = &co_active_buffer; + return co_active_handle; } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { cothread_t handle; - assert(sizeof(long) == 4); - if(!co_active_) co_active_ = &co_active_buffer; - size += 128; /* allocate additional space for storage */ + if(!co_swap) { + co_init(); + co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + size += 256; /* allocate additional space for storage */ size &= ~15; /* align stack to 16-byte boundary */ - if(handle = (cothread_t)calloc(size, 1)) { + if(handle = (cothread_t)malloc(size)) { long *p = (long*)((char*)handle + size); /* seek to top of stack */ *--p = (long)crash; /* crash if entrypoint returns */ *--p = (long)entrypoint; /* start of function */ @@ -46,65 +83,11 @@ void co_delete(cothread_t handle) { free(handle); } -#if defined(__GNUC__) - -void co_switch(cothread_t to) { - register long stack = *(long*)to; /* stack[0] = "to" thread entry point */ - register cothread_t from = co_active_; - co_active_ = to; - - __asm__ __volatile__( - "movl 
%%esp,(%1) \n\t" /* save old stack pointer */ - "movl (%0),%%esp \n\t" /* load new stack pointer */ - "addl $4,%%esp \n\t" /* "pop" return address off stack */ - - "movl %%ebp, 4(%1) \n\t" /* backup non-volatile registers */ - "movl %%esi, 8(%1) \n\t" - "movl %%edi,12(%1) \n\t" - "movl %%ebx,16(%1) \n\t" - - "movl 4(%0),%%ebp \n\t" /* restore non-volatile registers */ - "movl 8(%0),%%esi \n\t" - "movl 12(%0),%%edi \n\t" - "movl 16(%0),%%ebx \n\t" - - "jmp *(%2) \n\t" /* jump into "to" thread */ - : /* no outputs */ - : "r" (to), "r" (from), "r" (stack) - ); -} - -#elif defined(_MSC_VER) - -__declspec(naked) __declspec(noinline) -static void __fastcall co_swap(register cothread_t to, register cothread_t from) { - /* ecx = to, edx = from */ - __asm { - mov [edx],esp - mov esp,[ecx] - pop eax - - mov [edx+ 4],ebp - mov [edx+ 8],esi - mov [edx+12],edi - mov [edx+16],ebx - - mov ebp,[ecx+ 4] - mov esi,[ecx+ 8] - mov edi,[ecx+12] - mov ebx,[ecx+16] - - jmp eax - } -} - void co_switch(cothread_t handle) { - register cothread_t co_prev_ = co_active_; - co_swap(co_active_ = handle, co_prev_); + register cothread_t co_previous_handle = co_active_handle; + co_swap(co_active_handle = handle, co_previous_handle); } -#endif - #ifdef __cplusplus } #endif From 7dcdaef9bdf960c2237f72a2ae62719e69fae8f2 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Sun, 26 Dec 2010 23:24:34 +1100 Subject: [PATCH 06/43] Update to v073 release. byuu says: This release marks a major step forward, offering full low-level emulation of all four DSP coprocessors based on the NEC uPD77C25 processor core. Many people were responsible for this milestone: Dr. Decapitator for the actual decapping and extraction; Lord Nightmare for the cartridges and some special analysis tools; myself, Jonas Quinn and Cydrak for the uPD77C25 emulation; and all of the donors who raised the necessary $1,000 for the necessary hardware and equipment needed to pull this all off. 
To say thanks to the donors, I am releasing the uPD77C25 emulation core to the public domain, so that everyone can benefit from it. All four DSP emulations will be improved by this by way of having realistic timing; the DSP-4 will benefit further as the high-level emulation was incomplete and somewhat buggy; and the DSP-3 will benefit the most as the high-level emulation there was not complete enough to be playable. As a result, most notably, this means bsnes v073 is the first emulator to fully be able to play SD Gundam GX (J)! As bsnes' primary goal is accuracy, the LLE DSP support renders the old HLE DSP support obsolete. Ergo, I have removed the 166KB of HLE source code, and replaced it with the uPD77C25 core, which comprises a mere 20KB of source code. As this LLE module supports save states, this also means that for the first time, DSP-3 and DSP-4 games have save state support. On the other hand, this also means that to run any DSP game, you will need the appropriate program ROM. As these are copyrighted, I cannot distribute them nor tell you where to get them. All I can do is provide you with the necessary filenames and hashes.
Changelog (since v072 release): * added NEC uPD77C25 emulation core * added low-level emulation of the DSP-1, DSP-1B, DSP-2, DSP-3, DSP-4 coprocessors * removed high-level emulation of the DSP-n coprocessors * added blargg's libco::ppc.c module, which is far more portable, even running on the PS3 * added software filter support via binary plugins * added debugger (currently Linux-only); but it is as yet unstable * added pause shortcut * updated mightymo's cheat code database --- libco.c | 4 +- libco.h | 2 +- ppc-elf.c | 325 ---------------------------------- ppc.c | 407 +++++++++++++++++++++++++++++++++++++++++++ ppc.s | 478 -------------------------------------------------- ppc64.s | 513 ------------------------------------------------------ 6 files changed, 410 insertions(+), 1319 deletions(-) delete mode 100644 ppc-elf.c create mode 100644 ppc.c delete mode 100644 ppc.s delete mode 100644 ppc64.s diff --git a/libco.c b/libco.c index dd020fe3..55676263 100644 --- a/libco.c +++ b/libco.c @@ -8,8 +8,8 @@ #include "x86.c" #elif defined(__GNUC__) && defined(__amd64__) #include "amd64.c" -#elif defined(__GNUC__) && defined(__powerpc__) && defined(__ELF__) - #include "ppc-elf.c" +#elif defined(__GNUC__) && defined(_ARCH_PPC) + #include "ppc.c" #elif defined(__GNUC__) #include "sjlj.c" #elif defined(_MSC_VER) && defined(_M_IX86) diff --git a/libco.h b/libco.h index b1b49a29..deb954fb 100644 --- a/libco.h +++ b/libco.h @@ -1,6 +1,6 @@ /* libco - version: 0.15 (2009-10-12) + version: 0.16 (2010-12-24) license: public domain */ diff --git a/ppc-elf.c b/ppc-elf.c deleted file mode 100644 index 5740f77f..00000000 --- a/ppc-elf.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * libco.ppc-elf - * author: Kernigh - * license: public domain - * - * PowerPC 32-bit ELF implementation of libco (for compile with GCC), - * ported from PowerPC Mac OS X implementation (ppc.s) by Vas Crabb. - * This ELF version works for OpenBSD, and might also work for FreeBSD, - * NetBSD and Linux. 
- * - * Note 1: This implementation does not handle the AltiVec/VMX - * registers, because the ELF ABI does not mention them, - * and my OpenBSD system is not using them. - * - * Note 2: If you want position-independent code, then you must - * define __PIC__. gcc -fpic or -fPIC defines __PIC__, but - * gcc -fpie or -fPIE might not. If you want to use -fpie - * or -fPIE, then you might need a manual definition: - * gcc -fpie -D__PIC__=1 - * gcc -fPIE -D__PIC__=2 - * - * The ELF ABI is "System V Application Binary Interface, PowerPC - * Processor Supplement", which you can get from - * - * (PDF file, hosted by Linux Foundation). - * - * ELF and Mac OS X use similar conventions to allocate the registers, - * and to pass arguments and return values through registers. The main - * differences are that ELF has a slightly different stack format, that - * symbols are different (and without an extra underscore at the start), - * and that the assembly syntax is different. - * - * A function may destroy the values of volatile registers, but must - * preserve the values of nonvolatile registers. So the co_switch() - * function only saves the nonvolatile registers. - * - * [nonvolatile registers in ELF] - * %r1, %r14..%r31 - * %f14..%f31 - * %cr2..%cr4 in cr - * - * [volatile registers in ELF] - * %r0, %r3..%r10 - * %f0..%f13 - * %cr0, %cr1, %cr5..%cr7 in cr - * ctr, lr, xer - * - * lr (link register) is volatile, but it contains the return address, - * so co_switch must save lr. - * - * %r13 is the small data pointer. This is constant across threads, so - * co_switch() does not touch %r13. - * - * %r2 is a reserved register, so co_switch() does not touch %r2. Some - * systems might borrow an idea from the PowerPC Embedded ABI, and might - * use %r2 as a small read-only data pointer, which is constant across - * threads. 
- */ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void * cothread_t; - -/* - * co_active_context is either in a global offset table (if we are - * compiling -fPIC or -fPIE) or has an absolute position. - */ -static void *co_main_stack_pointer; -static cothread_t co_active_context = &co_main_stack_pointer; - -extern cothread_t co_active() { - return co_active_context; -} - -/* - * Embedded assembly. - * - * We are not using the percent-sign substitution feature, - * so we must write "%r1", not "%%r1". - * - * We always write 'bl malloc@plt', not 'bl malloc'. The '@plt' - * is necessary in position-indepent code and seems to have no - * significant effect in fixed-position code. - * - * We never use the 'lmw' or 'stmw' instructions. The ELF ABI - * mentions that these instructions "are usually slower than - * a sequence of other instructions that have the same effect." - * We instead use sequences of 'lwz' or 'stz' instructions. - */ -__asm__("\n" -"### embedded assembly \n" -".section \".text\" \n" -" .balign 4 \n" -" \n" -/* - * void co_switch(co_thread to %r3) - * - * Allocate our stack frame of 240 bytes: - * Old New Value - * 4(%r1) 244(%r1) return address, used by us - * 0(%r1) 240(%r1) frame pointer - * 232(%r1) %f31 - * 224(%r1) %f30 - * ... - * 96(%r1) %f14 - * 92(%r1) %r31 - * 88(%r1) %r30 - * ... - * 24(%r1) %r14 - * 20(%r1) condition register - * 8(%r1) padding of 12 bytes - * 4(%r1) return address, never used - * 0(%r1) frame pointer - * - * Save our registers in our stack frame. - * Save our stack pointer in 0(%r4). - * Switch to the stack of the other thread. - * Restore registers and return. 
- */ -" .globl co_switch \n" -" .type co_switch, @function \n" -"co_switch: \n" -" mflr %r0 # %r0 = return address \n" -" mfcr %r9 # %r9 = condition register \n" -" stwu %r1, -240(%r1) # allocate stack frame \n" -" \n" -" stw %r0, 244(%r1) # save return address \n" -" stfd %f31, 232(%r1) # save floating-point regs \n" -" stfd %f30, 224(%r1) \n" -" stfd %f29, 216(%r1) \n" -" stfd %f28, 208(%r1) \n" -" stfd %f27, 200(%r1) \n" -" stfd %f26, 192(%r1) \n" -" stfd %f25, 184(%r1) \n" -" stfd %f24, 176(%r1) \n" -" stfd %f23, 168(%r1) \n" -" stfd %f22, 160(%r1) \n" -" stfd %f21, 152(%r1) \n" -" stfd %f20, 144(%r1) \n" -" stfd %f19, 136(%r1) \n" -" stfd %f18, 128(%r1) \n" -" stfd %f17, 120(%r1) \n" -" stfd %f16, 112(%r1) \n" -" stfd %f16, 104(%r1) \n" -" stfd %f14, 96(%r1) \n" -" stw %r31, 92(%r1) # save general-purpose regs \n" -" stw %r30, 88(%r1) \n" -" stw %r29, 84(%r1) \n" -" stw %r28, 80(%r1) \n" -" stw %r27, 76(%r1) \n" -" stw %r26, 72(%r1) \n" -" stw %r25, 68(%r1) \n" -" stw %r24, 64(%r1) \n" -" stw %r23, 60(%r1) \n" -" stw %r22, 56(%r1) \n" -" stw %r21, 52(%r1) \n" -" stw %r20, 48(%r1) \n" -" stw %r19, 44(%r1) \n" -" stw %r18, 40(%r1) \n" -" stw %r17, 36(%r1) \n" -" stw %r16, 32(%r1) \n" -" stw %r15, 28(%r1) \n" -" stw %r14, 24(%r1) \n" -" stw %r9, 20(%r1) # save condition reg \n" -" \n" -" # save current context, set new context \n" -" # %r4 = co_active_context \n" -" # co_active_context = %r3 \n" -#if __PIC__ == 2 -" # position-independent code, large model (-fPIC) \n" -" bl _GLOBAL_OFFSET_TABLE_@local-4 \n" -" mflr %r8 # %r8 = address of got \n" -" addis %r7, %r8, co_active_context@got@ha \n" -" lwz %r6, co_active_context@got@l(%r7) \n" -" lwz %r4, 0(%r6) \n" -" stw %r3, 0(%r6) \n" -#elif __PIC__ == 1 -" # position-independent code, small model (-fpic) \n" -" bl _GLOBAL_OFFSET_TABLE_@local-4 \n" -" mflr %r8 # %r8 = address of got \n" -" lwz %r7, co_active_context@got(%r8) \n" -" lwz %r4, 0(%r7) \n" -" stw %r3, 0(%r7) \n" -#else -" # fixed-position code \n" -" lis 
%r8, co_active_context@ha \n" -" lwz %r4, co_active_context@l(%r8) \n" -" stw %r3, co_active_context@l(%r8) \n" -#endif -" \n" -" # save current stack pointer \n" -" stw %r1, 0(%r4) \n" -" # get new stack pointer \n" -" lwz %r1, 0(%r3) \n" -" \n" -" lwz %r0, 244(%r1) # get return address \n" -" lfd %f31, 232(%r1) # restore floating-point regs \n" -" lfd %f30, 224(%r1) \n" -" lfd %f29, 216(%r1) \n" -" lfd %f28, 208(%r1) \n" -" lfd %f27, 200(%r1) \n" -" lfd %f26, 192(%r1) \n" -" lfd %f25, 184(%r1) \n" -" lfd %f24, 176(%r1) \n" -" lfd %f23, 168(%r1) \n" -" lfd %f22, 160(%r1) \n" -" lfd %f21, 152(%r1) \n" -" lfd %f20, 144(%r1) \n" -" lfd %f19, 136(%r1) \n" -" lfd %f18, 128(%r1) \n" -" lfd %f17, 120(%r1) \n" -" lfd %f16, 112(%r1) \n" -" lfd %f16, 104(%r1) \n" -" lfd %f14, 96(%r1) \n" -" lwz %r31, 92(%r1) # restore general-purpose regs \n" -" lwz %r30, 88(%r1) \n" -" lwz %r29, 84(%r1) \n" -" lwz %r28, 80(%r1) \n" -" lwz %r27, 76(%r1) \n" -" lwz %r26, 72(%r1) \n" -" lwz %r25, 68(%r1) \n" -" lwz %r24, 64(%r1) \n" -" lwz %r23, 60(%r1) \n" -" lwz %r22, 56(%r1) \n" -" lwz %r21, 52(%r1) \n" -" lwz %r20, 48(%r1) \n" -" lwz %r19, 44(%r1) \n" -" lwz %r18, 40(%r1) \n" -" lwz %r17, 36(%r1) \n" -" lwz %r16, 32(%r1) \n" -" lwz %r15, 28(%r1) \n" -" lwz %r14, 24(%r1) \n" -" lwz %r9, 20(%r1) # get condition reg \n" -" \n" -" addi %r1, %r1, 240 # free stack frame \n" -" mtlr %r0 # restore return address \n" -" mtcr %r9 # restore condition register \n" -" blr # return \n" -" .size co_switch, . - co_switch \n" -" \n" -/* - * cothread_t %r3 co_create(unsigned int stack_size %r3, - * void (*coentry %r4)()) - * - * Allocate a new stack, such that when you co_switch to that - * stack, then co_switch returns to coentry. 
- */ -" .globl co_create \n" -" .type co_create, @function \n" -"co_create: \n" -" mflr %r0 # %r0 = return address \n" -" stwu %r1, -16(%r1) # allocate my stack frame \n" -" stw %r0, 20(%r1) # save return address \n" -" stw %r31, 12(%r1) # save %r31 \n" -" stw %r30, 8(%r1) # save %r30 \n" -" \n" -" mr %r30, %r3 # %r30 = stack_size \n" -" mr %r31, %r4 # %r31 = coentry \n" -" \n" -" # Call malloc(stack_size %r3) to allocate stack; \n" -" # malloc() probably uses good alignment. \n" -" # \n" -" bl malloc@plt # returns %r3 = low end \n" -" cmpwi %r3, 0 # if returned NULL, \n" -" beq- 1f # then abort \n" -" \n" -" # we return %r3 = low end of stack \n" -" add %r4, %r3, %r30 # %r4 = high end of stack \n" -" \n" -" # uncomment if malloc() uses wrong alignment \n" -" #rlwinm %r4,%r4,0,0,27 # force 16-byte alignment \n" -" \n" - /* - * Allocate two stack frames: - * 16 bytes for stack frame with return address - * 240 bytes for co_switch stack frame - * - * Old New Value - * -8(%r4) 248(%r5) padding of 8 bytes - * -12(%r4) 244(%r5) return address = coentry - * -16(%r4) 240(%r5) frame pointer = NULL - * 232(%r5) %f31 = 0 - * ... - * 20(%r5) condition register = 0 - * 0(%r5) frame pointer - */ -" li %r9, (240-20)/4+1 \n" -" addi %r5, %r4, -16 # allocate first stack frame \n" -" li %r0, 0 \n" -" stwu %r5, -240(%r5) # allocate second stack frame \n" -" li %r8, 20 \n" -" mtctr %r9 # loop %r9 times \n" -"2: # loop to store zero to 20(%r5) through 240(%r5) \n" -" stwx %r0, %r5, %r8 \n" -" addi %r8, %r8, 4 # index += 4 \n" -" bdnz+ 2b # ctr -= 1, branch if nonzero \n" -" \n" -" stw %r31, 244(%r5) # return address = coentry \n" -" stw %r5, 0(%r3) # save stack pointer \n" -" \n" -" lwz %r0, 20(%r1) # get return address \n" -" lwz %r31, 12(%r1) # restore %r31 \n" -" lwz %r30, 8(%r1) # restore %r30 \n" -" mtlr %r0 # restore return address \n" -" addi %r1, %r1, 16 # free stack frame \n" -" blr # return \n" -" \n" -"1: b abort@plt # branch 1f to abort \n" -" .size co_create, . 
- co_create \n" -" \n" -/* - * void co_delete(cothread_t) => void free(void *) - */ -" .globl co_delete \n" -" .type co_delete, @function \n" -"co_delete: \n" -" b free@plt \n" -" \n" -); - -#ifdef __cplusplus -} -#endif diff --git a/ppc.c b/ppc.c new file mode 100644 index 00000000..a6028fdb --- /dev/null +++ b/ppc.c @@ -0,0 +1,407 @@ +/* + libco.ppc (2010-10-17) + author: blargg + license: public domain +*/ + +/* PowerPC 32/64 using embedded or external asm, with optional +floating-point and AltiVec save/restore */ + +#define LIBCO_C +#include "libco.h" +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM) + +#if LIBCO_MPROTECT + #include <unistd.h> + #include <sys/mman.h> +#endif + +/* State format (offsets in 32-bit words) + ++0 Pointer to swap code + Rest of function descriptor for entry function ++8 PC ++10 SP + Special regs + GPRs + FPRs + VRs + stack +*/ + +enum { state_size = 1024 }; +enum { above_stack = 2048 }; +enum { stack_align = 256 }; + +static thread_local cothread_t co_active_handle = 0; + +/**** Determine environment ****/ + +#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__) + +/* Whether function calls are indirect through a descriptor, +or are directly to function */ +#ifndef LIBCO_PPCDESC + #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) + #define LIBCO_PPCDESC 1 + #endif +#endif + +#ifdef LIBCO_PPC_ASM + + #ifdef __cplusplus + extern "C" + #endif + + /* Swap code is in ppc.S */ + void co_swap_asm( cothread_t, cothread_t ); + #define CO_SWAP_ASM( x, y ) co_swap_asm( x, y ) + +#else + +/* Swap code is here in array. Please leave disassembly comments, +as they make it easy to see what it does, and reorder instructions +if one wants to see whether that improves performance.
*/ +static const uint32_t libco_ppc_code [] = { +#if LIBCO_PPC64 + 0x7d000026, /* mfcr r8 */ + 0xf8240028, /* std r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0xf9c40048, /* std r14,72(r4) */ + 0xf9e40050, /* std r15,80(r4) */ + 0xfa040058, /* std r16,88(r4) */ + 0xfa240060, /* std r17,96(r4) */ + 0xfa440068, /* std r18,104(r4) */ + 0xfa640070, /* std r19,112(r4) */ + 0xfa840078, /* std r20,120(r4) */ + 0xfaa40080, /* std r21,128(r4) */ + 0xfac40088, /* std r22,136(r4) */ + 0xfae40090, /* std r23,144(r4) */ + 0xfb040098, /* std r24,152(r4) */ + 0xfb2400a0, /* std r25,160(r4) */ + 0xfb4400a8, /* std r26,168(r4) */ + 0xfb6400b0, /* std r27,176(r4) */ + 0xfb8400b8, /* std r28,184(r4) */ + 0xfba400c0, /* std r29,192(r4) */ + 0xfbc400c8, /* std r30,200(r4) */ + 0xfbe400d0, /* std r31,208(r4) */ + 0xf9240020, /* std r9,32(r4) */ + 0xe8e30020, /* ld r7,32(r3) */ + 0xe8230028, /* ld r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030,/*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0xe9c30048, /* ld r14,72(r3) */ + 0xe9e30050, /* ld r15,80(r3) */ + 0xea030058, /* ld r16,88(r3) */ + 0xea230060, /* ld r17,96(r3) */ + 0xea430068, /* ld r18,104(r3) */ + 0xea630070, /* ld r19,112(r3) */ + 0xea830078, /* ld r20,120(r3) */ + 0xeaa30080, /* ld r21,128(r3) */ + 0xeac30088, /* ld r22,136(r3) */ + 0xeae30090, /* ld r23,144(r3) */ + 0xeb030098, /* ld r24,152(r3) */ + 0xeb2300a0, /* ld r25,160(r3) */ + 0xeb4300a8, /* ld r26,168(r3) */ + 0xeb6300b0, /* ld r27,176(r3) */ + 0xeb8300b8, /* ld r28,184(r3) */ + 0xeba300c0, /* ld r29,192(r3) */ + 0xebc300c8, /* ld r30,200(r3) */ + 0xebe300d0, /* ld r31,208(r3) */ + 0x7ccff120, /* mtcr r6 */ +#else + 0x7d000026, /* mfcr r8 */ + 0x90240028, /* stw r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0x91a4003c, /* stw r13,60(r4) */ + 0x91c40040, /* stw r14,64(r4) */ + 0x91e40044, /* stw r15,68(r4) */ + 0x92040048, /* stw r16,72(r4) */ + 0x9224004c, /* stw r17,76(r4) */ + 0x92440050, /* stw 
r18,80(r4) */ + 0x92640054, /* stw r19,84(r4) */ + 0x92840058, /* stw r20,88(r4) */ + 0x92a4005c, /* stw r21,92(r4) */ + 0x92c40060, /* stw r22,96(r4) */ + 0x92e40064, /* stw r23,100(r4) */ + 0x93040068, /* stw r24,104(r4) */ + 0x9324006c, /* stw r25,108(r4) */ + 0x93440070, /* stw r26,112(r4) */ + 0x93640074, /* stw r27,116(r4) */ + 0x93840078, /* stw r28,120(r4) */ + 0x93a4007c, /* stw r29,124(r4) */ + 0x93c40080, /* stw r30,128(r4) */ + 0x93e40084, /* stw r31,132(r4) */ + 0x91240020, /* stw r9,32(r4) */ + 0x80e30020, /* lwz r7,32(r3) */ + 0x80230028, /* lwz r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030,/*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0x81a3003c, /* lwz r13,60(r3) */ + 0x81c30040, /* lwz r14,64(r3) */ + 0x81e30044, /* lwz r15,68(r3) */ + 0x82030048, /* lwz r16,72(r3) */ + 0x8223004c, /* lwz r17,76(r3) */ + 0x82430050, /* lwz r18,80(r3) */ + 0x82630054, /* lwz r19,84(r3) */ + 0x82830058, /* lwz r20,88(r3) */ + 0x82a3005c, /* lwz r21,92(r3) */ + 0x82c30060, /* lwz r22,96(r3) */ + 0x82e30064, /* lwz r23,100(r3) */ + 0x83030068, /* lwz r24,104(r3) */ + 0x8323006c, /* lwz r25,108(r3) */ + 0x83430070, /* lwz r26,112(r3) */ + 0x83630074, /* lwz r27,116(r3) */ + 0x83830078, /* lwz r28,120(r3) */ + 0x83a3007c, /* lwz r29,124(r3) */ + 0x83c30080, /* lwz r30,128(r3) */ + 0x83e30084, /* lwz r31,132(r3) */ + 0x7ccff120, /* mtcr r6 */ +#endif + +#ifndef LIBCO_PPC_NOFP + 0xd9c400e0, /* stfd f14,224(r4) */ + 0xd9e400e8, /* stfd f15,232(r4) */ + 0xda0400f0, /* stfd f16,240(r4) */ + 0xda2400f8, /* stfd f17,248(r4) */ + 0xda440100, /* stfd f18,256(r4) */ + 0xda640108, /* stfd f19,264(r4) */ + 0xda840110, /* stfd f20,272(r4) */ + 0xdaa40118, /* stfd f21,280(r4) */ + 0xdac40120, /* stfd f22,288(r4) */ + 0xdae40128, /* stfd f23,296(r4) */ + 0xdb040130, /* stfd f24,304(r4) */ + 0xdb240138, /* stfd f25,312(r4) */ + 0xdb440140, /* stfd f26,320(r4) */ + 0xdb640148, /* stfd f27,328(r4) */ + 0xdb840150, /* 
stfd f28,336(r4) */ + 0xdba40158, /* stfd f29,344(r4) */ + 0xdbc40160, /* stfd f30,352(r4) */ + 0xdbe40168, /* stfd f31,360(r4) */ + 0xc9c300e0, /* lfd f14,224(r3) */ + 0xc9e300e8, /* lfd f15,232(r3) */ + 0xca0300f0, /* lfd f16,240(r3) */ + 0xca2300f8, /* lfd f17,248(r3) */ + 0xca430100, /* lfd f18,256(r3) */ + 0xca630108, /* lfd f19,264(r3) */ + 0xca830110, /* lfd f20,272(r3) */ + 0xcaa30118, /* lfd f21,280(r3) */ + 0xcac30120, /* lfd f22,288(r3) */ + 0xcae30128, /* lfd f23,296(r3) */ + 0xcb030130, /* lfd f24,304(r3) */ + 0xcb230138, /* lfd f25,312(r3) */ + 0xcb430140, /* lfd f26,320(r3) */ + 0xcb630148, /* lfd f27,328(r3) */ + 0xcb830150, /* lfd f28,336(r3) */ + 0xcba30158, /* lfd f29,344(r3) */ + 0xcbc30160, /* lfd f30,352(r3) */ + 0xcbe30168, /* lfd f31,360(r3) */ +#endif + +#ifdef __ALTIVEC__ + 0x7ca042a6, /* mfvrsave r5 */ + 0x39040180, /* addi r8,r4,384 */ + 0x39240190, /* addi r9,r4,400 */ + 0x70a00fff, /* andi. r0,r5,4095 */ + 0x90a40034, /* stw r5,52(r4) */ + 0x4182005c, /* beq- 2 */ + 0x7e8041ce, /* stvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea049ce, /* stvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec041ce, /* stvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee049ce, /* stvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0041ce, /* stvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2049ce, /* stvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4041ce, /* stvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6049ce, /* stvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8041ce, /* stvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa049ce, /* stvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc041ce, /* stvx v30,r0,r8 */ + 0x7fe049ce, /* stvx v31,r0,r9 */ + 0x80a30034,/*2:lwz r5,52(r3) */ + 0x39030180, /* addi r8,r3,384 */ + 0x39230190, /* addi r9,r3,400 */ + 0x70a00fff, /* andi. 
r0,r5,4095 */ + 0x7ca043a6, /* mtvrsave r5 */ + 0x4d820420, /* beqctr */ + 0x7e8040ce, /* lvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea048ce, /* lvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec040ce, /* lvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee048ce, /* lvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0040ce, /* lvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2048ce, /* lvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4040ce, /* lvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6048ce, /* lvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8040ce, /* lvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa048ce, /* lvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc040ce, /* lvx v30,r0,r8 */ + 0x7fe048ce, /* lvx v31,r0,r9 */ +#endif + + 0x4e800420, /* bctr */ +}; + + #if LIBCO_PPCDESC + /* Function call goes through indirect descriptor */ + #define CO_SWAP_ASM( x, y ) \ + ((void (*)( cothread_t, cothread_t )) (uintptr_t) x)( x, y ) + #else + /* Function call goes directly to code */ + #define CO_SWAP_ASM( x, y ) \ + ((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y ) + #endif + +#endif + +static uint32_t* co_create_( unsigned size, uintptr_t entry ) +{ + uint32_t* t = (uint32_t*) malloc( size ); + + (void) entry; + + #if LIBCO_PPCDESC + if ( t ) + { + /* Copy entry's descriptor */ + memcpy( t, (void*) entry, sizeof (void*) * 3 ); + + /* Set function pointer to swap routine */ + #ifdef LIBCO_PPC_ASM + *(const void**) t = *(void**) &co_swap_asm; + #else + *(const void**) t = libco_ppc_code; + #endif + } + #endif + + return t; +} + +cothread_t co_create( unsigned int size, void (*entry_)( void ) ) +{ + uintptr_t entry = (uintptr_t) entry_; + uint32_t* t = NULL; + + /* Be sure main thread was successfully allocated */ + if ( co_active() ) + { + size += state_size + above_stack + stack_align; + t = co_create_( size, entry ); + } + + if ( t ) + { + uintptr_t sp; + 
int shift; + + /* Save current registers into new thread, so that any special ones will + have proper values when thread is begun */ + CO_SWAP_ASM( t, t ); + + #if LIBCO_PPCDESC + /* Get real address */ + entry = (uintptr_t) *(void**) entry; + #endif + + /* Put stack near end of block, and align */ + sp = (uintptr_t) t + size - above_stack; + sp -= sp % stack_align; + + /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we + save and restore them as 64 bits, regardless of the size the ABI + uses. So, we manually write pointers at the proper size. We always + save and restore at the same address, and since PPC is big-endian, + we must put the low byte first on PPC32. */ + + /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts + and don't have to care how many bits uintptr_t is. */ + #if LIBCO_PPC64 + shift = 16; + #else + shift = 0; + #endif + + /* Set up so entry will be called on next swap */ + t [8] = (uint32_t) (entry >> shift >> shift); + t [9] = (uint32_t) entry; + + t [10] = (uint32_t) (sp >> shift >> shift); + t [11] = (uint32_t) sp; + } + + return t; +} + +void co_delete( cothread_t t ) +{ + free( t ); +} + +static void co_init_( void ) +{ + #if LIBCO_MPROTECT + /* TODO: pre- and post-pad PPC code so that this doesn't make other + data executable and writable */ + long page_size = sysconf( _SC_PAGESIZE ); + if ( page_size > 0 ) + { + uintptr_t align = page_size; + uintptr_t begin = (uintptr_t) libco_ppc_code; + uintptr_t end = begin + sizeof libco_ppc_code; + + /* Align beginning and end */ + end += align - 1; + end -= end % align; + begin -= begin % align; + + mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC ); + } + #endif + + co_active_handle = co_create_( state_size, (uintptr_t) &co_switch ); +} + +cothread_t co_active() +{ + if ( !co_active_handle ) + co_init_(); + + return co_active_handle; +} + +void co_switch( cothread_t t ) +{ + cothread_t old = co_active_handle; + co_active_handle = t; + + 
CO_SWAP_ASM( t, old ); +} diff --git a/ppc.s b/ppc.s deleted file mode 100644 index d7f6b758..00000000 --- a/ppc.s +++ /dev/null @@ -1,478 +0,0 @@ -;***** -;libco.ppc (2007-11-29) -;author: Vas Crabb -;license: public domain -; -;cross-platform PowerPC implementation of libco -;special thanks to byuu for writing the original version -; -;[ABI compatibility] -;- gcc; mac os x; ppc -; -;[nonvolatile registers] -;- GPR1, GPR13 - GPR31 -;- FPR14 - FPR31 -;- V20 - V31 -;- VRSAVE, CR2 - CR4 -; -;[volatile registers] -;- GPR0, GPR2 - GPR12 -;- FPR0 - FPR13 -;- V0 - V19 -;- LR, CTR, XER, CR0, CR1, CR5 - CR7 -;***** - - -;Declare some target-specific stuff - - .section __TEXT,__text,regular,pure_instructions - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .machine ppc - - -;Constants - - .cstring - .align 2 - -_sysctl_altivec: - .ascii "hw.optional.altivec\0" - - -;Declare space for variables - -.lcomm _co_environ,4,2 ;bit 0 = initialised, bit 1 = have Altivec/VMX -.lcomm _co_primary_buffer,1024,2 ;buffer (will be zeroed by loader) - - .data - .align 2 - -_co_active_context: - .long _co_primary_buffer - - - .text - .align 2 - - -;Declare exported names - -.globl _co_active -.globl _co_create -.globl _co_delete -.globl _co_switch - - -;***** -;extern "C" cothread_t co_active(); -;return = GPR3 -;***** - -_co_active: - mflr r0 ;GPR0 = return address - bcl 20,31,L_co_active$spb -L_co_active$spb: - mflr r2 ;GPR2 set for position-independance - addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3 - lwz r3,lo16(_co_active_context-L_co_active$spb)(r3) - mtlr r0 ;LR = return address - blr ;return - - -;***** -;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)()); -;GPR3 = heapsize -;GPR4 = coentry -;return = GPR3 -;***** - -_co_create: - mflr r0 ;GPR0 = return address - stmw r30,-8(r1) ;save GPR30 and GPR31 - stw r0,8(r1) ;save return address - stwu r1,-(2*4+16+24)(r1) ;allocate 16 bytes for locals/parameters - -;create 
heap space (stack + register storage) - addi r31,r3,1024-24 ;subtract space for linkage - mr r30,r4 ;GPR30 = coentry - addi r3,r3,1024 ;allocate extra memory for contextual info - bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024) - add r4,r3,r31 ;GPR4 points to top-of-stack - rlwinm r5,r4,0,0,27 ;force 16-byte alignment - -;store thread entry point + registers, so that first call to co_switch will execute coentry - stw r30,8(r5) ;store entry point - addi r6,0,2+19+18*2+12*4+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE - addi r0,0,0 - addi r7,0,4 ;start at 4(GPR5) - mtctr r6 -L_co_create$clear_loop: - stwx r0,r5,r7 ;clear a word - addi r7,r7,-4 ;increment pointer - bdnz L_co_create$clear_loop ;loop - stwu r5,-448(r5) ;store top of stack - -;initialize context memory heap and return - stw r5,0(r3) ;*cothread_t = stack heap pointer (GPR1) - lwz r1,0(r1) ;deallocate stack frame - lwz r8,8(r1) ;fetch return address - lmw r30,-8(r1) ;restore GPR30 and GPR31 - mtlr r8 ;return address in LR - blr ;return - - -;***** -;extern "C" void co_delete(cothread_t cothread); -;GPR3 = cothread -;***** - -_co_delete: - b L_free$stub ;free(GPR3) - - -;***** -;extern "C" void co_switch(cothread_t cothread); -;GPR3 = cothread -;***** -; -;Frame looks like: -; -;Old New Value -; 8(r1) 456(r1) Saved LR -; 4(r1) 452(r1) Saved CR -; 0(r1) 448(r1) Old GPR1 -; -4(r1) 444(r1) Saved GPR31 -; -8(r1) 440(r1) Saved GPR30 -;... ... ... -; -72(r1) 376(r1) Saved GPR14 -; -76(r1) 372(r1) Saved GPR13 -; -80(r1) 368(r1) Saved VRSAVE -; -84(r1) 364(r1) +++ -; -88(r1) 360(r1) Saved FPR31 -; -92(r1) 356(r1) +++ -; -96(r1) 352(r1) Saved FPR30 -;... ... ... -;-212(r1) 236(r1) +++ -;-216(r1) 232(r1) Saved FPR15 -;-220(r1) 228(r1) +++ -;-224(r1) 224(r1) Saved FPR14 -;-228(r1) 220(r1) +++ value -;-232(r1) 216(r1) +++ len -;-236(r1) 212(r1) +++ -;-240(r1) 208(r1) Saved VR31 -;-244(r1) 204(r1) +++ -;-248(r1) 200(r1) +++ -;-252(r1) 196(r1) +++ -;-256(r1) 192(r1) Saved VR30 -;... ... ... 
-;-388(r1) 60(r1) +++ -;-392(r1) 56(r1) +++ -;-396(r1) 52(r1) +++ -;-400(r1) 48(r1) Saved VR21 -;-404(r1) 44(r1) +++ -;-408(r1) 40(r1) +++ Param 5 (GPR7) -;-412(r1) 36(r1) +++ Param 4 (GPR6) -;-416(r1) 32(r1) Saved VR20 Param 3 (GPR5) -;-420(r1) 28(r1) - Param 2 (GPR4) -;-424(r1) 24(r1) - Param 1 (GPR3) -;-428(r1) 20(r1) - Reserved -;-432(r1) 16(r1) - Reserved -;-436(r1) 12(r1) - Reserved -;-440(r1) 8(r1) - New LR -;-444(r1) 4(r1) - New CR -;-448(r1) 0(r1) Saved GPR1 - - -_co_switch: - stmw r13,-76(r1) ;save preserved GPRs - stfd f14,-224(r1) ;save preserved FPRs - stfd f15,-216(r1) - stfd f16,-208(r1) - stfd f17,-200(r1) - stfd f18,-192(r1) - stfd f19,-184(r1) - stfd f20,-176(r1) - stfd f21,-168(r1) - stfd f22,-160(r1) - stfd f23,-152(r1) - stfd f24,-144(r1) - stfd f25,-136(r1) - stfd f26,-128(r1) - stfd f27,-120(r1) - stfd f28,-112(r1) - stfd f29,-104(r1) - stfd f30,-96(r1) - stfd f31,-88(r1) - mflr r0 ;save return address - stw r0,8(r1) - mfcr r2 ;save condition codes - stw r2,4(r1) - stwu r1,-448(r1) ;create stack frame (save 19 GPRs, 18 FRPs, 12 VRs, VRSAVE) - - mr r30,r3 ;save new context pointer - bcl 20,31,L_co_switch$spb ;get address of co_active_context -L_co_switch$spb: - mflr r31 - - addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags - lwz r8,lo16(_co_environ-L_co_switch$spb)(r29) - andis. r9,r8,0x8000 ;is it initialised? 
- bne+ L_co_switch$initialised - - addi r0,0,4 ;len = sizeof(int) - stw r0,216(r1) - addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec" - addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb) - addi r4,r1,220 ;GPR4 = &value - addi r5,r1,216 ;GPR5 = &len - addi r6,0,0 ;newp = 0 - addi r7,0,0 ;newlen = 0 - bl L_sysctlbyname$stub ;call sysctlbyname - lwz r2,220(r1) ;fetch result - addis r8,0,0x8000 ;set initialised bit - cmpwi cr5,r3,0 ;assume error means not present - cmpwi cr6,r2,0 ;test result - blt- cr5,L_co_switch$store_environ - beq cr6,L_co_switch$store_environ - oris r8,r8,0x4000 ;set the flag to say we have it! -L_co_switch$store_environ: - stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags -L_co_switch$initialised: - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$save_no_vmx - mfspr r11,256 ;save VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - stw r11,368(r1) - beq L_co_switch$save_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,32 ;starting index - beq L_co_switch$save_skip_vr20 - stvx v20,r1,r2 ;save VR20 -L_co_switch$save_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$save_skip_vr21 - stvx v21,r1,r2 ;save VR21 -L_co_switch$save_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$save_skip_vr22 - stvx v22,r1,r2 ;save VR22 -L_co_switch$save_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$save_skip_vr23 - stvx v23,r1,r2 ;save VR23 -L_co_switch$save_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$save_skip_vr24 - stvx v24,r1,r2 ;save VR24 -L_co_switch$save_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$save_skip_vr25 - stvx v25,r1,r2 ;save VR25 -L_co_switch$save_skip_vr25: - addi r2,r2,16 ;stride - andi. 
r0,r11,0x0020 ;check bit 26 - beq L_co_switch$save_skip_vr26 - stvx v26,r1,r2 ;save VR26 -L_co_switch$save_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$save_skip_vr27 - stvx v27,r1,r2 ;save VR27 -L_co_switch$save_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$save_skip_vr28 - stvx v28,r1,r2 ;save VR28 -L_co_switch$save_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$save_skip_vr29 - stvx v29,r1,r2 ;save VR29 -L_co_switch$save_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$save_skip_vr30 - stvx v30,r1,r2 ;save VR30 -L_co_switch$save_skip_vr30: - addi r2,r2,16 ;stride - andi. r0,r11,0x0001 ;check bit 31 - beq L_co_switch$save_skip_vr31 - stvx v31,r1,r2 ;save VR31 -L_co_switch$save_skip_vr31: -L_co_switch$save_no_vmx: - - addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context - lwz r5,lo16(_co_active_context-L_co_switch$spb)(r4) - stw r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context - stw r1,0(r5) ;save current stack pointer - lwz r1,0(r30) ;get new stack pointer - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$restore_no_vmx - lwz r11,368(r1) ;restore VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - mtspr 256,r11 - beq L_co_switch$restore_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,32 ;starting index - beq L_co_switch$restore_skip_vr20 - lvx v20,r1,r2 ;restore VR20 -L_co_switch$restore_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$restore_skip_vr21 - lvx v21,r1,r2 ;restore VR21 -L_co_switch$restore_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$restore_skip_vr22 - lvx v22,r1,r2 ;restore VR22 -L_co_switch$restore_skip_vr22: - addi r2,r2,16 ;stride - andi. 
r0,r11,0x0100 ;check bit 23 - beq L_co_switch$restore_skip_vr23 - lvx v23,r1,r2 ;restore VR23 -L_co_switch$restore_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$restore_skip_vr24 - lvx v24,r1,r2 ;restore VR24 -L_co_switch$restore_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$restore_skip_vr25 - lvx v25,r1,r2 ;restore VR25 -L_co_switch$restore_skip_vr25: - addi r2,r2,16 ;stride - andi. r0,r11,0x0020 ;check bit 26 - beq L_co_switch$restore_skip_vr26 - lvx v26,r1,r2 ;restore VR26 -L_co_switch$restore_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$restore_skip_vr27 - lvx v27,r1,r2 ;restore VR27 -L_co_switch$restore_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$restore_skip_vr28 - lvx v28,r1,r2 ;restore VR28 -L_co_switch$restore_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$restore_skip_vr29 - lvx v29,r1,r2 ;restore VR29 -L_co_switch$restore_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$restore_skip_vr30 - lvx v30,r1,r2 ;restore VR30 -L_co_switch$restore_skip_vr30: - addi r2,r2,16 ;stride - andi. 
r0,r11,0x0001 ;check bit 31 - beq L_co_switch$restore_skip_vr31 - lvx v31,r1,r2 ;restore VR31 -L_co_switch$restore_skip_vr31: -L_co_switch$restore_no_vmx: - - lwz r1,0(r1) ;deallocate stack frame - lwz r6,8(r1) ;return address in GPR6 - lwz r7,4(r1) ;condition codes in GPR7 - addi r0,0,0 ;make thread main crash if it returns - lmw r13,-76(r1) ;restore preserved GPRs - lfd f14,-224(r1) ;restore preserved FPRs - lfd f15,-216(r1) - lfd f16,-208(r1) - lfd f17,-200(r1) - lfd f18,-192(r1) - lfd f19,-184(r1) - lfd f20,-176(r1) - lfd f21,-168(r1) - lfd f22,-160(r1) - lfd f23,-152(r1) - lfd f24,-144(r1) - lfd f25,-136(r1) - lfd f26,-128(r1) - lfd f27,-120(r1) - lfd f28,-112(r1) - lfd f29,-104(r1) - lfd f30,-96(r1) - lfd f31,-88(r1) - mtlr r0 - mtctr r6 ;restore return address - mtcrf 32,r7 ;restore preserved condition codes - mtcrf 16,r7 - mtcrf 8,r7 - bctr ;return - - - -;Import external functions - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_malloc$stub: - .indirect_symbol _malloc - mflr r0 - bcl 20,31,L_malloc$spb -L_malloc$spb: - mflr r11 - addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb) - mtlr r0 - lwzu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_malloc$lazy_ptr: - .indirect_symbol _malloc - .long dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_free$stub: - .indirect_symbol _free - mflr r0 - bcl 20,31,L_free$spb -L_free$spb: - mflr r11 - addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb) - mtlr r0 - lwzu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_free$lazy_ptr: - .indirect_symbol _free - .long dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_sysctlbyname$stub: - .indirect_symbol _sysctlbyname - mflr r0 - bcl 20,31,L_sysctlbyname$spb -L_sysctlbyname$spb: - mflr r11 - addis 
r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb) - mtlr r0 - lwzu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_sysctlbyname$lazy_ptr: - .indirect_symbol _sysctlbyname - .long dyld_stub_binding_helper - - -;This needs to be here! - - .subsections_via_symbols - diff --git a/ppc64.s b/ppc64.s deleted file mode 100644 index 2fb048d7..00000000 --- a/ppc64.s +++ /dev/null @@ -1,513 +0,0 @@ -;***** -;libco.ppc64 (2007-12-05) -;author: Vas Crabb -;license: public domain -; -;cross-platform 64-bit PowerPC implementation of libco -;special thanks to byuu for writing the original version -; -;[ABI compatibility] -;- gcc; mac os x; ppc64 -; -;[nonvolatile registers] -;- GPR1, GPR13 - GPR31 -;- FPR14 - FPR31 -;- V20 - V31 -;- VRSAVE, CR2 - CR4 -; -;[volatile registers] -;- GPR0, GPR2 - GPR12 -;- FPR0 - FPR13 -;- V0 - V19 -;- LR, CTR, XER, CR0, CR1, CR5 - CR7 -;***** - - -;Declare some target-specific stuff - - .section __TEXT,__text,regular,pure_instructions - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .machine ppc64 - - -;Constants - - .cstring - .align 3 - -_sysctl_altivec: - .ascii "hw.optional.altivec\0" - - -;Declare space for variables - -.lcomm _co_environ,4,2 ;bit 0 = initialised, bit 1 = have Altivec/VMX -.lcomm _co_primary_buffer,1024,3 ;buffer (will be zeroed by loader) - - .data - .align 3 - -_co_active_context: - .quad _co_primary_buffer - - - .text - .align 2 - - -;Declare exported names - -.globl _co_active -.globl _co_create -.globl _co_delete -.globl _co_switch - - -;***** -;extern "C" cothread_t co_active(); -;return = GPR3 -;***** - -_co_active: - mflr r0 ;GPR0 = return address - bcl 20,31,L_co_active$spb -L_co_active$spb: - mflr r2 ;GPR2 set for position-independance - addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3 - ld r3,lo16(_co_active_context-L_co_active$spb)(r3) - mtlr r0 ;LR = return address - blr ;return - - -;***** -;extern "C" 
cothread_t co_create(unsigned int heapsize, void (*coentry)()); -;GPR3 = heapsize -;GPR4 = coentry -;return = GPR3 -;***** - -_co_create: - mflr r0 ;GPR0 = return address - std r30,-16(r1) ;save GPR30 and GPR31 - std r31,-8(r1) - std r0,16(r1) ;save return address - stdu r1,-(2*8+16+48)(r1) ;allocate 16 bytes for locals/parameters - -;create heap space (stack + register storage) - addi r31,r3,1024-48 ;subtract space for linkage - mr r30,r4 ;GPR30 = coentry - addi r3,r3,1024 ;allocate extra memory for contextual info - bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024) - add r4,r3,r31 ;GPR4 points to top-of-stack - rldicr r5,r4,0,59 ;force 16-byte alignment - -;store thread entry point + registers, so that first call to co_switch will execute coentry - std r30,16(r5) ;store entry point - addi r6,0,2+19+18+12*2+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE - addi r0,0,0 - addi r7,0,8 ;start at 8(GPR5) - mtctr r6 -L_co_create$clear_loop: - stdx r0,r5,r7 ;clear a double - addi r7,r7,-8 ;increment pointer - bdnz L_co_create$clear_loop ;loop - stdu r5,-544(r5) ;store top of stack - -;initialize context memory heap and return - addis r9,0,0x8000 ;GPR13 not set (system TLS) - std r5,0(r3) ;*cothread_t = stack heap pointer (GPR1) - stw r9,8(r3) ;this is a flag word - ld r1,0(r1) ;deallocate stack frame - ld r8,16(r1) ;fetch return address - ld r30,-16(r1) ;restore GPR30 and GPR31 - ld r31,-8(r1) - mtlr r8 ;return address in LR - blr ;return - - -;***** -;extern "C" void co_delete(cothread_t cothread); -;GPR3 = cothread -;***** - -_co_delete: - b L_free$stub ;free(GPR3) - - -;***** -;extern "C" void co_switch(cothread_t cothread); -;GPR3 = cothread -;***** -; -;Frame looks like: -; -;Old New Value -; 16(r1) 560(r1) Saved LR -; 8(r1) 552(r1) Saved CR -; 0(r1) 544(r1) Old GPR1 -; -8(r1) 536(r1) Saved GPR31 -; -16(r1) 528(r1) Saved GPR30 -;... ... ... 
-;-144(r1) 400(r1) Saved GPR14 -;-152(r1) 392(r1) Saved GPR13 -;-160(r1) 384(r1) Saved FPR31 -;-168(r1) 376(r1) Saved FPR30 -;... ... ... -;-288(r1) 256(r1) Saved FPR15 -;-296(r1) 248(r1) Saved FPR14 -;-304(r1) 240(r1) Saved VRSAVE -;-312(r1) 232(r1) +++ value -;-320(r1) 224(r1) Saved VR31 len -;-328(r1) 216(r1) +++ -;-336(r1) 208(r1) Saved VR30 -;... ... ... -;-456(r1) 88(r1) +++ -;-464(r1) 80(r1) Saved VR22 Param 5 (GPR7) -;-472(r1) 72(r1) +++ Param 4 (GPR6) -;-480(r1) 64(r1) Saved VR21 Param 3 (GPR5) -;-488(r1) 56(r1) +++ Param 2 (GPR4) -;-496(r1) 48(r1) Saved VR20 Param 1 (GPR3) -;-504(r1) 40(r1) - Reserved -;-512(r1) 32(r1) - Reserved -;-520(r1) 24(r1) - Reserved -;-528(r1) 16(r1) - New LR -;-536(r1) 8(r1) - New CR -;-544(r1) 0(r1) Saved GPR1 - - -_co_switch: - std r13,-152(r1) ;save preserved GPRs - std r14,-144(r1) - std r15,-136(r1) - std r16,-128(r1) - std r17,-120(r1) - std r18,-112(r1) - std r19,-104(r1) - std r20,-96(r1) - std r21,-88(r1) - std r22,-80(r1) - std r23,-72(r1) - std r24,-64(r1) - std r25,-56(r1) - std r26,-48(r1) - std r27,-40(r1) - std r28,-32(r1) - std r29,-24(r1) - std r30,-16(r1) - std r31,-8(r1) - mflr r0 ;save return address - std r0,16(r1) - mfcr r2 ;save condition codes - stw r2,8(r1) - stdu r1,-544(r1) ;create stack frame (save 19 GPRs, 18 FRPs, 12 VRs, VRSAVE) - stfd f14,248(r1) ;save preserved FPRs - stfd f15,256(r1) - stfd f16,264(r1) - stfd f17,272(r1) - stfd f18,280(r1) - stfd f19,288(r1) - stfd f20,296(r1) - stfd f21,304(r1) - stfd f22,312(r1) - stfd f23,320(r1) - stfd f24,328(r1) - stfd f25,336(r1) - stfd f26,344(r1) - stfd f27,352(r1) - stfd f28,360(r1) - stfd f29,368(r1) - stfd f30,376(r1) - stfd f31,384(r1) - - mr r30,r3 ;save new context pointer - bcl 20,31,L_co_switch$spb ;get address of co_active_context -L_co_switch$spb: - mflr r31 - - addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags - lwz r8,lo16(_co_environ-L_co_switch$spb)(r29) - andis. r9,r8,0x8000 ;is it initialised? 
- bne+ L_co_switch$initialised - - addi r0,0,4 ;len = sizeof(int) - std r0,224(r1) - addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec" - addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb) - addi r4,r1,232 ;GPR4 = &value - addi r5,r1,224 ;GPR5 = &len - addi r6,0,0 ;newp = 0 - addi r7,0,0 ;newlen = 0 - bl L_sysctlbyname$stub ;call sysctlbyname - lwz r2,232(r1) ;fetch result - addis r8,0,0x8000 ;set initialised bit - cmpdi cr5,r3,0 ;assume error means not present - cmpwi cr6,r2,0 ;test result - blt- cr5,L_co_switch$store_environ - beq cr6,L_co_switch$store_environ - oris r8,r8,0x4000 ;set the flag to say we have it! -L_co_switch$store_environ: - stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags -L_co_switch$initialised: - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$save_no_vmx - mfspr r11,256 ;save VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - stw r11,240(r1) - beq L_co_switch$save_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,48 ;starting index - beq L_co_switch$save_skip_vr20 - stvx v20,r1,r2 ;save VR20 -L_co_switch$save_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$save_skip_vr21 - stvx v21,r1,r2 ;save VR21 -L_co_switch$save_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$save_skip_vr22 - stvx v22,r1,r2 ;save VR22 -L_co_switch$save_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$save_skip_vr23 - stvx v23,r1,r2 ;save VR23 -L_co_switch$save_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$save_skip_vr24 - stvx v24,r1,r2 ;save VR24 -L_co_switch$save_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$save_skip_vr25 - stvx v25,r1,r2 ;save VR25 -L_co_switch$save_skip_vr25: - addi r2,r2,16 ;stride - andi. 
r0,r11,0x0020 ;check bit 26 - beq L_co_switch$save_skip_vr26 - stvx v26,r1,r2 ;save VR26 -L_co_switch$save_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$save_skip_vr27 - stvx v27,r1,r2 ;save VR27 -L_co_switch$save_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$save_skip_vr28 - stvx v28,r1,r2 ;save VR28 -L_co_switch$save_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$save_skip_vr29 - stvx v29,r1,r2 ;save VR29 -L_co_switch$save_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$save_skip_vr30 - stvx v30,r1,r2 ;save VR30 -L_co_switch$save_skip_vr30: - addi r2,r2,16 ;stride - andi. r0,r11,0x0001 ;check bit 31 - beq L_co_switch$save_skip_vr31 - stvx v31,r1,r2 ;save VR31 -L_co_switch$save_skip_vr31: -L_co_switch$save_no_vmx: - - addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context - ld r5,lo16(_co_active_context-L_co_switch$spb)(r4) - std r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context - std r1,0(r5) ;save current stack pointer - ld r1,0(r30) ;get new stack pointer - lwz r12,8(r30) ;have we already set GPR13 (system TLS)? - andis. r0,r12,0x8000 - beq+ L_co_switch$gpr13_set - std r13,392(r1) - xoris r12,r12,0x8000 - stw r12,8(r30) -L_co_switch$gpr13_set: - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$restore_no_vmx - lwz r11,240(r1) ;restore VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - mtspr 256,r11 - beq L_co_switch$restore_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,48 ;starting index - beq L_co_switch$restore_skip_vr20 - lvx v20,r1,r2 ;restore VR20 -L_co_switch$restore_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$restore_skip_vr21 - lvx v21,r1,r2 ;restore VR21 -L_co_switch$restore_skip_vr21: - addi r2,r2,16 ;stride - andi. 
r0,r11,0x0200 ;check bit 22 - beq L_co_switch$restore_skip_vr22 - lvx v22,r1,r2 ;restore VR22 -L_co_switch$restore_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$restore_skip_vr23 - lvx v23,r1,r2 ;restore VR23 -L_co_switch$restore_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$restore_skip_vr24 - lvx v24,r1,r2 ;restore VR24 -L_co_switch$restore_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$restore_skip_vr25 - lvx v25,r1,r2 ;restore VR25 -L_co_switch$restore_skip_vr25: - addi r2,r2,16 ;stride - andi. r0,r11,0x0020 ;check bit 26 - beq L_co_switch$restore_skip_vr26 - lvx v26,r1,r2 ;restore VR26 -L_co_switch$restore_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$restore_skip_vr27 - lvx v27,r1,r2 ;restore VR27 -L_co_switch$restore_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$restore_skip_vr28 - lvx v28,r1,r2 ;restore VR28 -L_co_switch$restore_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$restore_skip_vr29 - lvx v29,r1,r2 ;restore VR29 -L_co_switch$restore_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$restore_skip_vr30 - lvx v30,r1,r2 ;restore VR30 -L_co_switch$restore_skip_vr30: - addi r2,r2,16 ;stride - andi. 
r0,r11,0x0001 ;check bit 31 - beq L_co_switch$restore_skip_vr31 - lvx v31,r1,r2 ;restore VR31 -L_co_switch$restore_skip_vr31: -L_co_switch$restore_no_vmx: - - lfd f14,248(r1) ;restore preserved FPRs - lfd f15,256(r1) - lfd f16,264(r1) - lfd f17,272(r1) - lfd f18,280(r1) - lfd f19,288(r1) - lfd f20,296(r1) - lfd f21,304(r1) - lfd f22,312(r1) - lfd f23,320(r1) - lfd f24,328(r1) - lfd f25,336(r1) - lfd f26,344(r1) - lfd f27,352(r1) - lfd f28,360(r1) - lfd f29,368(r1) - lfd f30,376(r1) - lfd f31,384(r1) - addi r0,0,0 ;make thread main crash if it returns - ld r1,0(r1) ;deallocate stack frame - ld r6,16(r1) ;return address in GPR6 - lwz r7,8(r1) ;condition codes in GPR7 - ld r13,-152(r1) ;restore preserved GPRs - ld r14,-144(r1) - ld r15,-136(r1) - ld r16,-128(r1) - ld r17,-120(r1) - ld r18,-112(r1) - ld r19,-104(r1) - ld r20,-96(r1) - ld r21,-88(r1) - ld r22,-80(r1) - ld r23,-72(r1) - ld r24,-64(r1) - ld r25,-56(r1) - ld r26,-48(r1) - ld r27,-40(r1) - ld r28,-32(r1) - ld r29,-24(r1) - ld r30,-16(r1) - ld r31,-8(r1) - mtlr r0 - mtctr r6 ;restore return address - mtcrf 32,r7 ;restore preserved condition codes - mtcrf 16,r7 - mtcrf 8,r7 - bctr ;return - - - -;Import external functions - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_malloc$stub: - .indirect_symbol _malloc - mflr r0 - bcl 20,31,L_malloc$spb -L_malloc$spb: - mflr r11 - addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb) - mtlr r0 - ldu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_malloc$lazy_ptr: - .indirect_symbol _malloc - .quad dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_free$stub: - .indirect_symbol _free - mflr r0 - bcl 20,31,L_free$spb -L_free$spb: - mflr r11 - addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb) - mtlr r0 - ldu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_free$lazy_ptr: - .indirect_symbol _free - .quad 
dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_sysctlbyname$stub: - .indirect_symbol _sysctlbyname - mflr r0 - bcl 20,31,L_sysctlbyname$spb -L_sysctlbyname$spb: - mflr r11 - addis r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb) - mtlr r0 - ldu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_sysctlbyname$lazy_ptr: - .indirect_symbol _sysctlbyname - .quad dyld_stub_binding_helper - - -;This needs to be here! - - .subsections_via_symbols - From 648061bb95297cb5427d3deb7929a4cee2723151 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Tue, 28 Dec 2010 12:53:15 +1100 Subject: [PATCH 07/43] Initial commit of bgameboy v000. The source tarball also included empty obj/ and out/ directories which git does not support. byuu says: Project started, so basically everything is new. It's basically a rough skeleton that mimics bsnes project structure. Eventually the src/gameboy folder will be copied into bsnes-official and used by the chip/supergameboy core. The middleware layer (supergameboy/interface) will be merged into a new chip/icd2 folder that will represent direct Super Game Boy emulation in the future. At least, if all goes according to plan. There is a simple GUI that can load ROMs, but do nothing after it. It's not hooked up to ruby yet. There is a basic system class and interface to expose the video/audio/input functions. There is a basic memory bus that doesn't support any MBCs yet. There is a CPU skeleton that only handles easy read/write access to the CPU registers (AF is a really fucked up register.) The core is not hooked up to libco yet, but I intend for it to be, so that I can run the CPU + LCD how I like. If it turns out the LCD+audio is easily enslavable, then I'll probably drop libco and just run it like a regular emulator, using a thread wrapper around it in bsnes only. We'll see. 
The CPU doesn't actually support any opcodes, and loading a ROM won't actually execute anything. --- amd64.c | 104 ++++++++++++++ fiber.c | 51 +++++++ libco.c | 23 +++ libco.h | 34 +++++ ppc.c | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++++ sjlj.c | 102 ++++++++++++++ ucontext.c | 67 +++++++++ x86.c | 93 ++++++++++++ 8 files changed, 881 insertions(+) create mode 100644 amd64.c create mode 100644 fiber.c create mode 100644 libco.c create mode 100644 libco.h create mode 100644 ppc.c create mode 100644 sjlj.c create mode 100644 ucontext.c create mode 100644 x86.c diff --git a/amd64.c b/amd64.c new file mode 100644 index 00000000..5f1cfca9 --- /dev/null +++ b/amd64.c @@ -0,0 +1,104 @@ +/* + libco.amd64 (2009-10-12) + author: byuu + license: public domain +*/ + +#define LIBCO_C +#include "libco.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local long long co_active_buffer[64]; +static thread_local cothread_t co_active_handle = 0; +static void (*co_swap)(cothread_t, cothread_t) = 0; + +#ifdef _WIN32 + //ABI: Win64 + static unsigned char co_swap_function[] = { + 0x48, 0x89, 0x22, 0x48, 0x8B, 0x21, 0x58, 0x48, 0x89, 0x6A, 0x08, 0x48, 0x89, 0x72, 0x10, 0x48, + 0x89, 0x7A, 0x18, 0x48, 0x89, 0x5A, 0x20, 0x4C, 0x89, 0x62, 0x28, 0x4C, 0x89, 0x6A, 0x30, 0x4C, + 0x89, 0x72, 0x38, 0x4C, 0x89, 0x7A, 0x40, 0x48, 0x81, 0xC2, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83, + 0xE2, 0xF0, 0x0F, 0x29, 0x32, 0x0F, 0x29, 0x7A, 0x10, 0x44, 0x0F, 0x29, 0x42, 0x20, 0x44, 0x0F, + 0x29, 0x4A, 0x30, 0x44, 0x0F, 0x29, 0x52, 0x40, 0x44, 0x0F, 0x29, 0x5A, 0x50, 0x44, 0x0F, 0x29, + 0x62, 0x60, 0x44, 0x0F, 0x29, 0x6A, 0x70, 0x44, 0x0F, 0x29, 0xB2, 0x80, 0x00, 0x00, 0x00, 0x44, + 0x0F, 0x29, 0xBA, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x69, 0x08, 0x48, 0x8B, 0x71, 0x10, 0x48, + 0x8B, 0x79, 0x18, 0x48, 0x8B, 0x59, 0x20, 0x4C, 0x8B, 0x61, 0x28, 0x4C, 0x8B, 0x69, 0x30, 0x4C, + 0x8B, 0x71, 0x38, 0x4C, 0x8B, 0x79, 0x40, 0x48, 0x81, 0xC1, 0x80, 0x00, 0x00, 0x00, 0x48, 
0x83, + 0xE1, 0xF0, 0x0F, 0x29, 0x31, 0x0F, 0x29, 0x79, 0x10, 0x44, 0x0F, 0x29, 0x41, 0x20, 0x44, 0x0F, + 0x29, 0x49, 0x30, 0x44, 0x0F, 0x29, 0x51, 0x40, 0x44, 0x0F, 0x29, 0x59, 0x50, 0x44, 0x0F, 0x29, + 0x61, 0x60, 0x44, 0x0F, 0x29, 0x69, 0x70, 0x44, 0x0F, 0x29, 0xB1, 0x80, 0x00, 0x00, 0x00, 0x44, + 0x0F, 0x29, 0xB9, 0x90, 0x00, 0x00, 0x00, 0xFF, 0xE0, + }; + + #include + + void co_init() { + DWORD old_privileges; + VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); + } +#else + //ABI: SystemV + static unsigned char co_swap_function[] = { + 0x48, 0x89, 0x26, 0x48, 0x8B, 0x27, 0x58, 0x48, 0x89, 0x6E, 0x08, 0x48, 0x89, 0x5E, 0x10, 0x4C, + 0x89, 0x66, 0x18, 0x4C, 0x89, 0x6E, 0x20, 0x4C, 0x89, 0x76, 0x28, 0x4C, 0x89, 0x7E, 0x30, 0x48, + 0x8B, 0x6F, 0x08, 0x48, 0x8B, 0x5F, 0x10, 0x4C, 0x8B, 0x67, 0x18, 0x4C, 0x8B, 0x6F, 0x20, 0x4C, + 0x8B, 0x77, 0x28, 0x4C, 0x8B, 0x7F, 0x30, 0xFF, 0xE0, + }; + + #include + #include + + void co_init() { + unsigned long long addr = (unsigned long long)co_swap_function; + unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE)); + unsigned long long size = (addr - base) + sizeof co_swap_function; + mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + } +#endif + +static void crash() { + assert(0); /* called only if cothread_t entrypoint returns */ +} + +cothread_t co_active() { + if(!co_active_handle) co_active_handle = &co_active_buffer; + return co_active_handle; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + size += 512; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ + + if(handle = (cothread_t)malloc(size)) { + long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long 
long)crash; /* crash if entrypoint returns */ + *--p = (long long)entrypoint; /* start of function */ + *(long long*)handle = (long long)p; /* stack pointer */ + } + + return handle; +} + +void co_delete(cothread_t handle) { + free(handle); +} + +void co_switch(cothread_t handle) { + register cothread_t co_previous_handle = co_active_handle; + co_swap(co_active_handle = handle, co_previous_handle); +} + +#ifdef __cplusplus +} +#endif diff --git a/fiber.c b/fiber.c new file mode 100644 index 00000000..02ef5bc7 --- /dev/null +++ b/fiber.c @@ -0,0 +1,51 @@ +/* + libco.win (2008-01-28) + authors: Nach, byuu + license: public domain +*/ + +#define LIBCO_C +#include "libco.h" +#define WINVER 0x0400 +#define _WIN32_WINNT 0x0400 +#define WIN32_LEAN_AND_MEAN +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local cothread_t co_active_ = 0; + +static void __stdcall co_thunk(void *coentry) { + ((void (*)(void))coentry)(); +} + +cothread_t co_active() { + if(!co_active_) { + ConvertThreadToFiber(0); + co_active_ = GetCurrentFiber(); + } + return co_active_; +} + +cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { + if(!co_active_) { + ConvertThreadToFiber(0); + co_active_ = GetCurrentFiber(); + } + return (cothread_t)CreateFiber(heapsize, co_thunk, (void*)coentry); +} + +void co_delete(cothread_t cothread) { + DeleteFiber(cothread); +} + +void co_switch(cothread_t cothread) { + co_active_ = cothread; + SwitchToFiber(cothread); +} + +#ifdef __cplusplus +} +#endif diff --git a/libco.c b/libco.c new file mode 100644 index 00000000..55676263 --- /dev/null +++ b/libco.c @@ -0,0 +1,23 @@ +/* + libco + auto-selection module + license: public domain +*/ + +#if defined(__GNUC__) && defined(__i386__) + #include "x86.c" +#elif defined(__GNUC__) && defined(__amd64__) + #include "amd64.c" +#elif defined(__GNUC__) && defined(_ARCH_PPC) + #include "ppc.c" +#elif defined(__GNUC__) + #include "sjlj.c" +#elif defined(_MSC_VER) && defined(_M_IX86) + 
#include "x86.c" +#elif defined(_MSC_VER) && defined(_M_AMD64) + #include "amd64.c" +#elif defined(_MSC_VER) + #include "fiber.c" +#else + #error "libco: unsupported processor, compiler or operating system" +#endif diff --git a/libco.h b/libco.h new file mode 100644 index 00000000..deb954fb --- /dev/null +++ b/libco.h @@ -0,0 +1,34 @@ +/* + libco + version: 0.16 (2010-12-24) + license: public domain +*/ + +#ifndef LIBCO_H +#define LIBCO_H + +#ifdef LIBCO_C + #ifdef LIBCO_MP + #define thread_local __thread + #else + #define thread_local + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* cothread_t; + +cothread_t co_active(); +cothread_t co_create(unsigned int, void (*)(void)); +void co_delete(cothread_t); +void co_switch(cothread_t); + +#ifdef __cplusplus +} +#endif + +/* ifndef LIBCO_H */ +#endif diff --git a/ppc.c b/ppc.c new file mode 100644 index 00000000..a6028fdb --- /dev/null +++ b/ppc.c @@ -0,0 +1,407 @@ +/* + libco.ppc (2010-10-17) + author: blargg + license: public domain +*/ + +/* PowerPC 32/64 using embedded or external asm, with optional +floating-point and AltiVec save/restore */ + +#define LIBCO_C +#include "libco.h" +#include +#include +#include + +#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM) + +#if LIBCO_MPROTECT + #include + #include +#endif + +/* State format (offsets in 32-bit words) + ++0 Pointer to swap code + Rest of function descriptor for entry function ++8 PC ++10 SP + Special regs + GPRs + FPRs + VRs + stack +*/ + +enum { state_size = 1024 }; +enum { above_stack = 2048 }; +enum { stack_align = 256 }; + +static thread_local cothread_t co_active_handle = 0; + +/**** Determine environment ****/ + +#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__) + +/* Whether function calls are indirect through a descriptor, +or are directly to function */ +#ifndef LIBCO_PPCDESC + #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) + #define LIBCO_PPCDESC 1 + #endif +#endif + +#ifdef 
LIBCO_PPC_ASM + + #ifdef __cplusplus + extern "C" + #endif + + /* Swap code is in ppc.S */ + void co_swap_asm( cothread_t, cothread_t ); + #define CO_SWAP_ASM( x, y ) co_swap_asm( x, y ) + +#else + +/* Swap code is here in array. Please leave dieassembly comments, +as they make it easy to see what it does, and reorder instructions +if one wants to see whether that improves performance. */ +static const uint32_t libco_ppc_code [] = { +#if LIBCO_PPC64 + 0x7d000026, /* mfcr r8 */ + 0xf8240028, /* std r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0xf9c40048, /* std r14,72(r4) */ + 0xf9e40050, /* std r15,80(r4) */ + 0xfa040058, /* std r16,88(r4) */ + 0xfa240060, /* std r17,96(r4) */ + 0xfa440068, /* std r18,104(r4) */ + 0xfa640070, /* std r19,112(r4) */ + 0xfa840078, /* std r20,120(r4) */ + 0xfaa40080, /* std r21,128(r4) */ + 0xfac40088, /* std r22,136(r4) */ + 0xfae40090, /* std r23,144(r4) */ + 0xfb040098, /* std r24,152(r4) */ + 0xfb2400a0, /* std r25,160(r4) */ + 0xfb4400a8, /* std r26,168(r4) */ + 0xfb6400b0, /* std r27,176(r4) */ + 0xfb8400b8, /* std r28,184(r4) */ + 0xfba400c0, /* std r29,192(r4) */ + 0xfbc400c8, /* std r30,200(r4) */ + 0xfbe400d0, /* std r31,208(r4) */ + 0xf9240020, /* std r9,32(r4) */ + 0xe8e30020, /* ld r7,32(r3) */ + 0xe8230028, /* ld r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030,/*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0xe9c30048, /* ld r14,72(r3) */ + 0xe9e30050, /* ld r15,80(r3) */ + 0xea030058, /* ld r16,88(r3) */ + 0xea230060, /* ld r17,96(r3) */ + 0xea430068, /* ld r18,104(r3) */ + 0xea630070, /* ld r19,112(r3) */ + 0xea830078, /* ld r20,120(r3) */ + 0xeaa30080, /* ld r21,128(r3) */ + 0xeac30088, /* ld r22,136(r3) */ + 0xeae30090, /* ld r23,144(r3) */ + 0xeb030098, /* ld r24,152(r3) */ + 0xeb2300a0, /* ld r25,160(r3) */ + 0xeb4300a8, /* ld r26,168(r3) */ + 0xeb6300b0, /* ld r27,176(r3) */ + 0xeb8300b8, /* ld r28,184(r3) */ + 0xeba300c0, /* ld r29,192(r3) */ + 
0xebc300c8, /* ld r30,200(r3) */ + 0xebe300d0, /* ld r31,208(r3) */ + 0x7ccff120, /* mtcr r6 */ +#else + 0x7d000026, /* mfcr r8 */ + 0x90240028, /* stw r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0x91a4003c, /* stw r13,60(r4) */ + 0x91c40040, /* stw r14,64(r4) */ + 0x91e40044, /* stw r15,68(r4) */ + 0x92040048, /* stw r16,72(r4) */ + 0x9224004c, /* stw r17,76(r4) */ + 0x92440050, /* stw r18,80(r4) */ + 0x92640054, /* stw r19,84(r4) */ + 0x92840058, /* stw r20,88(r4) */ + 0x92a4005c, /* stw r21,92(r4) */ + 0x92c40060, /* stw r22,96(r4) */ + 0x92e40064, /* stw r23,100(r4) */ + 0x93040068, /* stw r24,104(r4) */ + 0x9324006c, /* stw r25,108(r4) */ + 0x93440070, /* stw r26,112(r4) */ + 0x93640074, /* stw r27,116(r4) */ + 0x93840078, /* stw r28,120(r4) */ + 0x93a4007c, /* stw r29,124(r4) */ + 0x93c40080, /* stw r30,128(r4) */ + 0x93e40084, /* stw r31,132(r4) */ + 0x91240020, /* stw r9,32(r4) */ + 0x80e30020, /* lwz r7,32(r3) */ + 0x80230028, /* lwz r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030,/*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0x81a3003c, /* lwz r13,60(r3) */ + 0x81c30040, /* lwz r14,64(r3) */ + 0x81e30044, /* lwz r15,68(r3) */ + 0x82030048, /* lwz r16,72(r3) */ + 0x8223004c, /* lwz r17,76(r3) */ + 0x82430050, /* lwz r18,80(r3) */ + 0x82630054, /* lwz r19,84(r3) */ + 0x82830058, /* lwz r20,88(r3) */ + 0x82a3005c, /* lwz r21,92(r3) */ + 0x82c30060, /* lwz r22,96(r3) */ + 0x82e30064, /* lwz r23,100(r3) */ + 0x83030068, /* lwz r24,104(r3) */ + 0x8323006c, /* lwz r25,108(r3) */ + 0x83430070, /* lwz r26,112(r3) */ + 0x83630074, /* lwz r27,116(r3) */ + 0x83830078, /* lwz r28,120(r3) */ + 0x83a3007c, /* lwz r29,124(r3) */ + 0x83c30080, /* lwz r30,128(r3) */ + 0x83e30084, /* lwz r31,132(r3) */ + 0x7ccff120, /* mtcr r6 */ +#endif + +#ifndef LIBCO_PPC_NOFP + 0xd9c400e0, /* stfd f14,224(r4) */ + 0xd9e400e8, /* stfd f15,232(r4) */ + 0xda0400f0, /* stfd f16,240(r4) */ + 0xda2400f8, /* stfd f17,248(r4) */ 
+ 0xda440100, /* stfd f18,256(r4) */ + 0xda640108, /* stfd f19,264(r4) */ + 0xda840110, /* stfd f20,272(r4) */ + 0xdaa40118, /* stfd f21,280(r4) */ + 0xdac40120, /* stfd f22,288(r4) */ + 0xdae40128, /* stfd f23,296(r4) */ + 0xdb040130, /* stfd f24,304(r4) */ + 0xdb240138, /* stfd f25,312(r4) */ + 0xdb440140, /* stfd f26,320(r4) */ + 0xdb640148, /* stfd f27,328(r4) */ + 0xdb840150, /* stfd f28,336(r4) */ + 0xdba40158, /* stfd f29,344(r4) */ + 0xdbc40160, /* stfd f30,352(r4) */ + 0xdbe40168, /* stfd f31,360(r4) */ + 0xc9c300e0, /* lfd f14,224(r3) */ + 0xc9e300e8, /* lfd f15,232(r3) */ + 0xca0300f0, /* lfd f16,240(r3) */ + 0xca2300f8, /* lfd f17,248(r3) */ + 0xca430100, /* lfd f18,256(r3) */ + 0xca630108, /* lfd f19,264(r3) */ + 0xca830110, /* lfd f20,272(r3) */ + 0xcaa30118, /* lfd f21,280(r3) */ + 0xcac30120, /* lfd f22,288(r3) */ + 0xcae30128, /* lfd f23,296(r3) */ + 0xcb030130, /* lfd f24,304(r3) */ + 0xcb230138, /* lfd f25,312(r3) */ + 0xcb430140, /* lfd f26,320(r3) */ + 0xcb630148, /* lfd f27,328(r3) */ + 0xcb830150, /* lfd f28,336(r3) */ + 0xcba30158, /* lfd f29,344(r3) */ + 0xcbc30160, /* lfd f30,352(r3) */ + 0xcbe30168, /* lfd f31,360(r3) */ +#endif + +#ifdef __ALTIVEC__ + 0x7ca042a6, /* mfvrsave r5 */ + 0x39040180, /* addi r8,r4,384 */ + 0x39240190, /* addi r9,r4,400 */ + 0x70a00fff, /* andi. 
r0,r5,4095 */ + 0x90a40034, /* stw r5,52(r4) */ + 0x4182005c, /* beq- 2 */ + 0x7e8041ce, /* stvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea049ce, /* stvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec041ce, /* stvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee049ce, /* stvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0041ce, /* stvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2049ce, /* stvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4041ce, /* stvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6049ce, /* stvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8041ce, /* stvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa049ce, /* stvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc041ce, /* stvx v30,r0,r8 */ + 0x7fe049ce, /* stvx v31,r0,r9 */ + 0x80a30034,/*2:lwz r5,52(r3) */ + 0x39030180, /* addi r8,r3,384 */ + 0x39230190, /* addi r9,r3,400 */ + 0x70a00fff, /* andi. r0,r5,4095 */ + 0x7ca043a6, /* mtvrsave r5 */ + 0x4d820420, /* beqctr */ + 0x7e8040ce, /* lvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea048ce, /* lvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec040ce, /* lvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee048ce, /* lvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0040ce, /* lvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2048ce, /* lvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4040ce, /* lvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6048ce, /* lvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8040ce, /* lvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa048ce, /* lvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc040ce, /* lvx v30,r0,r8 */ + 0x7fe048ce, /* lvx v31,r0,r9 */ +#endif + + 0x4e800420, /* bctr */ +}; + + #if LIBCO_PPCDESC + /* Function call goes through indirect descriptor */ + #define CO_SWAP_ASM( x, y ) \ + ((void (*)( cothread_t, cothread_t )) (uintptr_t) x)( x, y ) + 
#else + /* Function call goes directly to code */ + #define CO_SWAP_ASM( x, y ) \ + ((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y ) + #endif + +#endif + +static uint32_t* co_create_( unsigned size, uintptr_t entry ) +{ + uint32_t* t = (uint32_t*) malloc( size ); + + (void) entry; + + #if LIBCO_PPCDESC + if ( t ) + { + /* Copy entry's descriptor */ + memcpy( t, (void*) entry, sizeof (void*) * 3 ); + + /* Set function pointer to swap routine */ + #ifdef LIBCO_PPC_ASM + *(const void**) t = *(void**) &co_swap_asm; + #else + *(const void**) t = libco_ppc_code; + #endif + } + #endif + + return t; +} + +cothread_t co_create( unsigned int size, void (*entry_)( void ) ) +{ + uintptr_t entry = (uintptr_t) entry_; + uint32_t* t = NULL; + + /* Be sure main thread was successfully allocated */ + if ( co_active() ) + { + size += state_size + above_stack + stack_align; + t = co_create_( size, entry ); + } + + if ( t ) + { + uintptr_t sp; + int shift; + + /* Save current registers into new thread, so that any special ones will + have proper values when thread is begun */ + CO_SWAP_ASM( t, t ); + + #if LIBCO_PPCDESC + /* Get real address */ + entry = (uintptr_t) *(void**) entry; + #endif + + /* Put stack near end of block, and align */ + sp = (uintptr_t) t + size - above_stack; + sp -= sp % stack_align; + + /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we + save and restore them as 64 bits, regardless of the size the ABI + uses. So, we manually write pointers at the proper size. We always + save and restore at the same address, and since PPC is big-endian, + we must put the low byte first on PPC32. */ + + /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts + and don't have to care how many bits uintptr_t is. 
*/ + #if LIBCO_PPC64 + shift = 16; + #else + shift = 0; + #endif + + /* Set up so entry will be called on next swap */ + t [8] = (uint32_t) (entry >> shift >> shift); + t [9] = (uint32_t) entry; + + t [10] = (uint32_t) (sp >> shift >> shift); + t [11] = (uint32_t) sp; + } + + return t; +} + +void co_delete( cothread_t t ) +{ + free( t ); +} + +static void co_init_( void ) +{ + #if LIBCO_MPROTECT + /* TODO: pre- and post-pad PPC code so that this doesn't make other + data executable and writable */ + long page_size = sysconf( _SC_PAGESIZE ); + if ( page_size > 0 ) + { + uintptr_t align = page_size; + uintptr_t begin = (uintptr_t) libco_ppc_code; + uintptr_t end = begin + sizeof libco_ppc_code; + + /* Align beginning and end */ + end += align - 1; + end -= end % align; + begin -= begin % align; + + mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC ); + } + #endif + + co_active_handle = co_create_( state_size, (uintptr_t) &co_switch ); +} + +cothread_t co_active() +{ + if ( !co_active_handle ) + co_init_(); + + return co_active_handle; +} + +void co_switch( cothread_t t ) +{ + cothread_t old = co_active_handle; + co_active_handle = t; + + CO_SWAP_ASM( t, old ); +} diff --git a/sjlj.c b/sjlj.c new file mode 100644 index 00000000..8b72b614 --- /dev/null +++ b/sjlj.c @@ -0,0 +1,102 @@ +/* + libco.sjlj (2008-01-28) + author: Nach + license: public domain +*/ + +/* + * Note this was designed for UNIX systems. Based on ideas expressed in a paper + * by Ralf Engelschall. + * For SJLJ on other systems, one would want to rewrite springboard() and + * co_create() and hack the jmb_buf stack pointer. 
+ */ + +#define LIBCO_C +#include "libco.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + sigjmp_buf context; + void (*coentry)(void); + void *stack; +} cothread_struct; + +static thread_local cothread_struct co_primary; +static thread_local cothread_struct *creating, *co_running = 0; + +static void springboard(int ignored) { + if(sigsetjmp(creating->context, 0)) { + co_running->coentry(); + } +} + +cothread_t co_active() { + if(!co_running) co_running = &co_primary; + return (cothread_t)co_running; +} + +cothread_t co_create(unsigned int size, void (*coentry)(void)) { + if(!co_running) co_running = &co_primary; + + cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); + if(thread) { + struct sigaction handler; + struct sigaction old_handler; + + stack_t stack; + stack_t old_stack; + + thread->coentry = thread->stack = 0; + + stack.ss_flags = 0; + stack.ss_size = size; + thread->stack = stack.ss_sp = malloc(size); + if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) { + handler.sa_handler = springboard; + handler.sa_flags = SA_ONSTACK; + sigemptyset(&handler.sa_mask); + creating = thread; + + if(!sigaction(SIGUSR1, &handler, &old_handler)) { + if(!raise(SIGUSR1)) { + thread->coentry = coentry; + } + sigaltstack(&old_stack, 0); + sigaction(SIGUSR1, &old_handler, 0); + } + } + + if(thread->coentry != coentry) { + co_delete(thread); + thread = 0; + } + } + + return (cothread_t)thread; +} + +void co_delete(cothread_t cothread) { + if(cothread) { + if(((cothread_struct*)cothread)->stack) { + free(((cothread_struct*)cothread)->stack); + } + free(cothread); + } +} + +void co_switch(cothread_t cothread) { + if(!sigsetjmp(co_running->context, 0)) { + co_running = (cothread_struct*)cothread; + siglongjmp(co_running->context, 1); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/ucontext.c b/ucontext.c new file mode 100644 index 00000000..17472f6b --- /dev/null +++ b/ucontext.c @@ -0,0 +1,67 @@ +/* + 
libco.ucontext (2008-01-28) + author: Nach + license: public domain +*/ + +/* + * WARNING: the overhead of POSIX ucontext is very high, + * assembly versions of libco or libco_sjlj should be much faster + * + * This library only exists for two reasons: + * 1 - as an initial test for the viability of a ucontext implementation + * 2 - to demonstrate the power and speed of libco over existing implementations, + * such as pth (which defaults to wrapping ucontext on unix targets) + * + * Use this library only as a *last resort* + */ + +#define LIBCO_C +#include "libco.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local ucontext_t co_primary; +static thread_local ucontext_t *co_running = 0; + +cothread_t co_active() { + if(!co_running) co_running = &co_primary; + return (cothread_t)co_running; +} + +cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { + if(!co_running) co_running = &co_primary; + ucontext_t *thread = (ucontext_t*)malloc(sizeof(ucontext_t)); + if(thread) { + if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = malloc(heapsize))) { + thread->uc_link = co_running; + thread->uc_stack.ss_size = heapsize; + makecontext(thread, coentry, 0); + } else { + co_delete((cothread_t)thread); + thread = 0; + } + } + return (cothread_t)thread; +} + +void co_delete(cothread_t cothread) { + if(cothread) { + if(((ucontext_t*)cothread)->uc_stack.ss_sp) { free(((ucontext_t*)cothread)->uc_stack.ss_sp); } + free(cothread); + } +} + +void co_switch(cothread_t cothread) { + ucontext_t *old_thread = co_running; + co_running = (ucontext_t*)cothread; + swapcontext(old_thread, co_running); +} + +#ifdef __cplusplus +} +#endif diff --git a/x86.c b/x86.c new file mode 100644 index 00000000..d8f820b0 --- /dev/null +++ b/x86.c @@ -0,0 +1,93 @@ +/* + libco.x86 (2009-10-12) + author: byuu + license: public domain +*/ + +#define LIBCO_C +#include "libco.h" +#include +#include + +#ifdef __cplusplus +extern 
"C" { +#endif + +#if defined(_MSC_VER) + #define fastcall __fastcall +#elif defined(__GNUC__) + #define fastcall __attribute__((fastcall)) +#else + #error "libco: please define fastcall macro" +#endif + +static thread_local long co_active_buffer[64]; +static thread_local cothread_t co_active_handle = 0; +static void (fastcall *co_swap)(cothread_t, cothread_t) = 0; + +//ABI: fastcall +static unsigned char co_swap_function[] = { + 0x89, 0x22, 0x8B, 0x21, 0x58, 0x89, 0x6A, 0x04, 0x89, 0x72, 0x08, 0x89, 0x7A, 0x0C, 0x89, 0x5A, + 0x10, 0x8B, 0x69, 0x04, 0x8B, 0x71, 0x08, 0x8B, 0x79, 0x0C, 0x8B, 0x59, 0x10, 0xFF, 0xE0, +}; + +#ifdef _WIN32 + #include + + void co_init() { + DWORD old_privileges; + VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); + } +#else + #include + #include + + void co_init() { + unsigned long addr = (unsigned long)co_swap_function; + unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); + unsigned long size = (addr - base) + sizeof co_swap_function; + mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + } +#endif + +static void crash() { + assert(0); /* called only if cothread_t entrypoint returns */ +} + +cothread_t co_active() { + if(!co_active_handle) co_active_handle = &co_active_buffer; + return co_active_handle; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + if(!co_swap) { + co_init(); + co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + size += 256; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ + + if(handle = (cothread_t)malloc(size)) { + long *p = (long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ + } + + return handle; +} + 
+void co_delete(cothread_t handle) { + free(handle); +} + +void co_switch(cothread_t handle) { + register cothread_t co_previous_handle = co_active_handle; + co_swap(co_active_handle = handle, co_previous_handle); +} + +#ifdef __cplusplus +} +#endif From ddffcd76000cf0d8b72c6a387dfafed25021a874 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Sat, 20 Jun 2015 15:44:05 +1000 Subject: [PATCH 11/43] Update to v094r28 release. byuu says: This WIP substantially restructures the ruby API for the first time since that project started. It is my hope that with this restructuring, destruction of the ruby objects should now be deterministic, which should fix the crashing on closing the emulator on Linux. We'll see I guess ... either way, it removed two layers of wrappers from ruby, so it's a pretty nice code cleanup. It won't compile on Windows due to a few issues I didn't see until uploading the WIP, too lazy to upload another. But I fixed all the compilation issues locally, so it'll work on Windows again with the next WIP (unless I break something else.) (Kind of annoying that Linux defines glActiveTexture but Windows doesn't.) --- ucontext.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ucontext.c b/ucontext.c index 17472f6b..2e9e90ec 100644 --- a/ucontext.c +++ b/ucontext.c @@ -18,6 +18,8 @@ #define LIBCO_C #include "libco.h" + +#define _BSD_SOURCE #include #include From 6b44980c6c00e6b429613402e55ffb8a95e391d6 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Mon, 22 Jun 2015 23:31:49 +1000 Subject: [PATCH 12/43] Update to v094r29 release. byuu says: Note: for Windows users, please go to nall/intrinsics.hpp line 60 and correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before compiling, otherwise things won't work at all. This will be a really major WIP for the core SNES emulation, so please test as thoroughly as possible. I rewrote the 65816 CPU core's dispatcher from a jump table to a switch table. 
This was so that I could pass class variables as parameters to opcodes without crazy theatrics. With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^= stuff, and all of the template versions of opcodes. I also removed some stupid pointless flag tests in xcn and pflag that would always be true. I sure hope that AWJ is happy with this; because this change was so that my flag assignments and branch tests won't need to build regs.P into a full 8-bit variable anymore. It does of course incur a slight performance hit when you pass in variables by-value to functions, but it should help with binary size (and thus cache) by reducing a lot of extra functions. (I know I could have used template parameters for some things even with a switch table, but chose not to for the aforementioned reasons.) Overall, it's about a ~1% speedup from the previous build. The CPU core instructions were never a bottleneck, but I did want to fix the P flag building stuff because that really was a dumb mistake v_v' --- amd64.c | 104 +++++++++++++++++++++++++++++++++++++++++--------------- arm.c | 71 ++++++++++++++++++++++++++++++++++++++ fiber.c | 2 +- libco.c | 35 +++++++++++-------- libco.h | 3 +- ppc.c | 1 + sjlj.c | 1 + x86.c | 37 +++++++++++++------- 8 files changed, 198 insertions(+), 56 deletions(-) create mode 100644 arm.c diff --git a/amd64.c b/amd64.c index 5f1cfca9..37122f44 100644 --- a/amd64.c +++ b/amd64.c @@ -1,11 +1,16 @@ /* - libco.amd64 (2009-10-12) + libco.amd64 (2015-06-19) author: byuu license: public domain */ #define LIBCO_C #include "libco.h" + +//Win64 only: provides a substantial speed-up, but will thrash XMM regs +//do not use this unless you are certain your application won't use SSE +//#define LIBCO_AMD64_NO_SSE + #include #include @@ -18,21 +23,54 @@ static thread_local cothread_t co_active_handle = 0; static void (*co_swap)(cothread_t, cothread_t) = 0; #ifdef _WIN32 - //ABI: Win64 + /* ABI: Win64 */ static unsigned char co_swap_function[] = { - 0x48, 
0x89, 0x22, 0x48, 0x8B, 0x21, 0x58, 0x48, 0x89, 0x6A, 0x08, 0x48, 0x89, 0x72, 0x10, 0x48, - 0x89, 0x7A, 0x18, 0x48, 0x89, 0x5A, 0x20, 0x4C, 0x89, 0x62, 0x28, 0x4C, 0x89, 0x6A, 0x30, 0x4C, - 0x89, 0x72, 0x38, 0x4C, 0x89, 0x7A, 0x40, 0x48, 0x81, 0xC2, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83, - 0xE2, 0xF0, 0x0F, 0x29, 0x32, 0x0F, 0x29, 0x7A, 0x10, 0x44, 0x0F, 0x29, 0x42, 0x20, 0x44, 0x0F, - 0x29, 0x4A, 0x30, 0x44, 0x0F, 0x29, 0x52, 0x40, 0x44, 0x0F, 0x29, 0x5A, 0x50, 0x44, 0x0F, 0x29, - 0x62, 0x60, 0x44, 0x0F, 0x29, 0x6A, 0x70, 0x44, 0x0F, 0x29, 0xB2, 0x80, 0x00, 0x00, 0x00, 0x44, - 0x0F, 0x29, 0xBA, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x69, 0x08, 0x48, 0x8B, 0x71, 0x10, 0x48, - 0x8B, 0x79, 0x18, 0x48, 0x8B, 0x59, 0x20, 0x4C, 0x8B, 0x61, 0x28, 0x4C, 0x8B, 0x69, 0x30, 0x4C, - 0x8B, 0x71, 0x38, 0x4C, 0x8B, 0x79, 0x40, 0x48, 0x81, 0xC1, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83, - 0xE1, 0xF0, 0x0F, 0x29, 0x31, 0x0F, 0x29, 0x79, 0x10, 0x44, 0x0F, 0x29, 0x41, 0x20, 0x44, 0x0F, - 0x29, 0x49, 0x30, 0x44, 0x0F, 0x29, 0x51, 0x40, 0x44, 0x0F, 0x29, 0x59, 0x50, 0x44, 0x0F, 0x29, - 0x61, 0x60, 0x44, 0x0F, 0x29, 0x69, 0x70, 0x44, 0x0F, 0x29, 0xB1, 0x80, 0x00, 0x00, 0x00, 0x44, - 0x0F, 0x29, 0xB9, 0x90, 0x00, 0x00, 0x00, 0xFF, 0xE0, + 0x48, 0x89, 0x22, /* mov [rdx],rsp */ + 0x48, 0x8b, 0x21, /* mov rsp,[rcx] */ + 0x58, /* pop rax */ + 0x48, 0x89, 0x6a, 0x08, /* mov [rdx+ 8],rbp */ + 0x48, 0x89, 0x72, 0x10, /* mov [rdx+16],rsi */ + 0x48, 0x89, 0x7a, 0x18, /* mov [rdx+24],rdi */ + 0x48, 0x89, 0x5a, 0x20, /* mov [rdx+32],rbx */ + 0x4c, 0x89, 0x62, 0x28, /* mov [rdx+40],r12 */ + 0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+48],r13 */ + 0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */ + 0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */ + #if !defined(LIBCO_AMD64_NO_SSE) + 0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */ + 0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */ + 0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */ + 0x48, 0x83, 0xc2, 0x70, /* add rdx,112 */ + 0x44, 0x0f, 0x29, 0x4a, 0x10, /* 
movaps [rdx+ 16],xmm9 */ + 0x44, 0x0f, 0x29, 0x52, 0x20, /* movaps [rdx+ 32],xmm10 */ + 0x44, 0x0f, 0x29, 0x5a, 0x30, /* movaps [rdx+ 48],xmm11 */ + 0x44, 0x0f, 0x29, 0x62, 0x40, /* movaps [rdx+ 64],xmm12 */ + 0x44, 0x0f, 0x29, 0x6a, 0x50, /* movaps [rdx+ 80],xmm13 */ + 0x44, 0x0f, 0x29, 0x72, 0x60, /* movaps [rdx+ 96],xmm14 */ + 0x44, 0x0f, 0x29, 0x7a, 0x70, /* movaps [rdx+112],xmm15 */ + #endif + 0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+ 8] */ + 0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+16] */ + 0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+24] */ + 0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+32] */ + 0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+40] */ + 0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */ + 0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */ + 0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */ + #if !defined(LIBCO_AMD64_NO_SSE) + 0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */ + 0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */ + 0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */ + 0x48, 0x83, 0xc1, 0x70, /* add rcx,112 */ + 0x44, 0x0f, 0x28, 0x49, 0x10, /* movaps xmm9, [rcx+ 16] */ + 0x44, 0x0f, 0x28, 0x51, 0x20, /* movaps xmm10,[rcx+ 32] */ + 0x44, 0x0f, 0x28, 0x59, 0x30, /* movaps xmm11,[rcx+ 48] */ + 0x44, 0x0f, 0x28, 0x61, 0x40, /* movaps xmm12,[rcx+ 64] */ + 0x44, 0x0f, 0x28, 0x69, 0x50, /* movaps xmm13,[rcx+ 80] */ + 0x44, 0x0f, 0x28, 0x71, 0x60, /* movaps xmm14,[rcx+ 96] */ + 0x44, 0x0f, 0x28, 0x79, 0x70, /* movaps xmm15,[rcx+112] */ + #endif + 0xff, 0xe0, /* jmp rax */ }; #include @@ -42,12 +80,24 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); } #else - //ABI: SystemV + /* ABI: SystemV */ static unsigned char co_swap_function[] = { - 0x48, 0x89, 0x26, 0x48, 0x8B, 0x27, 0x58, 0x48, 0x89, 0x6E, 0x08, 0x48, 0x89, 0x5E, 0x10, 0x4C, - 0x89, 0x66, 0x18, 0x4C, 0x89, 0x6E, 0x20, 0x4C, 0x89, 0x76, 0x28, 0x4C, 0x89, 0x7E, 0x30, 0x48, - 0x8B, 0x6F, 0x08, 
0x48, 0x8B, 0x5F, 0x10, 0x4C, 0x8B, 0x67, 0x18, 0x4C, 0x8B, 0x6F, 0x20, 0x4C, - 0x8B, 0x77, 0x28, 0x4C, 0x8B, 0x7F, 0x30, 0xFF, 0xE0, + 0x48, 0x89, 0x26, /* mov [rsi],rsp */ + 0x48, 0x8b, 0x27, /* mov rsp,[rdi] */ + 0x58, /* pop rax */ + 0x48, 0x89, 0x6e, 0x08, /* mov [rsi+ 8],rbp */ + 0x48, 0x89, 0x5e, 0x10, /* mov [rsi+16],rbx */ + 0x4c, 0x89, 0x66, 0x18, /* mov [rsi+24],r12 */ + 0x4c, 0x89, 0x6e, 0x20, /* mov [rsi+32],r13 */ + 0x4c, 0x89, 0x76, 0x28, /* mov [rsi+40],r14 */ + 0x4c, 0x89, 0x7e, 0x30, /* mov [rsi+48],r15 */ + 0x48, 0x8b, 0x6f, 0x08, /* mov rbp,[rdi+ 8] */ + 0x48, 0x8b, 0x5f, 0x10, /* mov rbx,[rdi+16] */ + 0x4c, 0x8b, 0x67, 0x18, /* mov r12,[rdi+24] */ + 0x4c, 0x8b, 0x6f, 0x20, /* mov r13,[rdi+32] */ + 0x4c, 0x8b, 0x77, 0x28, /* mov r14,[rdi+40] */ + 0x4c, 0x8b, 0x7f, 0x30, /* mov r15,[rdi+48] */ + 0xff, 0xe0, /* jmp rax */ }; #include @@ -62,7 +112,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #endif static void crash() { - assert(0); /* called only if cothread_t entrypoint returns */ + assert(0); /* called only if cothread_t entrypoint returns */ } cothread_t co_active() { @@ -77,14 +127,14 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; } if(!co_active_handle) co_active_handle = &co_active_buffer; - size += 512; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ + size += 512; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ if(handle = (cothread_t)malloc(size)) { - long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long long)crash; /* crash if entrypoint returns */ - *--p = (long long)entrypoint; /* start of function */ - *(long long*)handle = (long long)p; /* stack pointer */ + long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long long)crash; /* crash if entrypoint returns */ + *--p 
= (long long)entrypoint; /* start of function */ + *(long long*)handle = (long long)p; /* stack pointer */ } return handle; diff --git a/arm.c b/arm.c new file mode 100644 index 00000000..70dbdd1b --- /dev/null +++ b/arm.c @@ -0,0 +1,71 @@ +/* + libco.arm (2015-06-18) + author: byuu + license: public domain +*/ + +#define LIBCO_C +#include "libco.h" + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local unsigned long co_active_buffer[64]; +static thread_local cothread_t co_active_handle = 0; +static void (*co_swap)(cothread_t, cothread_t) = 0; + +static unsigned long co_swap_function[] = { + 0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */ + 0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */ + 0xe12fff1e, /* bx lr */ +}; + +void co_init() { + unsigned long addr = (unsigned long)co_swap_function; + unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); + unsigned long size = (addr - base) + sizeof co_swap_function; + mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); +} + +cothread_t co_active() { + if(!co_active_handle) co_active_handle = &co_active_buffer; + return co_active_handle; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + unsigned long* handle = 0; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + size += 256; + size &= ~15; + + if(handle = (unsigned long*)malloc(size)) { + unsigned long* p = (unsigned long*)((unsigned char*)handle + size); + handle[8] = (unsigned long)p; + handle[9] = (unsigned long)entrypoint; + } + + return handle; +} + +void co_delete(cothread_t handle) { + free(handle); +} + +void co_switch(cothread_t handle) { + cothread_t co_previous_handle = co_active_handle; + co_swap(co_active_handle = handle, co_previous_handle); +} + +#ifdef __cplusplus +} +#endif diff --git a/fiber.c b/fiber.c index 02ef5bc7..f57c0799 100644 --- a/fiber.c +++ 
b/fiber.c @@ -6,9 +6,9 @@ #define LIBCO_C #include "libco.h" + #define WINVER 0x0400 #define _WIN32_WINNT 0x0400 -#define WIN32_LEAN_AND_MEAN #include #ifdef __cplusplus diff --git a/libco.c b/libco.c index 55676263..c48ffd97 100644 --- a/libco.c +++ b/libco.c @@ -1,23 +1,30 @@ /* libco - auto-selection module license: public domain */ -#if defined(__GNUC__) && defined(__i386__) - #include "x86.c" -#elif defined(__GNUC__) && defined(__amd64__) - #include "amd64.c" -#elif defined(__GNUC__) && defined(_ARCH_PPC) - #include "ppc.c" -#elif defined(__GNUC__) - #include "sjlj.c" -#elif defined(_MSC_VER) && defined(_M_IX86) - #include "x86.c" -#elif defined(_MSC_VER) && defined(_M_AMD64) - #include "amd64.c" +#if defined(__clang__) || defined(__GNUC__) + #if defined(__i386__) + #include "x86.c" + #elif defined(__amd64__) + #include "amd64.c" + #elif defined(__arm__) + #include "arm.c" + #elif defined(_ARCH_PPC) + #include "ppc.c" + #elif defined(_WIN32) + #include "fiber.c" + #else + #include "sjlj.c" + #endif #elif defined(_MSC_VER) - #include "fiber.c" + #if defined(_M_IX86) + #include "x86.c" + #elif defined(_M_AMD64) + #include "amd64.c" + #else + #include "fiber.c" + #endif #else #error "libco: unsupported processor, compiler or operating system" #endif diff --git a/libco.h b/libco.h index deb954fb..1851696e 100644 --- a/libco.h +++ b/libco.h @@ -1,6 +1,7 @@ /* libco - version: 0.16 (2010-12-24) + version: 0.17 (2015-06-18) + author: byuu license: public domain */ diff --git a/ppc.c b/ppc.c index a6028fdb..d509cd9e 100644 --- a/ppc.c +++ b/ppc.c @@ -9,6 +9,7 @@ floating-point and AltiVec save/restore */ #define LIBCO_C #include "libco.h" + #include #include #include diff --git a/sjlj.c b/sjlj.c index 8b72b614..9203efe7 100644 --- a/sjlj.c +++ b/sjlj.c @@ -13,6 +13,7 @@ #define LIBCO_C #include "libco.h" + #include #include #include diff --git a/x86.c b/x86.c index d8f820b0..44bbe4b8 100644 --- a/x86.c +++ b/x86.c @@ -6,6 +6,7 @@ #define LIBCO_C #include "libco.h" + 
#include #include @@ -13,10 +14,10 @@ extern "C" { #endif -#if defined(_MSC_VER) - #define fastcall __fastcall -#elif defined(__GNUC__) +#if defined(__clang__) || defined(__GNUC__) #define fastcall __attribute__((fastcall)) +#elif defined(_MSC_VER) + #define fastcall __fastcall #else #error "libco: please define fastcall macro" #endif @@ -25,10 +26,20 @@ static thread_local long co_active_buffer[64]; static thread_local cothread_t co_active_handle = 0; static void (fastcall *co_swap)(cothread_t, cothread_t) = 0; -//ABI: fastcall +/* ABI: fastcall */ static unsigned char co_swap_function[] = { - 0x89, 0x22, 0x8B, 0x21, 0x58, 0x89, 0x6A, 0x04, 0x89, 0x72, 0x08, 0x89, 0x7A, 0x0C, 0x89, 0x5A, - 0x10, 0x8B, 0x69, 0x04, 0x8B, 0x71, 0x08, 0x8B, 0x79, 0x0C, 0x8B, 0x59, 0x10, 0xFF, 0xE0, + 0x89, 0x22, /* mov [edx],esp */ + 0x8b, 0x21, /* mov esp,[ecx] */ + 0x58, /* pop eax */ + 0x89, 0x6a, 0x04, /* mov [edx+ 4],ebp */ + 0x89, 0x72, 0x08, /* mov [edx+ 8],esi */ + 0x89, 0x7a, 0x0c, /* mov [edx+12],edi */ + 0x89, 0x5a, 0x10, /* mov [edx+16],ebx */ + 0x8b, 0x69, 0x04, /* mov ebp,[ecx+ 4] */ + 0x8b, 0x71, 0x08, /* mov esi,[ecx+ 8] */ + 0x8b, 0x79, 0x0c, /* mov edi,[ecx+12] */ + 0x8b, 0x59, 0x10, /* mov ebx,[ecx+16] */ + 0xff, 0xe0, /* jmp eax */ }; #ifdef _WIN32 @@ -51,7 +62,7 @@ static unsigned char co_swap_function[] = { #endif static void crash() { - assert(0); /* called only if cothread_t entrypoint returns */ + assert(0); /* called only if cothread_t entrypoint returns */ } cothread_t co_active() { @@ -66,14 +77,14 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; } if(!co_active_handle) co_active_handle = &co_active_buffer; - size += 256; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ + size += 256; /* allocate additional space for storage */ + size &= ~15; /* align stack to 16-byte boundary */ if(handle = (cothread_t)malloc(size)) { 
- long *p = (long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long)crash; /* crash if entrypoint returns */ - *--p = (long)entrypoint; /* start of function */ - *(long*)handle = (long)p; /* stack pointer */ + long *p = (long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ } return handle; From f0d1b7fa8cce45dde3ef92783ed865707cf0e7b5 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Wed, 30 Dec 2015 17:41:46 +1100 Subject: [PATCH 13/43] Update to v096r01 release. byuu says: Changelog: - restructured the project and removed a whole bunch of old/dead directives from higan/GNUmakefile - huge amounts of work on hiro/cocoa (compiles but ~70% of the functionality is commented out) - fixed a masking error in my ARM CPU disassembler [Lioncash] - SFC: decided to change board cic=(411,413) back to board region=(ntsc,pal) ... the former was too obtuse If you rename Boolean (it's a problem with an include from ruby, not from hiro) and disable all the ruby drivers, you can compile an OS X binary, but obviously it's not going to do anything. It's a boring WIP, I just wanted to push out the project structure change now at the start of this WIP cycle. 
--- doc/style.css | 8 ++++ doc/targets.html | 89 +++++++++++++++++++++++++++++++++++++++ doc/usage.html | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+) create mode 100644 doc/style.css create mode 100644 doc/targets.html create mode 100644 doc/usage.html diff --git a/doc/style.css b/doc/style.css new file mode 100644 index 00000000..5181afde --- /dev/null +++ b/doc/style.css @@ -0,0 +1,8 @@ +body { + background: #333; + color: #fff; +} + +code { + background: #444; +} diff --git a/doc/targets.html b/doc/targets.html new file mode 100644 index 00000000..d6211a15 --- /dev/null +++ b/doc/targets.html @@ -0,0 +1,89 @@ + + + + + + + +Supported targets:

+ +Note that supported targets are only those that have been tested and confirmed +working. It is quite possible that libco will work on more processors, compilers +and operating systems than those listed below. +
+ +libco.x86
+Overhead: ~5x
+Supported processor(s): 32-bit x86
+Supported compiler(s): any
+Supported operating system(s):
    +
  • Windows
  • +
  • Mac OS X
  • +
  • Linux
  • +
  • BSD
  • +
+
+ +libco.amd64
+Overhead: ~10x (Windows), ~6x (all other platforms)
+Supported processor(s): 64-bit amd64
+Supported compiler(s): any
+Supported operating system(s):
    +
  • Windows
  • +
  • Mac OS X
  • +
  • Linux
  • +
  • BSD
  • +
+
+ +libco.ppc
+Overhead: ~20x
+Supported processor(s): 32-bit PowerPC, 64-bit PowerPC
+Supported compiler(s): GNU GCC
+Supported operating system(s):
    +
+
  • Mac OS X
  • +
  • Linux
  • +
  • BSD
  • +
  • PlayStation 3
  • + +
    + +Note: this module contains compiler flags to enable/disable FPU and Altivec +support. + +
    + +libco.fiber
    +Overhead: ~15x
    +Supported processor(s): Processor independent
    +Supported compiler(s): any
    +Supported operating system(s):
      +
    • Windows
    • +
    +
    + +libco.sjlj
    +Overhead: ~30x
    +Supported processor(s): Processor independent
    +Supported compiler(s): any
    +Supported operating system(s):
      +
    • Mac OS X
    • +
    • Linux
    • +
    • BSD
    • +
    • Solaris
    • +
    +
    + +libco.ucontext
    +Overhead: ~300x
    +Supported processor(s): Processor independent
    +Supported compiler(s): any
    +Supported operating system(s):
      +
    • Linux
    • +
    • BSD
    • +
    +
    + + + diff --git a/doc/usage.html b/doc/usage.html new file mode 100644 index 00000000..3f0d81cc --- /dev/null +++ b/doc/usage.html @@ -0,0 +1,107 @@ + + + + + + + +License:

    +libco is released to the public domain. +
    + +Contact:

    +At present, you may contact me at setsunakun0 at hotmail dot com.
    +I am interested in knowing of any projects that make use of this library, +though this is only a courtesy. +
    + +Foreword:

    +libco is a cross-platform, public domain implementation of +cooperative multithreading, a feature that is sorely lacking +from the ISO C/C++ standards.
    +The library is designed for maximum speed and portability, and +not for safety or features. If safety or extra functionality is desired, +a wrapper API can easily be written to encapsulate all library functions.
    +Executing operations that are listed as not permitted +below results in undefined behavior. They may work anyway, they +may cause undesired / unknown behavior, or they may crash the +program entirely.
    +The goal of this library was to simplify the base API as much as possible, +implementing only that which cannot be implemented using pure C. Additional +functionality after this would only complicate ports of this library to new +platforms. +
    + +Porting:

    +This document is included as a reference for porting libco. Please submit any +ports you create to me, so that libco can become more useful. Please note that +since libco is public domain, you must submit your code as a work of the +public domain in order for it to be included in the official distribution. +Full credit will be given in the source code of the official release. Please +do not bother submitting code to me under any other license -- including GPL, +LGPL, BSD or CC -- I am not interested in creating a library with multiple +different licenses depending on which targets are used. +
    + +Synopsis:

    + +typedef void* cothread_t;
    +
    +cothread_t co_active();
    +cothread_t co_create(unsigned int heapsize, void (*coentry)(void));
    +void       co_delete(cothread_t cothread);
    +void       co_switch(cothread_t cothread);
    +
    +
    + +Usage: +
    + +typedef void* cothread_t;

    +Handle to cothread.
    +Handle must be of type void*.
    +A value of null (0) indicates an uninitialized or invalid +handle, whereas a non-zero value indicates a valid handle. +
    + +cothread_t co_active();

    +Return handle to current cothread. Always returns a valid handle, even when +called from the main program thread. +
    + +cothread_t co_create(unsigned int heapsize, void (*coentry)(void));

    +Create new cothread.
    +Heapsize is the amount of memory allocated for the cothread stack, specified +in bytes. This is unfortunately impossible to make fully portable. It is +recommended to specify sizes using `n * sizeof(void*)'. It is better to err +on the side of caution and allocate more memory than will be needed to ensure +compatibility with other platforms, within reason. A typical heapsize for a +32-bit architecture is ~1MB.
    +When the new cothread is first switched to with co_switch(), program execution jumps to coentry. +This function does not take any arguments, due to portability issues with +passing function arguments. However, arguments can be simulated by the use +of global variables, which can be set before the first switch to each cothread.
    +coentry() must not return, and should end with an appropriate co_switch() +call. Behavior is undefined if the entry point returns normally.
    +Library is responsible for allocating cothread stack memory, to free +the user from needing to allocate special memory capable of being used +as program stack memory on platforms where this is required.
    +User is always responsible for deleting cothreads with co_delete().
    +Return value of null (0) indicates cothread creation failed. +
    + +void co_delete(cothread_t cothread);

    +Delete specified cothread.
    +Null (0) or invalid cothread handle is not allowed.
    +Passing handle of active cothread to this function is not allowed.
    +Passing handle of primary cothread is not allowed. +
    + +void co_switch(cothread_t cothread);

    +Switch to specified cothread.
    +Null (0) or invalid cothread handle is not allowed.
    +Passing handle of active cothread to this function is not allowed. +
    + + + From 750af6ebc38afcdb6d3aedeca7e44c1c473badff Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Tue, 5 Jan 2016 13:59:19 +1100 Subject: [PATCH 14/43] Update to v096r02 (OS X Preview for Developers) release. byuu says: Warning: this is not for the faint of heart. This is a very early, unpolished, buggy release. But help testing/fixing bugs would be greatly appreciated for anyone willing. Requirements: - Mac OS X 10.7+ - Xcode 7.2+ Installation Commands: cd higan gmake -j 4 gmake install cd ../icarus gmake -j 4 gmake install (gmake install is absolutely required, sorry. You'll be missing key files in key places if you don't run it, and nothing will work.) (gmake uninstall also exists, or you can just delete the .app bundles from your Applications folder, and the Dev folder on your desktop.) If you want to use the GBA emulation, then you need to drop the GBA BIOS into ~/Emulation/System/Game\ Boy\ Advance.sys\bios.rom Usage: You'll now find higan.app and icarus.app in your Applications folders. First, run icarus.app, navigate to where you keep your game ROMs. Now click the settings button at the bottom right, and check "Create Manifests", and click OK. (You'll need to do this every time you run icarus because there's some sort of bug on OSX saving the settings.) Now click "Import", and let it bring in your games into ~/Emulation. Note: "Create Manifests" is required. I don't yet have a pipe implementation on OS X for higan to invoke icarus yet. If you don't check this box, it won't create manifest.bml files, and your games won't run at all. Now you can run higan.app. The first thing you'll want to do is go to higan->Preferences... and assign inputs for your gamepads. At the very least, do it for the default controller for all the systems you want to emulate. Now this is very important ... close the application at this point so that it writes your config file to disk. There's a serious crashing bug, and if you trigger it, you'll lose your input bindings. 
Now the really annoying part ... go to Library->{System} and pick the game you want to play. Right now, there's a ~50% chance the application will bomb. It seems the hiro::pListView object is getting destroyed, yet somehow the internal Cocoa callbacks are being triggered anyway. I don't know how this is possible, and my attempts to debug with lldb have been a failure :( If you're unlucky, the application will crash. Restart and try again. If it crashes every single time, then you can try launching your game from the command-line instead. Example: open /Applications/higan.app \ --args ~/Emulation/Super\ Famicom/Zelda3.sfc/ Help wanted: I could really, really, really use some help with that crashing on game loading. There's a lot of rough edges, but they're all cosmetic. This one thing is pretty much the only major show-stopping issue at the moment, preventing a wider general audience pre-compiled binary preview. --- libco.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libco.c b/libco.c index c48ffd97..13eb2379 100644 --- a/libco.c +++ b/libco.c @@ -3,6 +3,10 @@ license: public domain */ +#if defined(__clang__) + #pragma clang diagnostic ignored "-Wparentheses" +#endif + #if defined(__clang__) || defined(__GNUC__) #if defined(__i386__) #include "x86.c" From 45a725e4b7262609740d8a6d55f45d383d6f2ec3 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Wed, 14 Sep 2016 21:55:53 +1000 Subject: [PATCH 15/43] Update to v101r18 release. byuu says: Changelog: - added 30 new PAL games to icarus (courtesy of Mikerochip) - new version of libco no longer requires mprotect nor W|X permissions - nall: default C compiler to -std=c11 instead of -std=c99 - nall: use `-fno-strict-aliasing` during compilation - updated nall/certificates (hopefully for the last time) - updated nall/http to newer coding conventions - nall: improve handling of range() function I didn't really work on higan at all, this is mostly just a release because lots of other things have changed. 
The most interesting is `-fno-strict-aliasing` ... basically, it joins `-fwrapv` as being "stop the GCC developers from doing *really* evil shit that could lead to security vulnerabilities or instabilities." For the most part, it's a ~2% speed penalty for higan. Except for the Sega Genesis, where it's a ~10% speedup. I have no idea how that's possible, but clearly something's going very wrong with strict aliasing on the Genesis core. So ... it is what it is. If you need the performance for the non-Genesis cores, you can turn it off in your builds. But I'm getting quite sick of C++'s "surprises" and clever compiler developers, so I'm keeping it on in all of my software going forward. --- amd64.c | 32 +-- arm.c | 16 +- fiber.c | 2 +- libco.h | 11 +- ppc.c | 675 +++++++++++++++++++++++++---------------------------- settings.h | 36 +++ sjlj.c | 13 +- ucontext.c | 26 +-- x86.c | 22 +- 9 files changed, 421 insertions(+), 412 deletions(-) create mode 100644 settings.h diff --git a/amd64.c b/amd64.c index 37122f44..235708ab 100644 --- a/amd64.c +++ b/amd64.c @@ -1,15 +1,12 @@ /* - libco.amd64 (2015-06-19) + libco.amd64 (2016-09-14) author: byuu license: public domain */ #define LIBCO_C #include "libco.h" - -//Win64 only: provides a substantial speed-up, but will thrash XMM regs -//do not use this unless you are certain your application won't use SSE -//#define LIBCO_AMD64_NO_SSE +#include "settings.h" #include #include @@ -22,9 +19,14 @@ static thread_local long long co_active_buffer[64]; static thread_local cothread_t co_active_handle = 0; static void (*co_swap)(cothread_t, cothread_t) = 0; +#ifdef LIBCO_MPROTECT + alignas(4096) +#else + section(text) +#endif #ifdef _WIN32 /* ABI: Win64 */ - static unsigned char co_swap_function[] = { + static const unsigned char co_swap_function[4096] = { 0x48, 0x89, 0x22, /* mov [rdx],rsp */ 0x48, 0x8b, 0x21, /* mov rsp,[rcx] */ 0x58, /* pop rax */ @@ -36,7 +38,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; 0x4c, 0x89, 0x6a, 
0x30, /* mov [rdx+48],r13 */ 0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */ 0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */ - #if !defined(LIBCO_AMD64_NO_SSE) + #if !defined(LIBCO_NO_SSE) 0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */ 0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */ 0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */ @@ -57,7 +59,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; 0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */ 0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */ 0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */ - #if !defined(LIBCO_AMD64_NO_SSE) + #if !defined(LIBCO_NO_SSE) 0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */ 0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */ 0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */ @@ -75,13 +77,15 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #include - void co_init() { + static void co_init() { + #ifdef LIBCO_MPROTECT DWORD old_privileges; - VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); + VirtualProtect((void*)co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READ, &old_privileges); + #endif } #else /* ABI: SystemV */ - static unsigned char co_swap_function[] = { + static const unsigned char co_swap_function[4096] = { 0x48, 0x89, 0x26, /* mov [rsi],rsp */ 0x48, 0x8b, 0x27, /* mov rsp,[rdi] */ 0x58, /* pop rax */ @@ -103,11 +107,13 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #include #include - void co_init() { + static void co_init() { + #ifdef LIBCO_MPROTECT unsigned long long addr = (unsigned long long)co_swap_function; unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE)); unsigned long long size = (addr - base) + sizeof co_swap_function; - mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + mprotect((void*)base, size, PROT_READ | PROT_EXEC); + #endif } #endif diff --git a/arm.c b/arm.c index 70dbdd1b..25f0b16c 100644 --- a/arm.c +++ b/arm.c @@ -1,11 +1,12 @@ 
/* - libco.arm (2015-06-18) + libco.arm (2016-09-14) author: byuu license: public domain */ #define LIBCO_C #include "libco.h" +#include "settings.h" #include #include @@ -20,17 +21,24 @@ static thread_local unsigned long co_active_buffer[64]; static thread_local cothread_t co_active_handle = 0; static void (*co_swap)(cothread_t, cothread_t) = 0; -static unsigned long co_swap_function[] = { +#ifdef LIBCO_MPROTECT + alignas(4096) +#else + section(text) +#endif +static const unsigned long co_swap_function[1024] = { 0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */ 0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */ 0xe12fff1e, /* bx lr */ }; -void co_init() { +static void co_init() { + #ifdef LIBCO_MPROTECT unsigned long addr = (unsigned long)co_swap_function; unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); unsigned long size = (addr - base) + sizeof co_swap_function; - mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + mprotect((void*)base, size, PROT_READ | PROT_EXEC); + #endif } cothread_t co_active() { diff --git a/fiber.c b/fiber.c index f57c0799..f2c5b726 100644 --- a/fiber.c +++ b/fiber.c @@ -17,7 +17,7 @@ extern "C" { static thread_local cothread_t co_active_ = 0; -static void __stdcall co_thunk(void *coentry) { +static void __stdcall co_thunk(void* coentry) { ((void (*)(void))coentry)(); } diff --git a/libco.h b/libco.h index 1851696e..792df0bd 100644 --- a/libco.h +++ b/libco.h @@ -1,6 +1,5 @@ /* - libco - version: 0.17 (2015-06-18) + libco v18 (2016-09-14) author: byuu license: public domain */ @@ -8,14 +7,6 @@ #ifndef LIBCO_H #define LIBCO_H -#ifdef LIBCO_C - #ifdef LIBCO_MP - #define thread_local __thread - #else - #define thread_local - #endif -#endif - #ifdef __cplusplus extern "C" { #endif diff --git a/ppc.c b/ppc.c index d509cd9e..efec3aa8 100644 --- a/ppc.c +++ b/ppc.c @@ -1,37 +1,33 @@ /* - libco.ppc (2010-10-17) + libco.ppc (2016-09-14) author: blargg license: public domain */ -/* PowerPC 32/64 using embedded or external asm, with 
optional -floating-point and AltiVec save/restore */ - #define LIBCO_C #include "libco.h" +#include "settings.h" #include #include #include -#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM) - #if LIBCO_MPROTECT - #include - #include + #include + #include #endif -/* State format (offsets in 32-bit words) +/* state format (offsets in 32-bit words) -+0 Pointer to swap code - Rest of function descriptor for entry function -+8 PC -+10 SP - Special regs - GPRs - FPRs - VRs - stack + +0 pointer to swap code + rest of function descriptor for entry function + +8 PC ++10 SP + special registers + GPRs + FPRs + VRs + stack */ enum { state_size = 1024 }; @@ -40,369 +36,332 @@ enum { stack_align = 256 }; static thread_local cothread_t co_active_handle = 0; -/**** Determine environment ****/ +/* determine environment */ #define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__) -/* Whether function calls are indirect through a descriptor, -or are directly to function */ +/* whether function calls are indirect through a descriptor, or are directly to function */ #ifndef LIBCO_PPCDESC - #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) - #define LIBCO_PPCDESC 1 - #endif + #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) + #define LIBCO_PPCDESC 1 + #endif #endif -#ifdef LIBCO_PPC_ASM - - #ifdef __cplusplus - extern "C" - #endif - - /* Swap code is in ppc.S */ - void co_swap_asm( cothread_t, cothread_t ); - #define CO_SWAP_ASM( x, y ) co_swap_asm( x, y ) - +#ifdef LIBCO_MPROTECT + alignas(4096) #else - -/* Swap code is here in array. Please leave dieassembly comments, -as they make it easy to see what it does, and reorder instructions -if one wants to see whether that improves performance. 
*/ -static const uint32_t libco_ppc_code [] = { -#if LIBCO_PPC64 - 0x7d000026, /* mfcr r8 */ - 0xf8240028, /* std r1,40(r4) */ - 0x7d2802a6, /* mflr r9 */ - 0xf9c40048, /* std r14,72(r4) */ - 0xf9e40050, /* std r15,80(r4) */ - 0xfa040058, /* std r16,88(r4) */ - 0xfa240060, /* std r17,96(r4) */ - 0xfa440068, /* std r18,104(r4) */ - 0xfa640070, /* std r19,112(r4) */ - 0xfa840078, /* std r20,120(r4) */ - 0xfaa40080, /* std r21,128(r4) */ - 0xfac40088, /* std r22,136(r4) */ - 0xfae40090, /* std r23,144(r4) */ - 0xfb040098, /* std r24,152(r4) */ - 0xfb2400a0, /* std r25,160(r4) */ - 0xfb4400a8, /* std r26,168(r4) */ - 0xfb6400b0, /* std r27,176(r4) */ - 0xfb8400b8, /* std r28,184(r4) */ - 0xfba400c0, /* std r29,192(r4) */ - 0xfbc400c8, /* std r30,200(r4) */ - 0xfbe400d0, /* std r31,208(r4) */ - 0xf9240020, /* std r9,32(r4) */ - 0xe8e30020, /* ld r7,32(r3) */ - 0xe8230028, /* ld r1,40(r3) */ - 0x48000009, /* bl 1 */ - 0x7fe00008, /* trap */ - 0x91040030,/*1:stw r8,48(r4) */ - 0x80c30030, /* lwz r6,48(r3) */ - 0x7ce903a6, /* mtctr r7 */ - 0xe9c30048, /* ld r14,72(r3) */ - 0xe9e30050, /* ld r15,80(r3) */ - 0xea030058, /* ld r16,88(r3) */ - 0xea230060, /* ld r17,96(r3) */ - 0xea430068, /* ld r18,104(r3) */ - 0xea630070, /* ld r19,112(r3) */ - 0xea830078, /* ld r20,120(r3) */ - 0xeaa30080, /* ld r21,128(r3) */ - 0xeac30088, /* ld r22,136(r3) */ - 0xeae30090, /* ld r23,144(r3) */ - 0xeb030098, /* ld r24,152(r3) */ - 0xeb2300a0, /* ld r25,160(r3) */ - 0xeb4300a8, /* ld r26,168(r3) */ - 0xeb6300b0, /* ld r27,176(r3) */ - 0xeb8300b8, /* ld r28,184(r3) */ - 0xeba300c0, /* ld r29,192(r3) */ - 0xebc300c8, /* ld r30,200(r3) */ - 0xebe300d0, /* ld r31,208(r3) */ - 0x7ccff120, /* mtcr r6 */ -#else - 0x7d000026, /* mfcr r8 */ - 0x90240028, /* stw r1,40(r4) */ - 0x7d2802a6, /* mflr r9 */ - 0x91a4003c, /* stw r13,60(r4) */ - 0x91c40040, /* stw r14,64(r4) */ - 0x91e40044, /* stw r15,68(r4) */ - 0x92040048, /* stw r16,72(r4) */ - 0x9224004c, /* stw r17,76(r4) */ - 0x92440050, /* stw 
r18,80(r4) */ - 0x92640054, /* stw r19,84(r4) */ - 0x92840058, /* stw r20,88(r4) */ - 0x92a4005c, /* stw r21,92(r4) */ - 0x92c40060, /* stw r22,96(r4) */ - 0x92e40064, /* stw r23,100(r4) */ - 0x93040068, /* stw r24,104(r4) */ - 0x9324006c, /* stw r25,108(r4) */ - 0x93440070, /* stw r26,112(r4) */ - 0x93640074, /* stw r27,116(r4) */ - 0x93840078, /* stw r28,120(r4) */ - 0x93a4007c, /* stw r29,124(r4) */ - 0x93c40080, /* stw r30,128(r4) */ - 0x93e40084, /* stw r31,132(r4) */ - 0x91240020, /* stw r9,32(r4) */ - 0x80e30020, /* lwz r7,32(r3) */ - 0x80230028, /* lwz r1,40(r3) */ - 0x48000009, /* bl 1 */ - 0x7fe00008, /* trap */ - 0x91040030,/*1:stw r8,48(r4) */ - 0x80c30030, /* lwz r6,48(r3) */ - 0x7ce903a6, /* mtctr r7 */ - 0x81a3003c, /* lwz r13,60(r3) */ - 0x81c30040, /* lwz r14,64(r3) */ - 0x81e30044, /* lwz r15,68(r3) */ - 0x82030048, /* lwz r16,72(r3) */ - 0x8223004c, /* lwz r17,76(r3) */ - 0x82430050, /* lwz r18,80(r3) */ - 0x82630054, /* lwz r19,84(r3) */ - 0x82830058, /* lwz r20,88(r3) */ - 0x82a3005c, /* lwz r21,92(r3) */ - 0x82c30060, /* lwz r22,96(r3) */ - 0x82e30064, /* lwz r23,100(r3) */ - 0x83030068, /* lwz r24,104(r3) */ - 0x8323006c, /* lwz r25,108(r3) */ - 0x83430070, /* lwz r26,112(r3) */ - 0x83630074, /* lwz r27,116(r3) */ - 0x83830078, /* lwz r28,120(r3) */ - 0x83a3007c, /* lwz r29,124(r3) */ - 0x83c30080, /* lwz r30,128(r3) */ - 0x83e30084, /* lwz r31,132(r3) */ - 0x7ccff120, /* mtcr r6 */ + section(text) #endif +static const uint32_t libco_ppc_code[1024] = { + #if LIBCO_PPC64 + 0x7d000026, /* mfcr r8 */ + 0xf8240028, /* std r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0xf9c40048, /* std r14,72(r4) */ + 0xf9e40050, /* std r15,80(r4) */ + 0xfa040058, /* std r16,88(r4) */ + 0xfa240060, /* std r17,96(r4) */ + 0xfa440068, /* std r18,104(r4) */ + 0xfa640070, /* std r19,112(r4) */ + 0xfa840078, /* std r20,120(r4) */ + 0xfaa40080, /* std r21,128(r4) */ + 0xfac40088, /* std r22,136(r4) */ + 0xfae40090, /* std r23,144(r4) */ + 0xfb040098, /* std r24,152(r4) */ 
+ 0xfb2400a0, /* std r25,160(r4) */ + 0xfb4400a8, /* std r26,168(r4) */ + 0xfb6400b0, /* std r27,176(r4) */ + 0xfb8400b8, /* std r28,184(r4) */ + 0xfba400c0, /* std r29,192(r4) */ + 0xfbc400c8, /* std r30,200(r4) */ + 0xfbe400d0, /* std r31,208(r4) */ + 0xf9240020, /* std r9,32(r4) */ + 0xe8e30020, /* ld r7,32(r3) */ + 0xe8230028, /* ld r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030, /*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0xe9c30048, /* ld r14,72(r3) */ + 0xe9e30050, /* ld r15,80(r3) */ + 0xea030058, /* ld r16,88(r3) */ + 0xea230060, /* ld r17,96(r3) */ + 0xea430068, /* ld r18,104(r3) */ + 0xea630070, /* ld r19,112(r3) */ + 0xea830078, /* ld r20,120(r3) */ + 0xeaa30080, /* ld r21,128(r3) */ + 0xeac30088, /* ld r22,136(r3) */ + 0xeae30090, /* ld r23,144(r3) */ + 0xeb030098, /* ld r24,152(r3) */ + 0xeb2300a0, /* ld r25,160(r3) */ + 0xeb4300a8, /* ld r26,168(r3) */ + 0xeb6300b0, /* ld r27,176(r3) */ + 0xeb8300b8, /* ld r28,184(r3) */ + 0xeba300c0, /* ld r29,192(r3) */ + 0xebc300c8, /* ld r30,200(r3) */ + 0xebe300d0, /* ld r31,208(r3) */ + 0x7ccff120, /* mtcr r6 */ + #else + 0x7d000026, /* mfcr r8 */ + 0x90240028, /* stw r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0x91a4003c, /* stw r13,60(r4) */ + 0x91c40040, /* stw r14,64(r4) */ + 0x91e40044, /* stw r15,68(r4) */ + 0x92040048, /* stw r16,72(r4) */ + 0x9224004c, /* stw r17,76(r4) */ + 0x92440050, /* stw r18,80(r4) */ + 0x92640054, /* stw r19,84(r4) */ + 0x92840058, /* stw r20,88(r4) */ + 0x92a4005c, /* stw r21,92(r4) */ + 0x92c40060, /* stw r22,96(r4) */ + 0x92e40064, /* stw r23,100(r4) */ + 0x93040068, /* stw r24,104(r4) */ + 0x9324006c, /* stw r25,108(r4) */ + 0x93440070, /* stw r26,112(r4) */ + 0x93640074, /* stw r27,116(r4) */ + 0x93840078, /* stw r28,120(r4) */ + 0x93a4007c, /* stw r29,124(r4) */ + 0x93c40080, /* stw r30,128(r4) */ + 0x93e40084, /* stw r31,132(r4) */ + 0x91240020, /* stw r9,32(r4) */ + 0x80e30020, /* lwz r7,32(r3) */ + 
0x80230028, /* lwz r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030, /*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0x81a3003c, /* lwz r13,60(r3) */ + 0x81c30040, /* lwz r14,64(r3) */ + 0x81e30044, /* lwz r15,68(r3) */ + 0x82030048, /* lwz r16,72(r3) */ + 0x8223004c, /* lwz r17,76(r3) */ + 0x82430050, /* lwz r18,80(r3) */ + 0x82630054, /* lwz r19,84(r3) */ + 0x82830058, /* lwz r20,88(r3) */ + 0x82a3005c, /* lwz r21,92(r3) */ + 0x82c30060, /* lwz r22,96(r3) */ + 0x82e30064, /* lwz r23,100(r3) */ + 0x83030068, /* lwz r24,104(r3) */ + 0x8323006c, /* lwz r25,108(r3) */ + 0x83430070, /* lwz r26,112(r3) */ + 0x83630074, /* lwz r27,116(r3) */ + 0x83830078, /* lwz r28,120(r3) */ + 0x83a3007c, /* lwz r29,124(r3) */ + 0x83c30080, /* lwz r30,128(r3) */ + 0x83e30084, /* lwz r31,132(r3) */ + 0x7ccff120, /* mtcr r6 */ + #endif -#ifndef LIBCO_PPC_NOFP - 0xd9c400e0, /* stfd f14,224(r4) */ - 0xd9e400e8, /* stfd f15,232(r4) */ - 0xda0400f0, /* stfd f16,240(r4) */ - 0xda2400f8, /* stfd f17,248(r4) */ - 0xda440100, /* stfd f18,256(r4) */ - 0xda640108, /* stfd f19,264(r4) */ - 0xda840110, /* stfd f20,272(r4) */ - 0xdaa40118, /* stfd f21,280(r4) */ - 0xdac40120, /* stfd f22,288(r4) */ - 0xdae40128, /* stfd f23,296(r4) */ - 0xdb040130, /* stfd f24,304(r4) */ - 0xdb240138, /* stfd f25,312(r4) */ - 0xdb440140, /* stfd f26,320(r4) */ - 0xdb640148, /* stfd f27,328(r4) */ - 0xdb840150, /* stfd f28,336(r4) */ - 0xdba40158, /* stfd f29,344(r4) */ - 0xdbc40160, /* stfd f30,352(r4) */ - 0xdbe40168, /* stfd f31,360(r4) */ - 0xc9c300e0, /* lfd f14,224(r3) */ - 0xc9e300e8, /* lfd f15,232(r3) */ - 0xca0300f0, /* lfd f16,240(r3) */ - 0xca2300f8, /* lfd f17,248(r3) */ - 0xca430100, /* lfd f18,256(r3) */ - 0xca630108, /* lfd f19,264(r3) */ - 0xca830110, /* lfd f20,272(r3) */ - 0xcaa30118, /* lfd f21,280(r3) */ - 0xcac30120, /* lfd f22,288(r3) */ - 0xcae30128, /* lfd f23,296(r3) */ - 0xcb030130, /* lfd f24,304(r3) */ - 0xcb230138, /* lfd 
f25,312(r3) */ - 0xcb430140, /* lfd f26,320(r3) */ - 0xcb630148, /* lfd f27,328(r3) */ - 0xcb830150, /* lfd f28,336(r3) */ - 0xcba30158, /* lfd f29,344(r3) */ - 0xcbc30160, /* lfd f30,352(r3) */ - 0xcbe30168, /* lfd f31,360(r3) */ -#endif + #ifndef LIBCO_PPC_NOFP + 0xd9c400e0, /* stfd f14,224(r4) */ + 0xd9e400e8, /* stfd f15,232(r4) */ + 0xda0400f0, /* stfd f16,240(r4) */ + 0xda2400f8, /* stfd f17,248(r4) */ + 0xda440100, /* stfd f18,256(r4) */ + 0xda640108, /* stfd f19,264(r4) */ + 0xda840110, /* stfd f20,272(r4) */ + 0xdaa40118, /* stfd f21,280(r4) */ + 0xdac40120, /* stfd f22,288(r4) */ + 0xdae40128, /* stfd f23,296(r4) */ + 0xdb040130, /* stfd f24,304(r4) */ + 0xdb240138, /* stfd f25,312(r4) */ + 0xdb440140, /* stfd f26,320(r4) */ + 0xdb640148, /* stfd f27,328(r4) */ + 0xdb840150, /* stfd f28,336(r4) */ + 0xdba40158, /* stfd f29,344(r4) */ + 0xdbc40160, /* stfd f30,352(r4) */ + 0xdbe40168, /* stfd f31,360(r4) */ + 0xc9c300e0, /* lfd f14,224(r3) */ + 0xc9e300e8, /* lfd f15,232(r3) */ + 0xca0300f0, /* lfd f16,240(r3) */ + 0xca2300f8, /* lfd f17,248(r3) */ + 0xca430100, /* lfd f18,256(r3) */ + 0xca630108, /* lfd f19,264(r3) */ + 0xca830110, /* lfd f20,272(r3) */ + 0xcaa30118, /* lfd f21,280(r3) */ + 0xcac30120, /* lfd f22,288(r3) */ + 0xcae30128, /* lfd f23,296(r3) */ + 0xcb030130, /* lfd f24,304(r3) */ + 0xcb230138, /* lfd f25,312(r3) */ + 0xcb430140, /* lfd f26,320(r3) */ + 0xcb630148, /* lfd f27,328(r3) */ + 0xcb830150, /* lfd f28,336(r3) */ + 0xcba30158, /* lfd f29,344(r3) */ + 0xcbc30160, /* lfd f30,352(r3) */ + 0xcbe30168, /* lfd f31,360(r3) */ + #endif -#ifdef __ALTIVEC__ - 0x7ca042a6, /* mfvrsave r5 */ - 0x39040180, /* addi r8,r4,384 */ - 0x39240190, /* addi r9,r4,400 */ - 0x70a00fff, /* andi. 
r0,r5,4095 */ - 0x90a40034, /* stw r5,52(r4) */ - 0x4182005c, /* beq- 2 */ - 0x7e8041ce, /* stvx v20,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ea049ce, /* stvx v21,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7ec041ce, /* stvx v22,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ee049ce, /* stvx v23,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f0041ce, /* stvx v24,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f2049ce, /* stvx v25,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f4041ce, /* stvx v26,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f6049ce, /* stvx v27,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f8041ce, /* stvx v28,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7fa049ce, /* stvx v29,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7fc041ce, /* stvx v30,r0,r8 */ - 0x7fe049ce, /* stvx v31,r0,r9 */ - 0x80a30034,/*2:lwz r5,52(r3) */ - 0x39030180, /* addi r8,r3,384 */ - 0x39230190, /* addi r9,r3,400 */ - 0x70a00fff, /* andi. r0,r5,4095 */ - 0x7ca043a6, /* mtvrsave r5 */ - 0x4d820420, /* beqctr */ - 0x7e8040ce, /* lvx v20,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ea048ce, /* lvx v21,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7ec040ce, /* lvx v22,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7ee048ce, /* lvx v23,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f0040ce, /* lvx v24,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f2048ce, /* lvx v25,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f4040ce, /* lvx v26,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7f6048ce, /* lvx v27,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7f8040ce, /* lvx v28,r0,r8 */ - 0x39080020, /* addi r8,r8,32 */ - 0x7fa048ce, /* lvx v29,r0,r9 */ - 0x39290020, /* addi r9,r9,32 */ - 0x7fc040ce, /* lvx v30,r0,r8 */ - 0x7fe048ce, /* lvx v31,r0,r9 */ -#endif + #ifdef __ALTIVEC__ + 0x7ca042a6, /* mfvrsave r5 */ + 0x39040180, /* addi r8,r4,384 */ + 0x39240190, /* addi r9,r4,400 */ + 0x70a00fff, /* andi. 
r0,r5,4095 */ + 0x90a40034, /* stw r5,52(r4) */ + 0x4182005c, /* beq- 2 */ + 0x7e8041ce, /* stvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea049ce, /* stvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec041ce, /* stvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee049ce, /* stvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0041ce, /* stvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2049ce, /* stvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4041ce, /* stvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6049ce, /* stvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8041ce, /* stvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa049ce, /* stvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc041ce, /* stvx v30,r0,r8 */ + 0x7fe049ce, /* stvx v31,r0,r9 */ + 0x80a30034, /*2:lwz r5,52(r3) */ + 0x39030180, /* addi r8,r3,384 */ + 0x39230190, /* addi r9,r3,400 */ + 0x70a00fff, /* andi. r0,r5,4095 */ + 0x7ca043a6, /* mtvrsave r5 */ + 0x4d820420, /* beqctr */ + 0x7e8040ce, /* lvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea048ce, /* lvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec040ce, /* lvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee048ce, /* lvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0040ce, /* lvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2048ce, /* lvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4040ce, /* lvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6048ce, /* lvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8040ce, /* lvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa048ce, /* lvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc040ce, /* lvx v30,r0,r8 */ + 0x7fe048ce, /* lvx v31,r0,r9 */ + #endif - 0x4e800420, /* bctr */ + 0x4e800420, /* bctr */ }; - #if LIBCO_PPCDESC - /* Function call goes through indirect descriptor */ - #define CO_SWAP_ASM( x, y ) \ - ((void (*)( cothread_t, cothread_t )) 
(uintptr_t) x)( x, y ) - #else - /* Function call goes directly to code */ - #define CO_SWAP_ASM( x, y ) \ - ((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y ) - #endif - +#if LIBCO_PPCDESC + /* function call goes through indirect descriptor */ + #define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)x)(x, y) +#else + /* function call goes directly to code */ + #define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)libco_ppc_code)(x, y) #endif -static uint32_t* co_create_( unsigned size, uintptr_t entry ) -{ - uint32_t* t = (uint32_t*) malloc( size ); - - (void) entry; - - #if LIBCO_PPCDESC - if ( t ) - { - /* Copy entry's descriptor */ - memcpy( t, (void*) entry, sizeof (void*) * 3 ); - - /* Set function pointer to swap routine */ - #ifdef LIBCO_PPC_ASM - *(const void**) t = *(void**) &co_swap_asm; - #else - *(const void**) t = libco_ppc_code; - #endif - } - #endif - - return t; +static uint32_t* co_create_(unsigned size, uintptr_t entry) { + (void)entry; + + uint32_t* t = (uint32_t*)malloc(size); + + #if LIBCO_PPCDESC + if(t) { + memcpy(t, (void*)entry, sizeof(void*) * 3); /* copy entry's descriptor */ + *(const void**)t = libco_ppc_code; /* set function pointer to swap routine */ + } + #endif + + return t; } -cothread_t co_create( unsigned int size, void (*entry_)( void ) ) -{ - uintptr_t entry = (uintptr_t) entry_; - uint32_t* t = NULL; - - /* Be sure main thread was successfully allocated */ - if ( co_active() ) - { - size += state_size + above_stack + stack_align; - t = co_create_( size, entry ); - } - - if ( t ) - { - uintptr_t sp; - int shift; - - /* Save current registers into new thread, so that any special ones will - have proper values when thread is begun */ - CO_SWAP_ASM( t, t ); - - #if LIBCO_PPCDESC - /* Get real address */ - entry = (uintptr_t) *(void**) entry; - #endif - - /* Put stack near end of block, and align */ - sp = (uintptr_t) t + size - above_stack; - sp -= sp % stack_align; - - /* 
On PPC32, we save and restore GPRs as 32 bits. For PPC64, we - save and restore them as 64 bits, regardless of the size the ABI - uses. So, we manually write pointers at the proper size. We always - save and restore at the same address, and since PPC is big-endian, - we must put the low byte first on PPC32. */ - - /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts - and don't have to care how many bits uintptr_t is. */ - #if LIBCO_PPC64 - shift = 16; - #else - shift = 0; - #endif - - /* Set up so entry will be called on next swap */ - t [8] = (uint32_t) (entry >> shift >> shift); - t [9] = (uint32_t) entry; - - t [10] = (uint32_t) (sp >> shift >> shift); - t [11] = (uint32_t) sp; - } - - return t; +cothread_t co_create(unsigned int size, void (*entry_)(void)) { + uintptr_t entry = (uintptr_t)entry_; + uint32_t* t = 0; + + /* be sure main thread was successfully allocated */ + if(co_active()) { + size += state_size + above_stack + stack_align; + t = co_create_(size, entry); + } + + if(t) { + uintptr_t sp; + int shift; + + /* save current registers into new thread, so that any special ones will have proper values when thread is begun */ + CO_SWAP_ASM(t, t); + + #if LIBCO_PPCDESC + entry = (uintptr_t)*(void**)entry; /* get real address */ + #endif + + /* put stack near end of block, and align */ + sp = (uintptr_t)t + size - above_stack; + sp -= sp % stack_align; + + /* on PPC32, we save and restore GPRs as 32 bits. for PPC64, we + save and restore them as 64 bits, regardless of the size the ABI + uses. so, we manually write pointers at the proper size. we always + save and restore at the same address, and since PPC is big-endian, + we must put the low byte first on PPC32. */ + + /* if uintptr_t is 32 bits, >>32 is undefined behavior, + so we do two shifts and don't have to care how many bits uintptr_t is. 
*/ + #if LIBCO_PPC64 + shift = 16; + #else + shift = 0; + #endif + + /* set up so entry will be called on next swap */ + t[ 8] = (uint32_t)(entry >> shift >> shift); + t[ 9] = (uint32_t)entry; + + t[10] = (uint32_t)(sp >> shift >> shift); + t[11] = (uint32_t)sp; + } + + return t; } -void co_delete( cothread_t t ) -{ - free( t ); +void co_delete(cothread_t t) { + free(t); } -static void co_init_( void ) -{ - #if LIBCO_MPROTECT - /* TODO: pre- and post-pad PPC code so that this doesn't make other - data executable and writable */ - long page_size = sysconf( _SC_PAGESIZE ); - if ( page_size > 0 ) - { - uintptr_t align = page_size; - uintptr_t begin = (uintptr_t) libco_ppc_code; - uintptr_t end = begin + sizeof libco_ppc_code; - - /* Align beginning and end */ - end += align - 1; - end -= end % align; - begin -= begin % align; - - mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC ); - } - #endif - - co_active_handle = co_create_( state_size, (uintptr_t) &co_switch ); +static void co_init_(void) { + #if LIBCO_MPROTECT + long page_size = sysconf(_SC_PAGESIZE); + if(page_size > 0) { + uintptr_t align = page_size; + uintptr_t begin = (uintptr_t)libco_ppc_code; + uintptr_t end = begin + sizeof libco_ppc_code; + + /* align beginning and end */ + end += align - 1; + end -= end % align; + begin -= begin % align; + + mprotect((void*)begin, end - begin, PROT_READ | PROT_EXEC); + } + #endif + + co_active_handle = co_create_(state_size, (uintptr_t)&co_switch); } -cothread_t co_active() -{ - if ( !co_active_handle ) - co_init_(); - - return co_active_handle; +cothread_t co_active() { + if(!co_active_handle) co_init_(); + + return co_active_handle; } -void co_switch( cothread_t t ) -{ - cothread_t old = co_active_handle; - co_active_handle = t; - - CO_SWAP_ASM( t, old ); +void co_switch(cothread_t t) { + cothread_t old = co_active_handle; + co_active_handle = t; + + CO_SWAP_ASM(t, old); } diff --git a/settings.h b/settings.h new file mode 100644 index 
00000000..b419683a --- /dev/null +++ b/settings.h @@ -0,0 +1,36 @@ +#ifdef LIBCO_C + +/*[amd64, arm, ppc, x86]: + by default, co_swap_function is marked as a text (code) section + if not supported, uncomment the below line to use mprotect instead */ +/* #define LIBCO_MPROTECT */ + +/*[amd64]: + Win64 only: provides a substantial speed-up, but will thrash XMM regs + do not use this unless you are certain your application won't use SSE */ +/* #define LIBCO_NO_SSE */ + +#ifdef LIBCO_C + #ifdef LIBCO_MP + #define thread_local __thread + #else + #define thread_local + #endif +#endif + +#if __STDC_VERSION__ >= 201112L + #ifndef _MSC_VER + #include + #endif +#else + #define alignas(bytes) +#endif + +#ifndef _MSC_VER + #define section(name) __attribute__((section("." #name "#"))) +#else + #define section(name) __declspec(allocate("." #name)) +#endif + +/* ifdef LIBCO_C */ +#endif diff --git a/sjlj.c b/sjlj.c index 9203efe7..dfa0aa45 100644 --- a/sjlj.c +++ b/sjlj.c @@ -5,11 +5,9 @@ */ /* - * Note this was designed for UNIX systems. Based on ideas expressed in a paper - * by Ralf Engelschall. - * For SJLJ on other systems, one would want to rewrite springboard() and - * co_create() and hack the jmb_buf stack pointer. - */ + note this was designed for UNIX systems. Based on ideas expressed in a paper by Ralf Engelschall. + for SJLJ on other systems, one would want to rewrite springboard() and co_create() and hack the jmb_buf stack pointer. 
+*/ #define LIBCO_C #include "libco.h" @@ -25,11 +23,12 @@ extern "C" { typedef struct { sigjmp_buf context; void (*coentry)(void); - void *stack; + void* stack; } cothread_struct; static thread_local cothread_struct co_primary; -static thread_local cothread_struct *creating, *co_running = 0; +static thread_local cothread_struct* creating; +static thread_local cothread_struct* co_running = 0; static void springboard(int ignored) { if(sigsetjmp(creating->context, 0)) { diff --git a/ucontext.c b/ucontext.c index 2e9e90ec..72ea8719 100644 --- a/ucontext.c +++ b/ucontext.c @@ -5,16 +5,16 @@ */ /* - * WARNING: the overhead of POSIX ucontext is very high, - * assembly versions of libco or libco_sjlj should be much faster - * - * This library only exists for two reasons: - * 1 - as an initial test for the viability of a ucontext implementation - * 2 - to demonstrate the power and speed of libco over existing implementations, - * such as pth (which defaults to wrapping ucontext on unix targets) - * - * Use this library only as a *last resort* - */ + WARNING: the overhead of POSIX ucontext is very high, + assembly versions of libco or libco_sjlj should be much faster + + this library only exists for two reasons: + 1: as an initial test for the viability of a ucontext implementation + 2: to demonstrate the power and speed of libco over existing implementations, + such as pth (which defaults to wrapping ucontext on unix targets) + + use this library only as a *last resort* +*/ #define LIBCO_C #include "libco.h" @@ -28,7 +28,7 @@ extern "C" { #endif static thread_local ucontext_t co_primary; -static thread_local ucontext_t *co_running = 0; +static thread_local ucontext_t* co_running = 0; cothread_t co_active() { if(!co_running) co_running = &co_primary; @@ -37,7 +37,7 @@ cothread_t co_active() { cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { if(!co_running) co_running = &co_primary; - ucontext_t *thread = (ucontext_t*)malloc(sizeof(ucontext_t)); + 
ucontext_t* thread = (ucontext_t*)malloc(sizeof(ucontext_t)); if(thread) { if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = malloc(heapsize))) { thread->uc_link = co_running; @@ -59,7 +59,7 @@ void co_delete(cothread_t cothread) { } void co_switch(cothread_t cothread) { - ucontext_t *old_thread = co_running; + ucontext_t* old_thread = co_running; co_running = (ucontext_t*)cothread; swapcontext(old_thread, co_running); } diff --git a/x86.c b/x86.c index 44bbe4b8..def3ac1c 100644 --- a/x86.c +++ b/x86.c @@ -1,11 +1,12 @@ /* - libco.x86 (2009-10-12) + libco.x86 (2016-09-14) author: byuu license: public domain */ #define LIBCO_C #include "libco.h" +#include "settings.h" #include #include @@ -26,8 +27,13 @@ static thread_local long co_active_buffer[64]; static thread_local cothread_t co_active_handle = 0; static void (fastcall *co_swap)(cothread_t, cothread_t) = 0; +#ifdef LIBCO_MPROTECT + alignas(4096) +#else + section(text) +#endif /* ABI: fastcall */ -static unsigned char co_swap_function[] = { +static const unsigned char co_swap_function[4096] = { 0x89, 0x22, /* mov [edx],esp */ 0x8b, 0x21, /* mov esp,[ecx] */ 0x58, /* pop eax */ @@ -45,19 +51,23 @@ static unsigned char co_swap_function[] = { #ifdef _WIN32 #include - void co_init() { + static void co_init() { + #ifdef LIBCO_MPROTECT DWORD old_privileges; - VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges); + VirtualProtect((void*)co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READ, &old_privileges); + #endif } #else #include #include - void co_init() { + static void co_init() { + #ifdef LIBCO_MPROTECT unsigned long addr = (unsigned long)co_swap_function; unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); unsigned long size = (addr - base) + sizeof co_swap_function; - mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); + mprotect((void*)base, size, PROT_READ | PROT_EXEC); + #endif } #endif From 
dd76846cf2a6525b89d06e5bf011365f90b317f0 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Mon, 23 Jan 2017 08:04:26 +1100 Subject: [PATCH 16/43] Update to v102r02 release. byuu says: Changelog: - I caved on the `samples[] = {0.0}` thing, but I'm very unhappy about it - if it's really invalid C++, then GCC needs to stop accepting it in strict `-std=c++14` mode - Emulator::Interface::Information::resettable is gone - Emulator::Interface::reset() is gone - FC, SFC, MD cores updated to remove soft reset behavior - split GameBoy::Interface into GameBoyInterface, GameBoyColorInterface - split WonderSwan::Interface into WonderSwanInterface, WonderSwanColorInterface - PCE: fixed off-by-one scanline error [hex_usr] - PCE: temporary hack to prevent crashing when VDS is set to < 2 - hiro: Cocoa: removed (u)int(#) constants; converted (u)int(#) types to (u)int_(#)t types - icarus: replaced usage of unique with strip instead (so we don't mess up frameworks on macOS) - libco: added macOS-specific section marker [Ryphecha] So ... the major news this time is the removal of the soft reset behavior. This is a major!! change that results in a 100KiB diff file, and it's very prone to accidental mistakes!! If anyone is up for testing, or even better -- looking over the code changes between v102r01 and v102r02 and looking for any issues, please do so. Ideally we'll want to test every NES mapper type and every SNES coprocessor type by loading said games and power cycling to make sure the games are all cleanly resetting. It's too big of a change for me to cover there not being any issues on my own, but this is truly critical code, so yeah ... please help if you can. We technically lose a bit of hardware documentation here. The soft reset events do all kinds of interesting things in all kinds of different chips -- or at least they do on the SNES. This is obviously not ideal. But in the process of removing these portions of code, I found a few mistakes I had made previously. 
It simplifies resetting the system state a lot when not trying to have all the power() functions call the reset() functions to share partial functionality. In the future, the goal will be to come up with a way to add back in the soft reset behavior via keyboard binding as with the Master System core. What's going to have to happen is that the key binding will have to send a "reset pulse" to every emulated chip, and those chips are going to have to act independently to power() instead of reusing functionality. We'll get there eventually, but there's many things of vastly greater importance to work on right now, so it'll be a while. The information isn't lost ... we'll just have to pull it out of v102 when we are ready. Note that I left the SNES reset vector simulation code in, even though it's not possible to trigger, for the time being. Also ... the Super Game Boy core is still disconnected. To be honest, it totally slipped my mind when I released v102 that it wasn't connected again yet. This one's going to be pretty tricky to be honest. I'm thinking about making a third GameBoy::Interface class just for SGB, and coming up with some way of bypassing platform-> calls when in this mode. 
--- libco.h | 2 +- settings.h | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/libco.h b/libco.h index 792df0bd..0ea47789 100644 --- a/libco.h +++ b/libco.h @@ -1,5 +1,5 @@ /* - libco v18 (2016-09-14) + libco v18.01 (2017-01-22) author: byuu license: public domain */ diff --git a/settings.h b/settings.h index b419683a..d8037bc4 100644 --- a/settings.h +++ b/settings.h @@ -1,4 +1,4 @@ -#ifdef LIBCO_C +#if defined(LIBCO_C) /*[amd64, arm, ppc, x86]: by default, co_swap_function is marked as a text (code) section @@ -10,8 +10,8 @@ do not use this unless you are certain your application won't use SSE */ /* #define LIBCO_NO_SSE */ -#ifdef LIBCO_C - #ifdef LIBCO_MP +#if defined(LIBCO_C) + #if defined(LIBCO_MP) #define thread_local __thread #else #define thread_local @@ -19,18 +19,20 @@ #endif #if __STDC_VERSION__ >= 201112L - #ifndef _MSC_VER + #if !defined(_MSC_VER) #include #endif #else #define alignas(bytes) #endif -#ifndef _MSC_VER - #define section(name) __attribute__((section("." #name "#"))) -#else +#if defined(_MSC_VER) #define section(name) __declspec(allocate("." #name)) +#elif defined(__APPLE__) + #define section(name) __attribute__((section("__TEXT,__" #name))) +#else + #define section(name) __attribute__((section("." #name "#"))) #endif -/* ifdef LIBCO_C */ +/* if defined(LIBCO_C) */ #endif From 91104e6ab6b63cf292d237055d382d6fbaecb357 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Sun, 23 Jul 2017 19:18:16 +1000 Subject: [PATCH 17/43] Update to v103r19 release. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit byuu says: Changelog: - tomoko: Application::onMain assigned at end of Program::Program() [Screwtape]¹ - libco: add `#define _XOPEN_SOURCE 500` to fix compilation of sjlj.c [Screwtape] - ruby/audio/openal: fixed device driver string list enumeration - ruby/audio/wasapi: changing device re-initializes the driver now - ruby/audio/wasapi: probably a pointless change, but don't fill the buffer beyond the queue size with silence - ruby/video/xvideo: renamed from ruby/video/xv - ruby/video/xvideo: check to see if `XV_AUTOPAINT_COLORKEY` exists before setting it [SuperMikeMan] - ruby/video/xvideo: align buffer sizes to be evenly divisible by four [SuperMikeMan] - ruby/video/xvideo: fail nicely without crashing (hopefully) - ruby/video/xvideo: add support for YV12 and I420 12-bit planar YUV formats² ¹: prevents crashes when drivers fail to initialize from running the main loop that polls input drivers before the input driver is initialized (or fails to initialize itself.) Some drivers still don't block their main functions when initialization fails, so they will still crash, but I'll work to fix them. ²: this was a **major** pain in the ass, heh. You only get one chroma sample for every four luma samples, so the color reproduction is even worse than UYVY and YUYV (which is two to four chroma to luma.) Further, the planar format took forever to figure out. Apparently it doesn't care what portion of the image you specify in XvShmPutImage, it expects you to use the buffer dimensions to locate the U and V portions of the data. This is probably the most thorough X-Video driver in existence now. 
Notes: - forgot to rename the configuration settings dialog window title to just "Settings" --- sjlj.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sjlj.c b/sjlj.c index dfa0aa45..f62b000e 100644 --- a/sjlj.c +++ b/sjlj.c @@ -12,6 +12,7 @@ #define LIBCO_C #include "libco.h" +#define _XOPEN_SOURCE 500 #include #include #include From 9a271f5452ecb47fa8d5bb74dd944cdb87a1c082 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Mon, 24 Jul 2017 15:23:40 +1000 Subject: [PATCH 18/43] Update to v103r20 release. byuu says: Changelog: - ruby/audio/xaudio2: ported to new ruby API - ruby/video/cgl: ported to new ruby API (untested, won't compile) - ruby/video/directdraw: ported to new ruby API - ruby/video/gdi: ported to new ruby API - ruby/video/glx: ported to new ruby API - ruby/video/wgl: ported to new ruby API - ruby/video/opengl: code cleanups The macOS CGL driver is sure to have compilation errors. If someone will post the compilation error log, I can hopefully fix it in one or two iterations of WIPs. I am unable to test the Xorg GLX driver, because my FreeBSD desktop video card drivers do not support OpenGL 3.2. If the driver doesn't work, I'm going to need help tracking down what broke from the older releases. The real fun is still yet to come ... all the Linux-only drivers, where I don't have a single Linux machine to test with. 
Todo: - libco/fiber - libco/ucontext (I should really just delete this) - tomoko: hide main UI window when in exclusive fullscreen mode --- amd64.c | 6 ------ arm.c | 6 ------ fiber.c | 6 ------ libco.c | 5 ----- ppc.c | 6 ------ sjlj.c | 7 +------ ucontext.c | 6 ------ x86.c | 6 ------ 8 files changed, 1 insertion(+), 47 deletions(-) diff --git a/amd64.c b/amd64.c index 235708ab..15468090 100644 --- a/amd64.c +++ b/amd64.c @@ -1,9 +1,3 @@ -/* - libco.amd64 (2016-09-14) - author: byuu - license: public domain -*/ - #define LIBCO_C #include "libco.h" #include "settings.h" diff --git a/arm.c b/arm.c index 25f0b16c..313f1134 100644 --- a/arm.c +++ b/arm.c @@ -1,9 +1,3 @@ -/* - libco.arm (2016-09-14) - author: byuu - license: public domain -*/ - #define LIBCO_C #include "libco.h" #include "settings.h" diff --git a/fiber.c b/fiber.c index f2c5b726..38a293df 100644 --- a/fiber.c +++ b/fiber.c @@ -1,9 +1,3 @@ -/* - libco.win (2008-01-28) - authors: Nach, byuu - license: public domain -*/ - #define LIBCO_C #include "libco.h" diff --git a/libco.c b/libco.c index 13eb2379..77873fa3 100644 --- a/libco.c +++ b/libco.c @@ -1,8 +1,3 @@ -/* - libco - license: public domain -*/ - #if defined(__clang__) #pragma clang diagnostic ignored "-Wparentheses" #endif diff --git a/ppc.c b/ppc.c index efec3aa8..6d79b44f 100644 --- a/ppc.c +++ b/ppc.c @@ -1,9 +1,3 @@ -/* - libco.ppc (2016-09-14) - author: blargg - license: public domain -*/ - #define LIBCO_C #include "libco.h" #include "settings.h" diff --git a/sjlj.c b/sjlj.c index f62b000e..4d5017f5 100644 --- a/sjlj.c +++ b/sjlj.c @@ -1,9 +1,3 @@ -/* - libco.sjlj (2008-01-28) - author: Nach - license: public domain -*/ - /* note this was designed for UNIX systems. Based on ideas expressed in a paper by Ralf Engelschall. for SJLJ on other systems, one would want to rewrite springboard() and co_create() and hack the jmb_buf stack pointer. 
@@ -11,6 +5,7 @@ #define LIBCO_C #include "libco.h" +#include "settings.h" #define _XOPEN_SOURCE 500 #include diff --git a/ucontext.c b/ucontext.c index 72ea8719..f4527bfb 100644 --- a/ucontext.c +++ b/ucontext.c @@ -1,9 +1,3 @@ -/* - libco.ucontext (2008-01-28) - author: Nach - license: public domain -*/ - /* WARNING: the overhead of POSIX ucontext is very high, assembly versions of libco or libco_sjlj should be much faster diff --git a/x86.c b/x86.c index def3ac1c..b5c38216 100644 --- a/x86.c +++ b/x86.c @@ -1,9 +1,3 @@ -/* - libco.x86 (2016-09-14) - author: byuu - license: public domain -*/ - #define LIBCO_C #include "libco.h" #include "settings.h" From 0382100a97bfc27b36d920202e9e3decfdd7f21c Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Wed, 26 Jul 2017 22:42:06 +1000 Subject: [PATCH 19/43] Update to v103r21 release. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit byuu says: Changelog: - gb: added TAMA emulation [thanks to endrift for the initial notes] - gb: save RTC memory to disk (MBC3 doesn't write to said memory yet; TAMA doesn't emulate it yet) - gb: expect MMM01 boot loader to be at end of ROM instead of start - gb: store MBC2 save RAM as 256-bytes (512x4-bit) instead of 512-bytes (with padding) - gb: major cleanups to every cartridge mapper; moved to Mapper class instead of MMIO class - gb: don't serialize all mapper states with every save state; only serialize the active mapper - gb: serialize RAM even if a battery isn't present¹ - gb/cartridge: removed unnecessary code; refactored other code to eliminate duplication of functions - icarus: improve GB(C) heuristics generation to not include filenames for cartridges without battery backup - icarus: remove incorrect rearrangement of MMM01 ROM data - md/vdp: fix CRAM reads -- fixes Sonic Spinball colors [hex\_usr] - tomoko: hide the main higan window when entering fullscreen exclusive mode; helps with multi-monitor setups - tomoko: destroy ruby drivers before 
calling Application::quit() [Screwtape] - libco: add settings.h and defines to fiber, ucontext [Screwtape] ¹: this is one of those crystal clear indications that nobody's actually playing the higan DMG/CGB cores, or at least not with save states. This was a major mistake. Note: I can't find any official documentation that `GL_ALPHA_TEST` was removed from OpenGL 3.2. Since it's not hurting anything except showing some warnings in debug mode, I'm just going to leave it there for now. --- fiber.c | 1 + sjlj.c | 1 + ucontext.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/fiber.c b/fiber.c index 38a293df..50af4a72 100644 --- a/fiber.c +++ b/fiber.c @@ -1,5 +1,6 @@ #define LIBCO_C #include "libco.h" +#include "settings.h" #define WINVER 0x0400 #define _WIN32_WINNT 0x0400 diff --git a/sjlj.c b/sjlj.c index 4d5017f5..1d0cb59a 100644 --- a/sjlj.c +++ b/sjlj.c @@ -7,6 +7,7 @@ #include "libco.h" #include "settings.h" +#define _BSD_SOURCE #define _XOPEN_SOURCE 500 #include #include diff --git a/ucontext.c b/ucontext.c index f4527bfb..9ba47c88 100644 --- a/ucontext.c +++ b/ucontext.c @@ -12,8 +12,10 @@ #define LIBCO_C #include "libco.h" +#include "settings.h" #define _BSD_SOURCE +#define _XOPEN_SOURCE 500 #include #include From 57826a7ef1becc964c7be5f9e7eeb07469be5049 Mon Sep 17 00:00:00 2001 From: Talarubi Date: Tue, 24 Oct 2017 21:50:54 -0400 Subject: [PATCH 20/43] Update version and license Added LICENSE.txt and GPLv3.txt. Also updated libco documentation. After discussion with byuu, libco gets a more specific ISC license to match nall, ruby and hiro. higan, as clarified in LICENSE.txt, continues to be GPL version 3 only (no "or later" clause). 
--- doc/style.css | 8 ++++++++ doc/usage.html | 34 +++++++++++++++++++++++++--------- libco.h | 2 +- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/doc/style.css b/doc/style.css index 5181afde..da4a56dd 100644 --- a/doc/style.css +++ b/doc/style.css @@ -3,6 +3,14 @@ body { color: #fff; } +a { + color: #aaf; +} + +a:visited { + color: #faf; +} + code { background: #444; } diff --git a/doc/usage.html b/doc/usage.html index 3f0d81cc..efb34ec8 100644 --- a/doc/usage.html +++ b/doc/usage.html @@ -6,17 +6,32 @@ License:

    -libco is released to the public domain. +libco is released under the ISC license.
    +
    +Copyright © 2006-2017 byuu
    +
    +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies.
    +
    +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE.
    Contact:

    -At present, you may contact me at setsunakun0 at hotmail dot com.
    +At present, you may contact me as byuu at +https://board.byuu.org.
    I am interested in knowing of any projects that make use of this library, though this is only a courtesy.
    Foreword:

    -libco is a cross-platform, public domain implementation of +libco is a cross-platform, permissively licensed implementation of cooperative-multithreading; a feature that is sorely lacking from the ISO C/C++ standard.
    The library is designed for maximum speed and portability, and @@ -35,12 +50,13 @@ platforms. Porting:

    This document is included as a reference for porting libco. Please submit any ports you create to me, so that libco can become more useful. Please note that -since libco is public domain, you must submit your code as a work of the -public domain in order for it to be included in the official distribution. -Full credit will be given in the source code of the official release. Please -do not bother submitting code to me under any other license -- including GPL, -LGPL, BSD or CC -- I am not interested in creating a library with multiple -different licenses depending on which targets are used. +since libco is ISC, you must submit your code as a work of the public domain, +or under the same license, in order for it to be included in the official +distribution. Full credit will be given in the source code of the official +release. Please do not bother submitting code to me under any other +license—including GPL, LGPL, BSD or CC—I am not interested in +creating a library with multiple different licenses depending on which targets +are used.
    Synopsis:

    diff --git a/libco.h b/libco.h index 0ea47789..22a80f43 100644 --- a/libco.h +++ b/libco.h @@ -1,7 +1,7 @@ /* libco v18.01 (2017-01-22) author: byuu - license: public domain + license: ISC */ #ifndef LIBCO_H From 40524fef8af98b04e029ab192b34bb65d80981a4 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Tue, 7 Nov 2017 09:05:54 +1100 Subject: [PATCH 21/43] Update to v105r1 release. byuu says: Changelog: - higan: readded support for soft-reset to Famicom, Super Famicom, Mega Drive cores (work in progress) - handhelds lack soft reset obviously - the PC Engine also lacks a physical reset button - the Master System's reset button acts like a gamepad button, so can't show up in the menu - Mega Drive: power cycle wasn't initializing CPU (M68K) or APU (Z80) RAM - Super Famicom: fix SPC700 opcode 0x3b regression; fixes Majuu Ou [Jonas Quinn] - Super Famicom: fix SharpRTC save regression; fixes Dai Kaijuu Monogatari II's real-time clock [Talarubi] - Super Famicom: fix EpsonRTC save regression; fixes Tengai Makyou Zero's real-time clock [Talarubi] - Super Famicom: removed `*::init()` functions, as they were never used - Super Famicom: removed all but two `*::load()` functions, as they were not used - higan: added option to auto-save backup RAM every five seconds (enabled by default) - this is in case the emulator crashes, or there's a power outage; turn it off under advanced settings if you want - libco: updated license from public domain to ISC, for consistency with nall, ruby, hiro - nall: Linux compiler defaults to g++; override with g++-version if g++ is <= 4.8 - FreeBSD compiler default is going to remain g++49 until my dev box OS ships with g++ >= 4.9 Errata: I have weird RAM initialization constants, thanks to hex_usr and onethirdxcubed for both finding this: http://wiki.nesdev.com/w/index.php?title=CPU_power_up_state&diff=11711&oldid=11184 I'll remove this in the next WIP. 
--- doc/style.css | 12 ++++-------- doc/usage.html | 53 ++++++++++++++------------------------------------ libco.h | 2 +- 3 files changed, 20 insertions(+), 47 deletions(-) diff --git a/doc/style.css b/doc/style.css index da4a56dd..ab070256 100644 --- a/doc/style.css +++ b/doc/style.css @@ -3,14 +3,10 @@ body { color: #fff; } -a { - color: #aaf; -} - -a:visited { - color: #faf; -} - code { background: #444; } + +a { + color: #aaf; +} diff --git a/doc/usage.html b/doc/usage.html index efb34ec8..38576dc5 100644 --- a/doc/usage.html +++ b/doc/usage.html @@ -6,41 +6,19 @@ License:

    -libco is released under the ISC license.
    -
    -Copyright © 2006-2017 byuu
    -
    -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies.
    -
    -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM -LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR -OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. -
    - -Contact:

    -At present, you may contact me as byuu at -https://board.byuu.org.
    -I am interested in knowing of any projects that make use of this library, -though this is only a courtesy. +libco is released under the ISC license.
    Foreword:

    libco is a cross-platform, permissively licensed implementation of -cooperative-multithreading; a feature that is sorely lacking -from the ISO C/C++ standard.
    -The library is designed for maximum speed and portability, and -not for safety or features. If safety or extra functionality is desired, -a wrapper API can easily be written to encapsulate all library functions.
    -Behavior of executing operations that are listed as not permitted -below result in undefined behavior. They may work anyway, they -may cause undesired / unknown behavior, or they may crash the -program entirely.
    +cooperative-multithreading; a feature that is sorely lacking from the ISO C/C++ +standard.
    +The library is designed for maximum speed and portability, and not for safety or +features. If safety or extra functionality is desired, a wrapper API can easily +be written to encapsulate all library functions.
    +Behavior of executing operations that are listed as not permitted below result +in undefined behavior. They may work anyway, they may cause undesired / unknown +behavior, or they may crash the program entirely.
    The goal of this library was to simplify the base API as much as possible, implementing only that which cannot be implemented using pure C. Additional functionality after this would only complicate ports of this library to new @@ -50,13 +28,12 @@ platforms. Porting:

    This document is included as a reference for porting libco. Please submit any ports you create to me, so that libco can become more useful. Please note that -since libco is ISC, you must submit your code as a work of the public domain, -or under the same license, in order for it to be included in the official -distribution. Full credit will be given in the source code of the official -release. Please do not bother submitting code to me under any other -license—including GPL, LGPL, BSD or CC—I am not interested in -creating a library with multiple different licenses depending on which targets -are used. +since libco is permissively licensed, you must submit your code as a work of the +public domain in order for it to be included in the official distribution. +Full credit will be given in the source code of the official release. Please +do not bother submitting code to me under any other license -- including GPL, +LGPL, BSD or CC -- I am not interested in creating a library with multiple +different licenses depending on which targets are used.
    Synopsis:

    diff --git a/libco.h b/libco.h index 22a80f43..c8452154 100644 --- a/libco.h +++ b/libco.h @@ -1,5 +1,5 @@ /* - libco v18.01 (2017-01-22) + libco v18.02 (2017-11-06) author: byuu license: ISC */ From 80cc16d62dcde462c0c893cebe515733aa09b588 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Wed, 27 Feb 2019 23:02:30 +1100 Subject: [PATCH 22/43] Update to v106r107 release. [This WIP was made after byuu's forum closed, so byuu only gave a one-line description -Ed.] byuu says: This splits ColecoVision, SG-1000/SC-3000 away from Master System/Game Gear. Was getting too crowded in there. --- amd64.c | 18 ++++++++++++++ arm.c | 19 ++++++++++++++- doc/usage.html | 8 +++++++ fiber.c | 5 ++++ libco.h | 3 ++- ppc.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ sjlj.c | 44 +++++++++++++++++++++++++++++++++- ucontext.c | 17 ++++++++++++++ x86.c | 18 ++++++++++++++ 9 files changed, 193 insertions(+), 3 deletions(-) diff --git a/amd64.c b/amd64.c index 15468090..ab62bd92 100644 --- a/amd64.c +++ b/amd64.c @@ -120,6 +120,24 @@ cothread_t co_active() { return co_active_handle; } +cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + + if(handle = (cothread_t)memory) { + long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ + *--p = (long long)crash; /* crash if entrypoint returns */ + *--p = (long long)entrypoint; /* start of function */ + *(long long*)handle = (long long)p; /* stack pointer */ + } + + return handle; +} + cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { cothread_t handle; if(!co_swap) { diff --git a/arm.c b/arm.c index 313f1134..20c95310 100644 --- a/arm.c +++ b/arm.c @@ -40,8 +40,25 @@ cothread_t co_active() { return co_active_handle; } +cothread_t co_derive(void* memory, unsigned int size, void 
(*entrypoint)(void)) { + unsigned long* handle; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + + if(handle = (unsigned long*)memory) { + unsigned long* p = (unsigned long*)((unsigned char*)handle + size); + handle[8] = (unsigned long)p; + handle[9] = (unsigned long)entrypoint; + } + + return handle; +} + cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - unsigned long* handle = 0; + unsigned long* handle; if(!co_swap) { co_init(); co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; diff --git a/doc/usage.html b/doc/usage.html index 38576dc5..994072f9 100644 --- a/doc/usage.html +++ b/doc/usage.html @@ -62,6 +62,14 @@ Return handle to current cothread. Always returns a valid handle, even when called from the main program thread.
    +cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void));

    +Initializes new cothread.
    +This function is identical to co_create, only it attempts to use the provided +memory instead of allocating new memory on the heap. Please note that certain +implementations (currently only Windows Fibers) cannot be created using existing +memory, and as such, this function will fail. +
    + cothread_t co_create(unsigned int heapsize, void (*coentry)(void));

    Create new cothread.
    Heapsize is the amount of memory allocated for the cothread stack, specified diff --git a/fiber.c b/fiber.c index 50af4a72..bdf4dd4e 100644 --- a/fiber.c +++ b/fiber.c @@ -24,6 +24,11 @@ cothread_t co_active() { return co_active_; } +cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void)) { + //Windows fibers do not allow users to supply their own memory + return (cothread_t)0; +} + cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { if(!co_active_) { ConvertThreadToFiber(0); diff --git a/libco.h b/libco.h index c8452154..b7f60852 100644 --- a/libco.h +++ b/libco.h @@ -1,5 +1,5 @@ /* - libco v18.02 (2017-11-06) + libco v19 (2019-02-18) author: byuu license: ISC */ @@ -14,6 +14,7 @@ extern "C" { typedef void* cothread_t; cothread_t co_active(); +cothread_t co_derive(void*, unsigned int, void (*)(void)); cothread_t co_create(unsigned int, void (*)(void)); void co_delete(cothread_t); void co_switch(cothread_t); diff --git a/ppc.c b/ppc.c index 6d79b44f..f071fab2 100644 --- a/ppc.c +++ b/ppc.c @@ -258,6 +258,70 @@ static const uint32_t libco_ppc_code[1024] = { #define CO_SWAP_ASM(x, y) ((void (*)(cothread_t, cothread_t))(uintptr_t)libco_ppc_code)(x, y) #endif +static uint32_t* co_derive_(void* memory, unsigned size, uintptr_t entry) { + (void)entry; + + uint32_t* t = (uint32_t*)memory; + + #if LIBCO_PPCDESC + if(t) { + memcpy(t, (void*)entry, sizeof(void*) * 3); /* copy entry's descriptor */ + *(const void**)t = libco_ppc_code; /* set function pointer to swap routine */ + } + #endif + + return t; +} + +cothread_t co_derive(void* memory, unsigned int size, void (*entry_)(void)) { + uintptr_t entry = (uintptr_t)entry_; + uint32_t* t = 0; + + /* be sure main thread was successfully allocated */ + if(co_active()) { + t = co_derive_(memory, size, entry); + } + + if(t) { + uintptr_t sp; + int shift; + + /* save current registers into new thread, so that any special ones will have proper values when thread is begun */ + 
CO_SWAP_ASM(t, t); + + #if LIBCO_PPCDESC + entry = (uintptr_t)*(void**)entry; /* get real address */ + #endif + + /* put stack near end of block, and align */ + sp = (uintptr_t)t + size - above_stack; + sp -= sp % stack_align; + + /* on PPC32, we save and restore GPRs as 32 bits. for PPC64, we + save and restore them as 64 bits, regardless of the size the ABI + uses. so, we manually write pointers at the proper size. we always + save and restore at the same address, and since PPC is big-endian, + we must put the low byte first on PPC32. */ + + /* if uintptr_t is 32 bits, >>32 is undefined behavior, + so we do two shifts and don't have to care how many bits uintptr_t is. */ + #if LIBCO_PPC64 + shift = 16; + #else + shift = 0; + #endif + + /* set up so entry will be called on next swap */ + t[ 8] = (uint32_t)(entry >> shift >> shift); + t[ 9] = (uint32_t)entry; + + t[10] = (uint32_t)(sp >> shift >> shift); + t[11] = (uint32_t)sp; + } + + return t; +} + static uint32_t* co_create_(unsigned size, uintptr_t entry) { (void)entry; diff --git a/sjlj.c b/sjlj.c index 1d0cb59a..d99572a3 100644 --- a/sjlj.c +++ b/sjlj.c @@ -38,10 +38,52 @@ cothread_t co_active() { return (cothread_t)co_running; } +cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) { + if(!co_running) co_running = &co_primary; + + cothread_struct* thread = (cothread_struct*)memory; + memory = (unsigned char*)memory + sizeof(cothread_struct); + size -= sizeof(cothread_struct); + if(thread) { + struct sigaction handler; + struct sigaction old_handler; + + stack_t stack; + stack_t old_stack; + + thread->coentry = thread->stack = 0; + + stack.ss_flags = 0; + stack.ss_size = size; + thread->stack = stack.ss_sp = memory; + if(stack.ss_sp && !sigaltstack(&stack, &old_stack)) { + handler.sa_handler = springboard; + handler.sa_flags = SA_ONSTACK; + sigemptyset(&handler.sa_mask); + creating = thread; + + if(!sigaction(SIGUSR1, &handler, &old_handler)) { + if(!raise(SIGUSR1)) { + thread->coentry 
= coentry; + } + sigaltstack(&old_stack, 0); + sigaction(SIGUSR1, &old_handler, 0); + } + } + + if(thread->coentry != coentry) { + co_delete(thread); + thread = 0; + } + } + + return (cothread_t)thread; +} + cothread_t co_create(unsigned int size, void (*coentry)(void)) { if(!co_running) co_running = &co_primary; - cothread_struct *thread = (cothread_struct*)malloc(sizeof(cothread_struct)); + cothread_struct* thread = (cothread_struct*)malloc(sizeof(cothread_struct)); if(thread) { struct sigaction handler; struct sigaction old_handler; diff --git a/ucontext.c b/ucontext.c index 9ba47c88..edf513d4 100644 --- a/ucontext.c +++ b/ucontext.c @@ -31,6 +31,23 @@ cothread_t co_active() { return (cothread_t)co_running; } +cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void)) { + if(!co_running) co_running = &co_primary; + ucontext_t* thread = (ucontext_t*)memory; + memory = (unsigned char*)memory + sizeof(ucontext_t); + heapsize -= sizeof(ucontext_t); + if(thread) { + if((!getcontext(thread) && !(thread->uc_stack.ss_sp = 0)) && (thread->uc_stack.ss_sp = memory)) { + thread->uc_link = co_running; + thread->uc_stack.ss_size = heapsize; + makecontext(thread, coentry, 0); + } else { + thread = 0; + } + } + return (cothread_t)thread; +} + cothread_t co_create(unsigned int heapsize, void (*coentry)(void)) { if(!co_running) co_running = &co_primary; ucontext_t* thread = (ucontext_t*)malloc(sizeof(ucontext_t)); diff --git a/x86.c b/x86.c index b5c38216..d79306c6 100644 --- a/x86.c +++ b/x86.c @@ -74,6 +74,24 @@ cothread_t co_active() { return co_active_handle; } +cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) { + cothread_t handle; + if(!co_swap) { + co_init(); + co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + + if(handle = (cothread_t)memory) { + long *p = (long*)((char*)handle + size); /* seek to top of stack */ + *--p = 
(long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ + } + + return handle; +} + cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { cothread_t handle; if(!co_swap) { From 8a751a794687afa334f438cbb58fd19744216b22 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Thu, 27 Jun 2019 22:13:03 +1000 Subject: [PATCH 23/43] Update to v106r170 release. byuu says: I finally pass blargg's dmg-sound and cgb-sound tests, but at quite a cost. Reads and writes can't happen on an exact T-cycle (clock cycle) point within an M-cycle (opcode cycle) for the DMG. Writes to trigger take effect two clocks after writes to wave RAM, for instance. Probably going to be a lot more of this in low-level PPU emulation, so I'm biting the bullet and slowly converting the Game Boy bus handler to this new format, which I'll use as a test bench for doing this later to other systems with, since Game Boy performance isn't as critical (it's a drop from 220fps to 200fps to have to poll the bus four times per memory access and synchronize the CPU four times as often, so a lot less bad than I'd feared at least.) 
--- aarch64.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ libco.c | 2 + 2 files changed, 118 insertions(+) create mode 100644 aarch64.c diff --git a/aarch64.c b/aarch64.c new file mode 100644 index 00000000..3b00c526 --- /dev/null +++ b/aarch64.c @@ -0,0 +1,116 @@ +#define LIBCO_C +#include "libco.h" +#include "settings.h" + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static thread_local unsigned long co_active_buffer[64]; +static thread_local cothread_t co_active_handle = 0; +static void (*co_swap)(cothread_t, cothread_t) = 0; + +#ifdef LIBCO_MPROTECT + alignas(4096) +#else + section(text) +#endif +static const uint32_t co_swap_functin[1024] = { + 0xa9002428, /* stp x8,x9,[x1] */ + 0xa9012c2a, /* stp x10,x11,[x1,#16] */ + 0xa902342c, /* stp x12,x13,[x1,#32] */ + 0xa9033c2e, /* stp x14,x15,[x1,#48] */ + 0xf9002433, /* str x19,[x1,#72] */ + 0xa9055434, /* stp x20,x21,[x1,#80] */ + 0xa9065c36, /* stp x22,x23,[x1,#96] */ + 0xa9076438, /* stp x24,x25,[x1,#112] */ + 0xa9086c3a, /* stp x26,x27,[x1,#128] */ + 0xa909743c, /* stp x28,x29,[x1,#144] */ + 0x910003f0, /* mov x16,sp */ + 0xa90a7830, /* stp x16,x30,[x1,#160] */ + + 0xa9402408, /* ldp x8,x9,[x0] */ + 0xa9412c0a, /* ldp x10,x11,[x0,#16] */ + 0xa942340c, /* ldp x12,x13,[x0,#32] */ + 0xa9433c0e, /* ldp x14,x15,[x0,#48] */ + 0xf9402413, /* ldr x19,[x0,#72] */ + 0xa9455414, /* ldp x20,x21,[x0,#80] */ + 0xa9465c16, /* ldp x22,x23,[x0,#96] */ + 0xa9476418, /* ldp x24,x25,[x0,#112] */ + 0xa9486c1a, /* ldp x26,x27,[x0,#128] */ + 0xa949741c, /* ldp x28,x29,[x0,#144] */ + 0xa94a4410, /* ldp x16,x17,[x0,#160] */ + 0x9100021f, /* mov sp,x16 */ + 0xd61f0220, /* br x17 */ +}; + +static void co_init() { + #ifdef LIBCO_MPROTECT + unsigned long addr = (unsigned long)co_swap_function; + unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE)); + unsigned long size = (addr - base) + sizeof co_swap_function; + mprotect((void*)base, size, PROT_READ | PROT_EXEC); + 
#endif +} + +cothread_t co_active() { + if(!co_active_handle) co_active_handle = &co_active_buffer; + return co_active_handle; +} + +cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) { + unsigned long* handle; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + + if(handle = (unsigned long*)memory) { + unsigned long* p = (unsigned long*)((unsigned char*)handle + size); + handle[19] = (unsigned long)p; /* x29 (frame pointer) */ + handle[20] = (unsigned long)p; /* x30 (stack pointer) */ + handle[21] = (unsigned long)entrypoint; /* x31 (link register) */ + } + + return handle; +} + +cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { + unsigned long* handle; + if(!co_swap) { + co_init(); + co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; + } + if(!co_active_handle) co_active_handle = &co_active_buffer; + size += 256; + size &= ~15; + + if(handle = (unsigned long*)malloc(size)) { + unsigned long* p = (unsigned long*)((unsigned char*)handle + size); + handle[19] = (unsigned long)p; /* x29 (frame pointer) */ + handle[20] = (unsigned long)p; /* x30 (stack pointer) */ + handle[21] = (unsigned long)entrypoint; /* x31 (link register) */ + } + + return handle; +} + +void co_delete(cothread_t handle) { + free(handle); +} + +void co_switch(cothread_t handle) { + cothread_t co_previous_handle = co_active_handle; + co_swap(co_active_handle = handle, co_previous_handle); +} + +#ifdef __cplusplus +} +#endif diff --git a/libco.c b/libco.c index 77873fa3..6f446971 100644 --- a/libco.c +++ b/libco.c @@ -9,6 +9,8 @@ #include "amd64.c" #elif defined(__arm__) #include "arm.c" + #elif defined(__aarch64__) + #include "aarch64.c" #elif defined(_ARCH_PPC) #include "ppc.c" #elif defined(_WIN32) From 4dae9affda754d00cd879b61bc7dd4af292d144b Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Thu, 4 Jul 2019 12:17:01 +1000 Subject: 
[PATCH 24/43] Update to v106r175 release. byuu says: - ruby: fullscreen support for Windows OpenGL 3.2, DirectDraw 7.0, GDI drivers - ruby: output(width, height) support for all drivers on all platforms - hiro: improve focus capture for Canvas and Viewport widgets - hiro: added two hotfixes for the macOS Cocoa target [Sintendo] - higan, bsnes: focus the viewport after leaving fullscreen exclusive mode - freebsd: moved from GCC 8.2.0 to Clang 6.0.1 - higan: added video display emulation option to Famicom and Mega Drive cores The reason I moved to Clang was because GCC keeps deadlocking my FreeBSD system. I don't know if it's GCC's fault, or suddenly running 32 copies of any high-CPU usage program, heh. But at any rate, it's worth a try. The performance is the same, but compilation takes a tiny bit longer with Clang. --- libco.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libco.c b/libco.c index 6f446971..b0f018e9 100644 --- a/libco.c +++ b/libco.c @@ -1,5 +1,9 @@ #if defined(__clang__) #pragma clang diagnostic ignored "-Wparentheses" + + //placing code in section(text) does not mark it executable with Clang. 
+ #undef LIBCO_MPROTECT + #define LIBCO_MPROTECT #endif #if defined(__clang__) || defined(__GNUC__) From d6ffae9c1b98e4ee5ac278f412c7b9344c804bb0 Mon Sep 17 00:00:00 2001 From: peterlemon Date: Tue, 16 Jul 2019 20:12:03 +0100 Subject: [PATCH 25/43] Fix libco aarch64 typo --- aarch64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aarch64.c b/aarch64.c index 3b00c526..e153cd46 100644 --- a/aarch64.c +++ b/aarch64.c @@ -21,7 +21,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #else section(text) #endif -static const uint32_t co_swap_functin[1024] = { +static const uint32_t co_swap_function[1024] = { 0xa9002428, /* stp x8,x9,[x1] */ 0xa9012c2a, /* stp x10,x11,[x1,#16] */ 0xa902342c, /* stp x12,x13,[x1,#32] */ From 0d35f2277b5c7e1d96634b10f795463401c287fd Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Wed, 17 Jul 2019 22:23:15 +0900 Subject: [PATCH 26/43] Update license copyright dates. --- aarch64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aarch64.c b/aarch64.c index e153cd46..3b00c526 100644 --- a/aarch64.c +++ b/aarch64.c @@ -21,7 +21,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #else section(text) #endif -static const uint32_t co_swap_function[1024] = { +static const uint32_t co_swap_functin[1024] = { 0xa9002428, /* stp x8,x9,[x1] */ 0xa9012c2a, /* stp x10,x11,[x1,#16] */ 0xa902342c, /* stp x12,x13,[x1,#32] */ From 5ff2383af7de1c7d7963732ccb9414c92add4e84 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Fri, 19 Jul 2019 00:39:19 +0900 Subject: [PATCH 27/43] v106.181 * SFC: fixed PPU WIP regression with sprites in Donkey Kong Country 2 * libco: fix ELFv2 PowerPC compilation * hiro: fix Cocoa compilation * other: miscellaneous code cleanups --- aarch64.c | 2 +- libco.c | 2 +- ppc.c | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/aarch64.c b/aarch64.c index 3b00c526..e153cd46 100644 --- a/aarch64.c +++ b/aarch64.c 
@@ -21,7 +21,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #else section(text) #endif -static const uint32_t co_swap_functin[1024] = { +static const uint32_t co_swap_function[1024] = { 0xa9002428, /* stp x8,x9,[x1] */ 0xa9012c2a, /* stp x10,x11,[x1,#16] */ 0xa902342c, /* stp x12,x13,[x1,#32] */ diff --git a/libco.c b/libco.c index b0f018e9..de11fbe9 100644 --- a/libco.c +++ b/libco.c @@ -15,7 +15,7 @@ #include "arm.c" #elif defined(__aarch64__) #include "aarch64.c" - #elif defined(_ARCH_PPC) + #elif defined(_ARCH_PPC) && !defined(__LITTLE_ENDIAN__) #include "ppc.c" #elif defined(_WIN32) #include "fiber.c" diff --git a/ppc.c b/ppc.c index f071fab2..969fcec2 100644 --- a/ppc.c +++ b/ppc.c @@ -1,3 +1,5 @@ +/* ppc64le (ELFv2) is not currently supported */ + #define LIBCO_C #include "libco.h" #include "settings.h" @@ -36,7 +38,7 @@ static thread_local cothread_t co_active_handle = 0; /* whether function calls are indirect through a descriptor, or are directly to function */ #ifndef LIBCO_PPCDESC - #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) + #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || (LIBCO_PPC64 && (!defined(_CALL_ELF) || _CALL_ELF == 1))) #define LIBCO_PPCDESC 1 #endif #endif From 6d4cddb8183f82370e4aa7cc10a25b164642a661 Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Tue, 23 Jul 2019 15:59:03 -0500 Subject: [PATCH 28/43] Implement ppc64 ELFv2 support in libco The existing ppc implementation in libco only supports the ELFv1 ABI on PowerPC 64 and therefore can't be used on Little Endian systems and Big Endian systems running ELFv2 distros. This commit introduces a new implementation of the libco API for ppc64 elfv2. It has been tested with bsnes and higan. The original ppc implementation is maintained for non-ELFv2 targets. 
--- libco.c | 4 +- ppc64v2.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 ppc64v2.c diff --git a/libco.c b/libco.c index de11fbe9..f5ee5d0a 100644 --- a/libco.c +++ b/libco.c @@ -15,7 +15,9 @@ #include "arm.c" #elif defined(__aarch64__) #include "aarch64.c" - #elif defined(_ARCH_PPC) && !defined(__LITTLE_ENDIAN__) + #elif defined(__powerpc64__) && defined(_CALL_ELF) && (_CALL_ELF == 2) + #include "ppc64v2.c" + #elif defined(_ARCH_PPC) #include "ppc.c" #elif defined(_WIN32) #include "fiber.c" diff --git a/ppc64v2.c b/ppc64v2.c new file mode 100644 index 00000000..763ac70f --- /dev/null +++ b/ppc64v2.c @@ -0,0 +1,284 @@ +/** + * libco implementation for ppc64 elfv2. + * + * Written by Shawn Anastasio. + * Licensed under the ISC license. + */ + +#define LIBCO_C +#include "libco.h" +#include "settings.h" + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct ppc64_context { + // GPRs + uint64_t gprs[32]; + uint64_t lr; + uint64_t ccr; + + // FPRs + uint64_t fprs[32]; + +#ifdef __ALTIVEC__ + // Altivec (VMX) + uint64_t vmx[24 /* 12 non-volatile * 2 */]; + uint32_t vrsave; +#endif +}; + +static thread_local struct ppc64_context *context_running = 0; + +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) +#define ALIGN(ptr, x) ( (void *)( (uintptr_t)(ptr) & ~((x)-1) ) ) + +#define MIN_STACK 0x10000lu +#define MIN_STACK_FRAME 0x20lu +#define STACK_ALIGN 0x10lu + +void swap_context(struct ppc64_context *read, struct ppc64_context *write); +__asm__( + ".text\n" + ".align 4\n" + ".type swap_context @function\n" + "swap_context:\n" + ".cfi_startproc\n" + + // Dump non-volatile and special GPRs + "std 1, 8(4)\n" + "std 2, 16(4)\n" + "std 12, 96(4)\n" + "std 13, 104(4)\n" + "std 14, 112(4)\n" + "std 15, 120(4)\n" + "std 16, 128(4)\n" + "std 17, 136(4)\n" + "std 18, 144(4)\n" + "std 19, 152(4)\n" + "std 20, 160(4)\n" + "std 21, 168(4)\n" + "std 22, 176(4)\n" + "std 23, 184(4)\n" + "std 24, 192(4)\n" + "std 25, 200(4)\n" + "std 26, 208(4)\n" + "std 27, 216(4)\n" + "std 28, 224(4)\n" + "std 29, 232(4)\n" + "std 30, 240(4)\n" + "std 31, 248(4)\n" + + // LR + "mflr 5\n" + "std 5, 256(4)\n" + + // CCR + "mfcr 5\n" + "std 5, 264(4)\n" + + // Dump non-volatile FPRs + "stfd 14, 384(4)\n" + "stfd 15, 392(4)\n" + "stfd 16, 400(4)\n" + "stfd 17, 408(4)\n" + "stfd 18, 416(4)\n" + "stfd 19, 424(4)\n" + "stfd 20, 432(4)\n" + "stfd 21, 440(4)\n" + "stfd 22, 448(4)\n" + "stfd 23, 456(4)\n" + "stfd 24, 464(4)\n" + "stfd 25, 472(4)\n" + "stfd 26, 480(4)\n" + "stfd 27, 488(4)\n" + "stfd 28, 496(4)\n" + "stfd 29, 504(4)\n" + "stfd 30, 512(4)\n" + "stfd 31, 520(4)\n" + +#ifdef __ALTIVEC__ + // Dump non-volatile VMX registers + "li 5, 528\n" + "stvxl 20, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 21, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 22, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 23, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 24, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 25, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 26, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 27, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 28, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 29, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 30, 4, 5\n" + "addi 5, 5, 16\n" + "stvxl 31, 4, 5\n" + "addi 5, 5, 16\n" + + // VRSAVE + "mfvrsave 5\n" + "stw 5, 736(4)\n" +#endif + 
+ // Restore GPRs + "ld 1, 8(3)\n" + "ld 2, 16(3)\n" + "ld 12, 96(3)\n" + "ld 13, 104(3)\n" + "ld 14, 112(3)\n" + "ld 15, 120(3)\n" + "ld 16, 128(3)\n" + "ld 17, 136(3)\n" + "ld 18, 144(3)\n" + "ld 19, 152(3)\n" + "ld 20, 160(3)\n" + "ld 21, 168(3)\n" + "ld 22, 176(3)\n" + "ld 23, 184(3)\n" + "ld 24, 192(3)\n" + "ld 25, 200(3)\n" + "ld 26, 208(3)\n" + "ld 27, 216(3)\n" + "ld 28, 224(3)\n" + "ld 29, 232(3)\n" + "ld 30, 240(3)\n" + "ld 31, 248(3)\n" + + // Restore LR + "ld 5, 256(3)\n" + "mtlr 5\n" + + // Restore CCR + "ld 5, 264(3)\n" + "mtcr 5\n" + + // Restore FPRs + "lfd 14, 384(3)\n" + "lfd 15, 392(3)\n" + "lfd 16, 400(3)\n" + "lfd 17, 408(3)\n" + "lfd 18, 416(3)\n" + "lfd 19, 424(3)\n" + "lfd 20, 432(3)\n" + "lfd 21, 440(3)\n" + "lfd 22, 448(3)\n" + "lfd 23, 456(3)\n" + "lfd 24, 464(3)\n" + "lfd 25, 472(3)\n" + "lfd 26, 480(3)\n" + "lfd 27, 488(3)\n" + "lfd 28, 496(3)\n" + "lfd 29, 504(3)\n" + "lfd 30, 512(3)\n" + "lfd 31, 520(3)\n" + +#ifdef __ALTIVEC__ + // Restore VMX + "li 5, 528\n" + "lvxl 20, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 21, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 22, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 23, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 24, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 25, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 26, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 27, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 28, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 29, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 30, 3, 5\n" + "addi 5, 5, 16\n" + "lvxl 31, 3, 5\n" + "addi 5, 5, 16\n" + + // VRSAVE + "lwz 5, 720(3)\n" + "mtvrsave 5\n" +#endif + + // Context restored, branch to LR + "blr\n" + + ".cfi_endproc\n" + ".size swap_context, .-swap_context\n" +); + +cothread_t co_active() { + if (!context_running) + context_running = (struct ppc64_context *) + malloc(MIN_STACK + sizeof(struct ppc64_context)); + return (cothread_t)context_running; +} + +cothread_t co_derive(void *memory, unsigned int size, void (*coentry)(void)) { + uint8_t *sp; + struct ppc64_context *context = (struct 
ppc64_context *)memory; + + // Save current context into new context to initialize it + swap_context(context, context); + + // Align stack + sp = (uint8_t *)memory + size - STACK_ALIGN; + sp = (uint8_t *)ALIGN(sp, STACK_ALIGN); + + // Write 0 for initial backchain + *(uint64_t *)sp = 0; + + // Create new frame with backchain + sp -= MIN_STACK_FRAME; + *(uint64_t *)sp = (uint64_t)(sp + MIN_STACK_FRAME); + + // Update context with new stack (r1) and entrypoint (LR, r12) + context->lr = (uint64_t)coentry; + context->gprs[12] = (uint64_t)coentry; + context->gprs[1] = (uint64_t)sp; + + return (cothread_t)memory; +} + +cothread_t co_create(unsigned int size, void (*coentry)(void)) { + size_t total = MAX(size, MIN_STACK) + sizeof(struct ppc64_context); + void *memory = malloc(total); + if (!memory) + return (cothread_t)0; + + return co_derive(memory, total, coentry); +} + +void co_delete(cothread_t t) { + free(t); +} + +void co_switch(cothread_t t) { + struct ppc64_context *old = context_running; + context_running = (struct ppc64_context *)t; + swap_context((struct ppc64_context *)t, old); +} + +#ifdef __cplusplus +} +#endif From 07c1beadfbd51819616891e5d239ca44e14f8a67 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Wed, 24 Jul 2019 22:05:39 +0900 Subject: [PATCH 29/43] v106.185 --- libco.c | 4 +- ppc64v2.c | 115 +++++++++++++++++++++++++----------------------------- 2 files changed, 56 insertions(+), 63 deletions(-) diff --git a/libco.c b/libco.c index f5ee5d0a..f9c79bf4 100644 --- a/libco.c +++ b/libco.c @@ -15,9 +15,9 @@ #include "arm.c" #elif defined(__aarch64__) #include "aarch64.c" - #elif defined(__powerpc64__) && defined(_CALL_ELF) && (_CALL_ELF == 2) + #elif defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2 #include "ppc64v2.c" - #elif defined(_ARCH_PPC) + #elif defined(_ARCH_PPC) && !defined(__LITTLE_ENDIAN__) #include "ppc.c" #elif defined(_WIN32) #include "fiber.c" diff --git a/ppc64v2.c b/ppc64v2.c index 
763ac70f..b1f153d4 100644 --- a/ppc64v2.c +++ b/ppc64v2.c @@ -1,9 +1,4 @@ -/** - * libco implementation for ppc64 elfv2. - * - * Written by Shawn Anastasio. - * Licensed under the ISC license. - */ +/* author: Shawn Anastasio */ #define LIBCO_C #include "libco.h" @@ -11,38 +6,37 @@ #include #include -#include #ifdef __cplusplus extern "C" { #endif struct ppc64_context { - // GPRs + //GPRs uint64_t gprs[32]; uint64_t lr; uint64_t ccr; - // FPRs + //FPRs uint64_t fprs[32]; -#ifdef __ALTIVEC__ - // Altivec (VMX) - uint64_t vmx[24 /* 12 non-volatile * 2 */]; + #ifdef __ALTIVEC__ + //Altivec (VMX) + uint64_t vmx[12 * 2]; uint32_t vrsave; -#endif + #endif }; -static thread_local struct ppc64_context *context_running = 0; +static thread_local struct ppc64_context* co_active_handle = 0; #define MAX(x, y) ((x) > (y) ? (x) : (y)) -#define ALIGN(ptr, x) ( (void *)( (uintptr_t)(ptr) & ~((x)-1) ) ) +#define ALIGN(p, x) ((void*)((uintptr_t)(p) & ~((x) - 1))) -#define MIN_STACK 0x10000lu +#define MIN_STACK 0x10000lu #define MIN_STACK_FRAME 0x20lu #define STACK_ALIGN 0x10lu -void swap_context(struct ppc64_context *read, struct ppc64_context *write); +void swap_context(struct ppc64_context* read, struct ppc64_context* write); __asm__( ".text\n" ".align 4\n" @@ -50,7 +44,7 @@ __asm__( "swap_context:\n" ".cfi_startproc\n" - // Dump non-volatile and special GPRs + //save GPRs "std 1, 8(4)\n" "std 2, 16(4)\n" "std 12, 96(4)\n" @@ -74,15 +68,15 @@ __asm__( "std 30, 240(4)\n" "std 31, 248(4)\n" - // LR + //save LR "mflr 5\n" "std 5, 256(4)\n" - // CCR + //save CCR "mfcr 5\n" "std 5, 264(4)\n" - // Dump non-volatile FPRs + //save FPRs "stfd 14, 384(4)\n" "stfd 15, 392(4)\n" "stfd 16, 400(4)\n" @@ -102,8 +96,8 @@ __asm__( "stfd 30, 512(4)\n" "stfd 31, 520(4)\n" -#ifdef __ALTIVEC__ - // Dump non-volatile VMX registers + #ifdef __ALTIVEC__ + //save VMX "li 5, 528\n" "stvxl 20, 4, 5\n" "addi 5, 5, 16\n" @@ -130,12 +124,12 @@ __asm__( "stvxl 31, 4, 5\n" "addi 5, 5, 16\n" - // VRSAVE + //save 
VRSAVE "mfvrsave 5\n" "stw 5, 736(4)\n" -#endif + #endif - // Restore GPRs + //restore GPRs "ld 1, 8(3)\n" "ld 2, 16(3)\n" "ld 12, 96(3)\n" @@ -159,15 +153,15 @@ __asm__( "ld 30, 240(3)\n" "ld 31, 248(3)\n" - // Restore LR + //restore LR "ld 5, 256(3)\n" "mtlr 5\n" - // Restore CCR + //restore CCR "ld 5, 264(3)\n" "mtcr 5\n" - // Restore FPRs + //restore FPRs "lfd 14, 384(3)\n" "lfd 15, 392(3)\n" "lfd 16, 400(3)\n" @@ -187,8 +181,8 @@ __asm__( "lfd 30, 512(3)\n" "lfd 31, 520(3)\n" -#ifdef __ALTIVEC__ - // Restore VMX + #ifdef __ALTIVEC__ + //restore VMX "li 5, 528\n" "lvxl 20, 3, 5\n" "addi 5, 5, 16\n" @@ -215,12 +209,12 @@ __asm__( "lvxl 31, 3, 5\n" "addi 5, 5, 16\n" - // VRSAVE + //restore VRSAVE "lwz 5, 720(3)\n" "mtvrsave 5\n" -#endif + #endif - // Context restored, branch to LR + //branch to LR "blr\n" ".cfi_endproc\n" @@ -228,55 +222,54 @@ __asm__( ); cothread_t co_active() { - if (!context_running) - context_running = (struct ppc64_context *) - malloc(MIN_STACK + sizeof(struct ppc64_context)); - return (cothread_t)context_running; + if(!co_active_handle) { + co_active_handle = (struct ppc64_context*)malloc(MIN_STACK + sizeof(struct ppc64_context)); + } + return (cothread_t)co_active_handle; } -cothread_t co_derive(void *memory, unsigned int size, void (*coentry)(void)) { - uint8_t *sp; - struct ppc64_context *context = (struct ppc64_context *)memory; +cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) { + uint8_t* sp; + struct ppc64_context* context = (struct ppc64_context*)memory; - // Save current context into new context to initialize it + //save current context into new context to initialize it swap_context(context, context); - // Align stack - sp = (uint8_t *)memory + size - STACK_ALIGN; - sp = (uint8_t *)ALIGN(sp, STACK_ALIGN); + //align stack + sp = (uint8_t*)memory + size - STACK_ALIGN; + sp = (uint8_t*)ALIGN(sp, STACK_ALIGN); - // Write 0 for initial backchain - *(uint64_t *)sp = 0; + //write 0 for initial backchain + 
*(uint64_t*)sp = 0; - // Create new frame with backchain + //create new frame with backchain sp -= MIN_STACK_FRAME; - *(uint64_t *)sp = (uint64_t)(sp + MIN_STACK_FRAME); + *(uint64_t*)sp = (uint64_t)(sp + MIN_STACK_FRAME); - // Update context with new stack (r1) and entrypoint (LR, r12) - context->lr = (uint64_t)coentry; + //update context with new stack (r1) and entrypoint (r12, lr) + context->gprs[ 1] = (uint64_t)sp; context->gprs[12] = (uint64_t)coentry; - context->gprs[1] = (uint64_t)sp; + context->lr = (uint64_t)coentry; return (cothread_t)memory; } cothread_t co_create(unsigned int size, void (*coentry)(void)) { size_t total = MAX(size, MIN_STACK) + sizeof(struct ppc64_context); - void *memory = malloc(total); - if (!memory) - return (cothread_t)0; + void* memory = malloc(total); + if(!memory) return (cothread_t)0; return co_derive(memory, total, coentry); } -void co_delete(cothread_t t) { - free(t); +void co_delete(cothread_t handle) { + free(handle); } -void co_switch(cothread_t t) { - struct ppc64_context *old = context_running; - context_running = (struct ppc64_context *)t; - swap_context((struct ppc64_context *)t, old); +void co_switch(cothread_t to) { + struct ppc64_context* from = co_active_handle; + co_active_handle = (struct ppc64_context*)to; + swap_context((struct ppc64_context*)to, from); } #ifdef __cplusplus From 3a1855a80f94a02cf39e2069d0e694d21a5d2418 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Mon, 14 Oct 2019 20:31:17 +0900 Subject: [PATCH 30/43] v106.222 Added port filter panel list Renamed ProgramWindow to Program --- amd64.c | 20 ++++++++++---------- libco.h | 2 +- x86.c | 20 ++++++++++---------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/amd64.c b/amd64.c index ab62bd92..664730a1 100644 --- a/amd64.c +++ b/amd64.c @@ -129,10 +129,11 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) if(!co_active_handle) co_active_handle = &co_active_buffer; 
if(handle = (cothread_t)memory) { - long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long long)crash; /* crash if entrypoint returns */ - *--p = (long long)entrypoint; /* start of function */ - *(long long*)handle = (long long)p; /* stack pointer */ + unsigned int offset = (size & ~15) - 32; + long long *p = (long long*)((char*)handle + offset); /* seek to top of stack */ + *--p = (long long)crash; /* crash if entrypoint returns */ + *--p = (long long)entrypoint; /* start of function */ + *(long long*)handle = (long long)p; /* stack pointer */ } return handle; @@ -145,14 +146,13 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; } if(!co_active_handle) co_active_handle = &co_active_buffer; - size += 512; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ if(handle = (cothread_t)malloc(size)) { - long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long long)crash; /* crash if entrypoint returns */ - *--p = (long long)entrypoint; /* start of function */ - *(long long*)handle = (long long)p; /* stack pointer */ + unsigned int offset = (size & ~15) - 32; + long long *p = (long long*)((char*)handle + offset); /* seek to top of stack */ + *--p = (long long)crash; /* crash if entrypoint returns */ + *--p = (long long)entrypoint; /* start of function */ + *(long long*)handle = (long long)p; /* stack pointer */ } return handle; diff --git a/libco.h b/libco.h index b7f60852..a436957c 100644 --- a/libco.h +++ b/libco.h @@ -1,5 +1,5 @@ /* - libco v19 (2019-02-18) + libco v20 (2019-10-14) author: byuu license: ISC */ diff --git a/x86.c b/x86.c index d79306c6..3eb2e21e 100644 --- a/x86.c +++ b/x86.c @@ -83,10 +83,11 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) if(!co_active_handle) co_active_handle = &co_active_buffer; if(handle = 
(cothread_t)memory) { - long *p = (long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long)crash; /* crash if entrypoint returns */ - *--p = (long)entrypoint; /* start of function */ - *(long*)handle = (long)p; /* stack pointer */ + unsigned int offset = (size & ~15) - 32; + long *p = (long*)((char*)handle + offset); /* seek to top of stack */ + *--p = (long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ } return handle; @@ -99,14 +100,13 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; } if(!co_active_handle) co_active_handle = &co_active_buffer; - size += 256; /* allocate additional space for storage */ - size &= ~15; /* align stack to 16-byte boundary */ if(handle = (cothread_t)malloc(size)) { - long *p = (long*)((char*)handle + size); /* seek to top of stack */ - *--p = (long)crash; /* crash if entrypoint returns */ - *--p = (long)entrypoint; /* start of function */ - *(long*)handle = (long)p; /* stack pointer */ + unsigned int offset = (size & ~15) - 32; + long *p = (long*)((char*)handle + offset); /* seek to top of stack */ + *--p = (long)crash; /* crash if entrypoint returns */ + *--p = (long)entrypoint; /* start of function */ + *(long*)handle = (long)p; /* stack pointer */ } return handle; From ef1d4b592a1c4b3384f9a97a07d6658ee10df154 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Sun, 20 Oct 2019 00:28:09 +0900 Subject: [PATCH 31/43] v106.223 --- aarch64.c | 27 +++++++++------------------ amd64.c | 23 +++++++---------------- arm.c | 26 +++++++++----------------- fiber.c | 4 ++++ libco.c | 2 +- libco.h | 3 ++- ppc.c | 4 ++++ ppc64v2.c | 10 ++++++---- sjlj.c | 4 ++++ ucontext.c | 4 ++++ x86.c | 23 +++++++---------------- 11 files changed, 57 insertions(+), 73 deletions(-) diff --git a/aarch64.c b/aarch64.c index 
e153cd46..59162f20 100644 --- a/aarch64.c +++ b/aarch64.c @@ -59,6 +59,10 @@ static void co_init() { #endif } +const char* co_method() { + return "aarch64"; +} + cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -73,7 +77,8 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) if(!co_active_handle) co_active_handle = &co_active_buffer; if(handle = (unsigned long*)memory) { - unsigned long* p = (unsigned long*)((unsigned char*)handle + size); + unsigned int offset = (size & ~15); + unsigned long* p = (unsigned long*)((unsigned char*)handle + offset); handle[19] = (unsigned long)p; /* x29 (frame pointer) */ handle[20] = (unsigned long)p; /* x30 (stack pointer) */ handle[21] = (unsigned long)entrypoint; /* x31 (link register) */ @@ -83,23 +88,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - unsigned long* handle; - if(!co_swap) { - co_init(); - co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; - } - if(!co_active_handle) co_active_handle = &co_active_buffer; - size += 256; - size &= ~15; - - if(handle = (unsigned long*)malloc(size)) { - unsigned long* p = (unsigned long*)((unsigned char*)handle + size); - handle[19] = (unsigned long)p; /* x29 (frame pointer) */ - handle[20] = (unsigned long)p; /* x30 (stack pointer) */ - handle[21] = (unsigned long)entrypoint; /* x31 (link register) */ - } - - return handle; + void* memory = malloc(size); + if(!memory) return (cothread_t)0; + return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { diff --git a/amd64.c b/amd64.c index 664730a1..e9aef889 100644 --- a/amd64.c +++ b/amd64.c @@ -115,6 +115,10 @@ static void crash() { assert(0); /* called only if cothread_t entrypoint returns */ } +const char* co_method() { + return "amd64"; +} + cothread_t co_active() { if(!co_active_handle) 
co_active_handle = &co_active_buffer; return co_active_handle; @@ -140,22 +144,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - cothread_t handle; - if(!co_swap) { - co_init(); - co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; - } - if(!co_active_handle) co_active_handle = &co_active_buffer; - - if(handle = (cothread_t)malloc(size)) { - unsigned int offset = (size & ~15) - 32; - long long *p = (long long*)((char*)handle + offset); /* seek to top of stack */ - *--p = (long long)crash; /* crash if entrypoint returns */ - *--p = (long long)entrypoint; /* start of function */ - *(long long*)handle = (long long)p; /* stack pointer */ - } - - return handle; + void* memory = malloc(size); + if(!memory) return (cothread_t)0; + return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { diff --git a/arm.c b/arm.c index 20c95310..8d872bf5 100644 --- a/arm.c +++ b/arm.c @@ -35,6 +35,10 @@ static void co_init() { #endif } +const char* co_method() { + return "arm"; +} + cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -49,7 +53,8 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) if(!co_active_handle) co_active_handle = &co_active_buffer; if(handle = (unsigned long*)memory) { - unsigned long* p = (unsigned long*)((unsigned char*)handle + size); + unsigned int offset = (size & ~15); + unsigned long* p = (unsigned long*)((unsigned char*)handle + offset); handle[8] = (unsigned long)p; handle[9] = (unsigned long)entrypoint; } @@ -58,22 +63,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - unsigned long* handle; - if(!co_swap) { - co_init(); - co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; - } - if(!co_active_handle) 
co_active_handle = &co_active_buffer; - size += 256; - size &= ~15; - - if(handle = (unsigned long*)malloc(size)) { - unsigned long* p = (unsigned long*)((unsigned char*)handle + size); - handle[8] = (unsigned long)p; - handle[9] = (unsigned long)entrypoint; - } - - return handle; + void* memory = malloc(size); + if(!memory) return (cothread_t)0; + return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { diff --git a/fiber.c b/fiber.c index bdf4dd4e..f508b0f8 100644 --- a/fiber.c +++ b/fiber.c @@ -16,6 +16,10 @@ static void __stdcall co_thunk(void* coentry) { ((void (*)(void))coentry)(); } +const char* co_method() { + return "fiber"; +} + cothread_t co_active() { if(!co_active_) { ConvertThreadToFiber(0); diff --git a/libco.c b/libco.c index f9c79bf4..21fe4cab 100644 --- a/libco.c +++ b/libco.c @@ -1,7 +1,7 @@ #if defined(__clang__) #pragma clang diagnostic ignored "-Wparentheses" - //placing code in section(text) does not mark it executable with Clang. + /* placing code in section(text) does not mark it executable with Clang. 
*/ #undef LIBCO_MPROTECT #define LIBCO_MPROTECT #endif diff --git a/libco.h b/libco.h index a436957c..0b94c2e9 100644 --- a/libco.h +++ b/libco.h @@ -1,5 +1,5 @@ /* - libco v20 (2019-10-14) + libco v20 (2019-10-16) author: byuu license: ISC */ @@ -13,6 +13,7 @@ extern "C" { typedef void* cothread_t; +const char* co_method(); cothread_t co_active(); cothread_t co_derive(void*, unsigned int, void (*)(void)); cothread_t co_create(unsigned int, void (*)(void)); diff --git a/ppc.c b/ppc.c index 969fcec2..314997c8 100644 --- a/ppc.c +++ b/ppc.c @@ -413,6 +413,10 @@ static void co_init_(void) { co_active_handle = co_create_(state_size, (uintptr_t)&co_switch); } +const char* co_method() { + return "ppc"; +} + cothread_t co_active() { if(!co_active_handle) co_init_(); diff --git a/ppc64v2.c b/ppc64v2.c index b1f153d4..fac464a6 100644 --- a/ppc64v2.c +++ b/ppc64v2.c @@ -221,6 +221,10 @@ __asm__( ".size swap_context, .-swap_context\n" ); +const char* co_method() { + return "ppc64v2"; +} + cothread_t co_active() { if(!co_active_handle) { co_active_handle = (struct ppc64_context*)malloc(MIN_STACK + sizeof(struct ppc64_context)); @@ -255,11 +259,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) { } cothread_t co_create(unsigned int size, void (*coentry)(void)) { - size_t total = MAX(size, MIN_STACK) + sizeof(struct ppc64_context); - void* memory = malloc(total); - + void* memory = malloc(size); if(!memory) return (cothread_t)0; - return co_derive(memory, total, coentry); + return co_derive(memory, size, coentry); } void co_delete(cothread_t handle) { diff --git a/sjlj.c b/sjlj.c index d99572a3..b4faf17b 100644 --- a/sjlj.c +++ b/sjlj.c @@ -33,6 +33,10 @@ static void springboard(int ignored) { } } +const char* co_method() { + return "sjlj"; +} + cothread_t co_active() { if(!co_running) co_running = &co_primary; return (cothread_t)co_running; diff --git a/ucontext.c b/ucontext.c index edf513d4..49fa976c 100644 --- a/ucontext.c +++ b/ucontext.c @@ 
-26,6 +26,10 @@ extern "C" { static thread_local ucontext_t co_primary; static thread_local ucontext_t* co_running = 0; +const char* co_module() { + return "ucontext"; +} + cothread_t co_active() { if(!co_running) co_running = &co_primary; return (cothread_t)co_running; diff --git a/x86.c b/x86.c index 3eb2e21e..8effa0d4 100644 --- a/x86.c +++ b/x86.c @@ -69,6 +69,10 @@ static void crash() { assert(0); /* called only if cothread_t entrypoint returns */ } +const char* co_method() { + return "x86"; +} + cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -94,22 +98,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - cothread_t handle; - if(!co_swap) { - co_init(); - co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function; - } - if(!co_active_handle) co_active_handle = &co_active_buffer; - - if(handle = (cothread_t)malloc(size)) { - unsigned int offset = (size & ~15) - 32; - long *p = (long*)((char*)handle + offset); /* seek to top of stack */ - *--p = (long)crash; /* crash if entrypoint returns */ - *--p = (long)entrypoint; /* start of function */ - *(long*)handle = (long)p; /* stack pointer */ - } - - return handle; + void* memory = malloc(size); + if(!memory) return (cothread_t)0; + return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { From 0d87e92a10d3f4b1414458fe747535dcaf8c66f1 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Sun, 20 Oct 2019 01:14:04 +0900 Subject: [PATCH 32/43] v106.224 --- aarch64.c | 8 ++++---- amd64.c | 8 ++++---- arm.c | 8 ++++---- fiber.c | 8 ++++---- libco.h | 2 +- ppc.c | 8 ++++---- ppc64v2.c | 8 ++++---- sjlj.c | 8 ++++---- ucontext.c | 8 ++++---- x86.c | 8 ++++---- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/aarch64.c b/aarch64.c index 59162f20..b3ffcc49 100644 --- a/aarch64.c 
+++ b/aarch64.c @@ -59,10 +59,6 @@ static void co_init() { #endif } -const char* co_method() { - return "aarch64"; -} - cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -102,6 +98,10 @@ void co_switch(cothread_t handle) { co_swap(co_active_handle = handle, co_previous_handle); } +int co_serializable() { + return 1; +} + #ifdef __cplusplus } #endif diff --git a/amd64.c b/amd64.c index e9aef889..e9424c0b 100644 --- a/amd64.c +++ b/amd64.c @@ -115,10 +115,6 @@ static void crash() { assert(0); /* called only if cothread_t entrypoint returns */ } -const char* co_method() { - return "amd64"; -} - cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -158,6 +154,10 @@ void co_switch(cothread_t handle) { co_swap(co_active_handle = handle, co_previous_handle); } +int co_serializable() { + return 1; +} + #ifdef __cplusplus } #endif diff --git a/arm.c b/arm.c index 8d872bf5..95adf6b2 100644 --- a/arm.c +++ b/arm.c @@ -35,10 +35,6 @@ static void co_init() { #endif } -const char* co_method() { - return "arm"; -} - cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -77,6 +73,10 @@ void co_switch(cothread_t handle) { co_swap(co_active_handle = handle, co_previous_handle); } +int co_serializable() { + return 1; +} + #ifdef __cplusplus } #endif diff --git a/fiber.c b/fiber.c index f508b0f8..dd539c37 100644 --- a/fiber.c +++ b/fiber.c @@ -16,10 +16,6 @@ static void __stdcall co_thunk(void* coentry) { ((void (*)(void))coentry)(); } -const char* co_method() { - return "fiber"; -} - cothread_t co_active() { if(!co_active_) { ConvertThreadToFiber(0); @@ -50,6 +46,10 @@ void co_switch(cothread_t cothread) { SwitchToFiber(cothread); } +int co_serializable() { + return 0; +} + #ifdef __cplusplus } #endif diff --git a/libco.h b/libco.h index 0b94c2e9..88d00a72 100644 --- a/libco.h +++ b/libco.h @@ -13,12 
+13,12 @@ extern "C" { typedef void* cothread_t; -const char* co_method(); cothread_t co_active(); cothread_t co_derive(void*, unsigned int, void (*)(void)); cothread_t co_create(unsigned int, void (*)(void)); void co_delete(cothread_t); void co_switch(cothread_t); +int co_serializable(); #ifdef __cplusplus } diff --git a/ppc.c b/ppc.c index 314997c8..6b7f4acd 100644 --- a/ppc.c +++ b/ppc.c @@ -413,10 +413,6 @@ static void co_init_(void) { co_active_handle = co_create_(state_size, (uintptr_t)&co_switch); } -const char* co_method() { - return "ppc"; -} - cothread_t co_active() { if(!co_active_handle) co_init_(); @@ -429,3 +425,7 @@ void co_switch(cothread_t t) { CO_SWAP_ASM(t, old); } + +int co_serializable() { + return 0; +} diff --git a/ppc64v2.c b/ppc64v2.c index fac464a6..3c296959 100644 --- a/ppc64v2.c +++ b/ppc64v2.c @@ -221,10 +221,6 @@ __asm__( ".size swap_context, .-swap_context\n" ); -const char* co_method() { - return "ppc64v2"; -} - cothread_t co_active() { if(!co_active_handle) { co_active_handle = (struct ppc64_context*)malloc(MIN_STACK + sizeof(struct ppc64_context)); @@ -274,6 +270,10 @@ void co_switch(cothread_t to) { swap_context((struct ppc64_context*)to, from); } +int co_serializable() { + return 1; +} + #ifdef __cplusplus } #endif diff --git a/sjlj.c b/sjlj.c index b4faf17b..5af14729 100644 --- a/sjlj.c +++ b/sjlj.c @@ -33,10 +33,6 @@ static void springboard(int ignored) { } } -const char* co_method() { - return "sjlj"; -} - cothread_t co_active() { if(!co_running) co_running = &co_primary; return (cothread_t)co_running; @@ -140,6 +136,10 @@ void co_switch(cothread_t cothread) { } } +int co_serializable() { + return 0; +} + #ifdef __cplusplus } #endif diff --git a/ucontext.c b/ucontext.c index 49fa976c..5ff76af3 100644 --- a/ucontext.c +++ b/ucontext.c @@ -26,10 +26,6 @@ extern "C" { static thread_local ucontext_t co_primary; static thread_local ucontext_t* co_running = 0; -const char* co_module() { - return "ucontext"; -} - cothread_t 
co_active() { if(!co_running) co_running = &co_primary; return (cothread_t)co_running; @@ -81,6 +77,10 @@ void co_switch(cothread_t cothread) { swapcontext(old_thread, co_running); } +int co_serializable() { + return 0; +} + #ifdef __cplusplus } #endif diff --git a/x86.c b/x86.c index 8effa0d4..fa1c538f 100644 --- a/x86.c +++ b/x86.c @@ -69,10 +69,6 @@ static void crash() { assert(0); /* called only if cothread_t entrypoint returns */ } -const char* co_method() { - return "x86"; -} - cothread_t co_active() { if(!co_active_handle) co_active_handle = &co_active_buffer; return co_active_handle; @@ -112,6 +108,10 @@ void co_switch(cothread_t handle) { co_swap(co_active_handle = handle, co_previous_handle); } +int co_serializable() { + return 1; +} + #ifdef __cplusplus } #endif From d094c218247319c95efa8fb3523c3286e382061f Mon Sep 17 00:00:00 2001 From: Nikos Chantziaras Date: Mon, 25 Nov 2019 13:26:30 +0200 Subject: [PATCH 33/43] Don't include sys/mman.h when not using mprotect --- aarch64.c | 4 +++- amd64.c | 4 +++- arm.c | 4 +++- x86.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/aarch64.c b/aarch64.c index b3ffcc49..904f688f 100644 --- a/aarch64.c +++ b/aarch64.c @@ -6,7 +6,9 @@ #include #include #include -#include +#ifdef LIBCO_MPROTECT + #include +#endif #ifdef __cplusplus extern "C" { diff --git a/amd64.c b/amd64.c index e9424c0b..45f3f6d8 100644 --- a/amd64.c +++ b/amd64.c @@ -99,7 +99,9 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; }; #include - #include + #ifdef LIBCO_MPROTECT + #include + #endif static void co_init() { #ifdef LIBCO_MPROTECT diff --git a/arm.c b/arm.c index 95adf6b2..0ebf2e7a 100644 --- a/arm.c +++ b/arm.c @@ -5,7 +5,9 @@ #include #include #include -#include +#ifdef LIBCO_MPROTECT + #include +#endif #ifdef __cplusplus extern "C" { diff --git a/x86.c b/x86.c index fa1c538f..8b339ee7 100644 --- a/x86.c +++ b/x86.c @@ -53,7 +53,9 @@ static const unsigned char co_swap_function[4096] = { } #else #include - 
#include + #ifdef LIBCO_MPROTECT + #include + #endif static void co_init() { #ifdef LIBCO_MPROTECT From 0d6a02168bfc87878e32f13d5d475e9884ba38b6 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Mon, 2 Dec 2019 19:28:48 +0900 Subject: [PATCH 34/43] v106.231 SFC: disable math color bleed for first pixel (fixes green line on the left-edge of Jurassic Park) SFC/GG: attach Screen settings to Screen node, not PPU/VDP node (fixes remembering Screen settings) --- aarch64.c | 4 +--- amd64.c | 4 +--- arm.c | 4 +--- x86.c | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/aarch64.c b/aarch64.c index 904f688f..b3ffcc49 100644 --- a/aarch64.c +++ b/aarch64.c @@ -6,9 +6,7 @@ #include #include #include -#ifdef LIBCO_MPROTECT - #include -#endif +#include #ifdef __cplusplus extern "C" { diff --git a/amd64.c b/amd64.c index 45f3f6d8..e9424c0b 100644 --- a/amd64.c +++ b/amd64.c @@ -99,9 +99,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; }; #include - #ifdef LIBCO_MPROTECT - #include - #endif + #include static void co_init() { #ifdef LIBCO_MPROTECT diff --git a/arm.c b/arm.c index 0ebf2e7a..95adf6b2 100644 --- a/arm.c +++ b/arm.c @@ -5,9 +5,7 @@ #include #include #include -#ifdef LIBCO_MPROTECT - #include -#endif +#include #ifdef __cplusplus extern "C" { diff --git a/x86.c b/x86.c index 8b339ee7..fa1c538f 100644 --- a/x86.c +++ b/x86.c @@ -53,9 +53,7 @@ static const unsigned char co_swap_function[4096] = { } #else #include - #ifdef LIBCO_MPROTECT - #include - #endif + #include static void co_init() { #ifdef LIBCO_MPROTECT From 1c7e6c58c376cfae57ab1a514613f9f5e656bb02 Mon Sep 17 00:00:00 2001 From: byuu <2107894+byuu@users.noreply.github.com> Date: Mon, 2 Dec 2019 19:45:14 +0900 Subject: [PATCH 35/43] v106.232 Correct merge issue with libco Don't invoke uname on Windows targets [Alcaro] --- aarch64.c | 6 ++++-- amd64.c | 6 ++++-- arm.c | 6 ++++-- ppc.c | 2 +- x86.c | 6 ++++-- 5 files changed, 17 insertions(+), 9 
deletions(-) diff --git a/aarch64.c b/aarch64.c index b3ffcc49..2132b4e5 100644 --- a/aarch64.c +++ b/aarch64.c @@ -5,8 +5,10 @@ #include #include #include -#include -#include +#ifdef LIBCO_MPROTECT + #include + #include +#endif #ifdef __cplusplus extern "C" { diff --git a/amd64.c b/amd64.c index e9424c0b..e96e5247 100644 --- a/amd64.c +++ b/amd64.c @@ -98,8 +98,10 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; 0xff, 0xe0, /* jmp rax */ }; - #include - #include + #ifdef LIBCO_MPROTECT + #include + #include + #endif static void co_init() { #ifdef LIBCO_MPROTECT diff --git a/arm.c b/arm.c index 95adf6b2..1c9dff5c 100644 --- a/arm.c +++ b/arm.c @@ -4,8 +4,10 @@ #include #include -#include -#include +#ifdef LIBCO_MPROTECT + #include + #include +#endif #ifdef __cplusplus extern "C" { diff --git a/ppc.c b/ppc.c index 6b7f4acd..ee6a9a87 100644 --- a/ppc.c +++ b/ppc.c @@ -8,7 +8,7 @@ #include #include -#if LIBCO_MPROTECT +#ifdef LIBCO_MPROTECT #include #include #endif diff --git a/x86.c b/x86.c index fa1c538f..c539a299 100644 --- a/x86.c +++ b/x86.c @@ -52,8 +52,10 @@ static const unsigned char co_swap_function[4096] = { #endif } #else - #include - #include + #ifdef LIBCO_MPROTECT + #include + #include + #endif static void co_init() { #ifdef LIBCO_MPROTECT From 7d9ebc59a0792b5dcaada3ea1c451a982200b8ac Mon Sep 17 00:00:00 2001 From: Sintendo Date: Wed, 26 Feb 2020 23:33:58 +0100 Subject: [PATCH 36/43] libco/aarch64: Fix vector register handling The calling convention specifies that d8-d15 (the lower 64 bits of vector registers q8-q15) are callee-saved. However, libco was erroneously saving and restoring general-purpose registers x8-x15 instead. 
--- aarch64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aarch64.c b/aarch64.c index 2132b4e5..93ecc49d 100644 --- a/aarch64.c +++ b/aarch64.c @@ -24,10 +24,10 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; section(text) #endif static const uint32_t co_swap_function[1024] = { - 0xa9002428, /* stp x8,x9,[x1] */ - 0xa9012c2a, /* stp x10,x11,[x1,#16] */ - 0xa902342c, /* stp x12,x13,[x1,#32] */ - 0xa9033c2e, /* stp x14,x15,[x1,#48] */ + 0x6d002428, /* stp d8,d9,[x1] */ + 0x6d012c2a, /* stp d10,d11,[x1,#16] */ + 0x6d02342c, /* stp d12,d13,[x1,#32] */ + 0x6d033c2e, /* stp d14,d15,[x1,#48] */ 0xf9002433, /* str x19,[x1,#72] */ 0xa9055434, /* stp x20,x21,[x1,#80] */ 0xa9065c36, /* stp x22,x23,[x1,#96] */ @@ -37,10 +37,10 @@ static const uint32_t co_swap_function[1024] = { 0x910003f0, /* mov x16,sp */ 0xa90a7830, /* stp x16,x30,[x1,#160] */ - 0xa9402408, /* ldp x8,x9,[x0] */ - 0xa9412c0a, /* ldp x10,x11,[x0,#16] */ - 0xa942340c, /* ldp x12,x13,[x0,#32] */ - 0xa9433c0e, /* ldp x14,x15,[x0,#48] */ + 0x6d402408, /* ldp d8,d9,[x0] */ + 0x6d412c0a, /* ldp d10,d11,[x0,#16] */ + 0x6d42340c, /* ldp d12,d13,[x0,#32] */ + 0x6d433c0e, /* ldp d14,d15,[x0,#48] */ 0xf9402413, /* ldr x19,[x0,#72] */ 0xa9455414, /* ldp x20,x21,[x0,#80] */ 0xa9465c16, /* ldp x22,x23,[x0,#96] */ From 1cde5dfad891e05a4b22762a03348253900cf031 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Wed, 26 Feb 2020 23:57:32 +0100 Subject: [PATCH 37/43] libco/aarch64: Optimize co_swap_function Improves thread-switching performance using the following techniques: - Interleave stores/loads - Restore the stack pointer and link register as early as possible --- aarch64.c | 57 +++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/aarch64.c b/aarch64.c index 93ecc49d..8505b675 100644 --- a/aarch64.c +++ b/aarch64.c @@ -24,32 +24,31 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; section(text) #endif static 
const uint32_t co_swap_function[1024] = { - 0x6d002428, /* stp d8,d9,[x1] */ - 0x6d012c2a, /* stp d10,d11,[x1,#16] */ - 0x6d02342c, /* stp d12,d13,[x1,#32] */ - 0x6d033c2e, /* stp d14,d15,[x1,#48] */ - 0xf9002433, /* str x19,[x1,#72] */ - 0xa9055434, /* stp x20,x21,[x1,#80] */ - 0xa9065c36, /* stp x22,x23,[x1,#96] */ - 0xa9076438, /* stp x24,x25,[x1,#112] */ - 0xa9086c3a, /* stp x26,x27,[x1,#128] */ - 0xa909743c, /* stp x28,x29,[x1,#144] */ - 0x910003f0, /* mov x16,sp */ - 0xa90a7830, /* stp x16,x30,[x1,#160] */ - - 0x6d402408, /* ldp d8,d9,[x0] */ - 0x6d412c0a, /* ldp d10,d11,[x0,#16] */ - 0x6d42340c, /* ldp d12,d13,[x0,#32] */ - 0x6d433c0e, /* ldp d14,d15,[x0,#48] */ - 0xf9402413, /* ldr x19,[x0,#72] */ - 0xa9455414, /* ldp x20,x21,[x0,#80] */ - 0xa9465c16, /* ldp x22,x23,[x0,#96] */ - 0xa9476418, /* ldp x24,x25,[x0,#112] */ - 0xa9486c1a, /* ldp x26,x27,[x0,#128] */ - 0xa949741c, /* ldp x28,x29,[x0,#144] */ - 0xa94a4410, /* ldp x16,x17,[x0,#160] */ - 0x9100021f, /* mov sp,x16 */ - 0xd61f0220, /* br x17 */ + 0x910003f0, /* mov x16,sp */ + 0xa9007830, /* stp x16,x30,[x1] */ + 0xa9407810, /* ldp x16,x30,[x0] */ + 0x9100021f, /* mov sp,x16 */ + 0xa9015033, /* stp x19,x20,[x1, 16] */ + 0xa9415013, /* ldp x19,x20,[x0, 16] */ + 0xa9025835, /* stp x21,x22,[x1, 32] */ + 0xa9425815, /* ldp x21,x22,[x0, 32] */ + 0xa9036037, /* stp x23,x24,[x1, 48] */ + 0xa9436017, /* ldp x23,x24,[x0, 48] */ + 0xa9046839, /* stp x25,x26,[x1, 64] */ + 0xa9446819, /* ldp x25,x26,[x0, 64] */ + 0xa905703b, /* stp x27,x28,[x1, 80] */ + 0xa945701b, /* ldp x27,x28,[x0, 80] */ + 0xf900303d, /* str x29, [x1, 96] */ + 0xf940301d, /* ldr x29, [x0, 96] */ + 0x6d072428, /* stp d8, d9, [x1,112] */ + 0x6d472408, /* ldp d8, d9, [x0,112] */ + 0x6d082c2a, /* stp d10,d11,[x1,128] */ + 0x6d482c0a, /* ldp d10,d11,[x0,128] */ + 0x6d09342c, /* stp d12,d13,[x1,144] */ + 0x6d49340c, /* ldp d12,d13,[x0,144] */ + 0x6d0a3c2e, /* stp d14,d15,[x1,160] */ + 0x6d4a3c0e, /* ldp d14,d15,[x0,160] */ + 0xd61f03c0, /* br x30 */ 
}; static void co_init() { @@ -77,9 +76,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) if(handle = (unsigned long*)memory) { unsigned int offset = (size & ~15); unsigned long* p = (unsigned long*)((unsigned char*)handle + offset); - handle[19] = (unsigned long)p; /* x29 (frame pointer) */ - handle[20] = (unsigned long)p; /* x30 (stack pointer) */ - handle[21] = (unsigned long)entrypoint; /* x31 (link register) */ + handle[0] = (unsigned long)p; /* x16 (stack pointer) */ + handle[1] = (unsigned long)entrypoint; /* x30 (link register) */ + handle[12] = (unsigned long)p; /* x29 (frame pointer) */ } return handle; From bd9d4c96be27b746bc40dcd205c2585cef0e141b Mon Sep 17 00:00:00 2001 From: Naman Dixit Date: Sat, 6 Jun 2020 17:04:16 +0530 Subject: [PATCH 38/43] Made the dependency on stdlib.h and assert.h optional Some platforms still depend on Libc (if they are exclusively POSIX/Unix/Linux), or need some other functionality (e.g., memcpy) --- aarch64.c | 17 +++++++++++++---- amd64.c | 18 +++++++++++++----- arm.c | 17 +++++++++++++---- ppc.c | 11 ++++++++--- ppc64v2.c | 13 +++++++++---- x86.c | 18 +++++++++++++----- 6 files changed, 69 insertions(+), 25 deletions(-) diff --git a/aarch64.c b/aarch64.c index 8505b675..a77bb075 100644 --- a/aarch64.c +++ b/aarch64.c @@ -2,8 +2,17 @@ #include "libco.h" #include "settings.h" -#include -#include +#if !defined(LIBCO_ASSERT) + #include + #define LIBCO_ASSERT(...) assert(__VA_ARGS__) +#endif + +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) + #define LIBCO_FREE(...) 
free(__VA_ARGS__) +#endif + #include #ifdef LIBCO_MPROTECT #include @@ -85,13 +94,13 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - void* memory = malloc(size); + void* memory = LIBCO_MALLOC(size); if(!memory) return (cothread_t)0; return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { - free(handle); + LIBCO_FREE(handle); } void co_switch(cothread_t handle) { diff --git a/amd64.c b/amd64.c index e96e5247..833b5e2e 100644 --- a/amd64.c +++ b/amd64.c @@ -2,8 +2,16 @@ #include "libco.h" #include "settings.h" -#include -#include +#if !defined(LIBCO_ASSERT) + #include + #define LIBCO_ASSERT(...) assert(__VA_ARGS__) +#endif + +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) + #define LIBCO_FREE(...) free(__VA_ARGS__) +#endif #ifdef __cplusplus extern "C" { @@ -114,7 +122,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #endif static void crash() { - assert(0); /* called only if cothread_t entrypoint returns */ + LIBCO_ASSERT(0); /* called only if cothread_t entrypoint returns */ } cothread_t co_active() { @@ -142,13 +150,13 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - void* memory = malloc(size); + void* memory = LIBCO_MALLOC(size); if(!memory) return (cothread_t)0; return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { - free(handle); + LIBCO_FREE(handle); } void co_switch(cothread_t handle) { diff --git a/arm.c b/arm.c index 1c9dff5c..4a4aef51 100644 --- a/arm.c +++ b/arm.c @@ -2,8 +2,17 @@ #include "libco.h" #include "settings.h" -#include -#include +#if !defined(LIBCO_ASSERT) + #include + #define LIBCO_ASSERT(...) 
assert(__VA_ARGS__) +#endif + +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) + #define LIBCO_FREE(...) free(__VA_ARGS__) +#endif + #ifdef LIBCO_MPROTECT #include #include @@ -61,13 +70,13 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - void* memory = malloc(size); + void* memory = LIBCO_MALLOC(size); if(!memory) return (cothread_t)0; return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { - free(handle); + LIBCO_FREE(handle); } void co_switch(cothread_t handle) { diff --git a/ppc.c b/ppc.c index ee6a9a87..bcbdf966 100644 --- a/ppc.c +++ b/ppc.c @@ -4,7 +4,12 @@ #include "libco.h" #include "settings.h" -#include +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) + #define LIBCO_FREE(...) free(__VA_ARGS__) +#endif + #include #include @@ -327,7 +332,7 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entry_)(void)) { static uint32_t* co_create_(unsigned size, uintptr_t entry) { (void)entry; - uint32_t* t = (uint32_t*)malloc(size); + uint32_t* t = (uint32_t*)LIBCO_MALLOC(size); #if LIBCO_PPCDESC if(t) { @@ -390,7 +395,7 @@ cothread_t co_create(unsigned int size, void (*entry_)(void)) { } void co_delete(cothread_t t) { - free(t); + LIBCO_FREE(t); } static void co_init_(void) { diff --git a/ppc64v2.c b/ppc64v2.c index 3c296959..1cac8843 100644 --- a/ppc64v2.c +++ b/ppc64v2.c @@ -5,7 +5,12 @@ #include "settings.h" #include -#include + +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) + #define LIBCO_FREE(...) 
free(__VA_ARGS__) +#endif #ifdef __cplusplus extern "C" { @@ -223,7 +228,7 @@ __asm__( cothread_t co_active() { if(!co_active_handle) { - co_active_handle = (struct ppc64_context*)malloc(MIN_STACK + sizeof(struct ppc64_context)); + co_active_handle = (struct ppc64_context*)LIBCO_MALLOC(MIN_STACK + sizeof(struct ppc64_context)); } return (cothread_t)co_active_handle; } @@ -255,13 +260,13 @@ cothread_t co_derive(void* memory, unsigned int size, void (*coentry)(void)) { } cothread_t co_create(unsigned int size, void (*coentry)(void)) { - void* memory = malloc(size); + void* memory = LIBCO_MALLOC(size); if(!memory) return (cothread_t)0; return co_derive(memory, size, coentry); } void co_delete(cothread_t handle) { - free(handle); + LIBCO_FREE(handle); } void co_switch(cothread_t to) { diff --git a/x86.c b/x86.c index c539a299..d2c760ad 100644 --- a/x86.c +++ b/x86.c @@ -2,8 +2,16 @@ #include "libco.h" #include "settings.h" -#include -#include +#if !defined(LIBCO_ASSERT) + #include + #define LIBCO_ASSERT(...) assert(__VA_ARGS__) +#endif + +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) + #define LIBCO_FREE(...) 
free(__VA_ARGS__) +#endif #ifdef __cplusplus extern "C" { @@ -68,7 +76,7 @@ static const unsigned char co_swap_function[4096] = { #endif static void crash() { - assert(0); /* called only if cothread_t entrypoint returns */ + LIBCO_ASSERT(0); /* called only if cothread_t entrypoint returns */ } cothread_t co_active() { @@ -96,13 +104,13 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) } cothread_t co_create(unsigned int size, void (*entrypoint)(void)) { - void* memory = malloc(size); + void* memory = LIBCO_MALLOC(size); if(!memory) return (cothread_t)0; return co_derive(memory, size, entrypoint); } void co_delete(cothread_t handle) { - free(handle); + LIBCO_FREE(handle); } void co_switch(cothread_t handle) { From fb700978ac6a415f23e398071057416b13d9b219 Mon Sep 17 00:00:00 2001 From: Naman Dixit Date: Sat, 6 Jun 2020 18:57:44 +0530 Subject: [PATCH 39/43] Moved the libc override macros to settings.h, and removed __VA_ARGS__ --- aarch64.c | 11 ----------- amd64.c | 11 ----------- arm.c | 11 ----------- ppc.c | 6 ------ ppc64v2.c | 6 ------ settings.h | 12 ++++++++++++ x86.c | 11 ----------- 7 files changed, 12 insertions(+), 56 deletions(-) diff --git a/aarch64.c b/aarch64.c index a77bb075..980686b8 100644 --- a/aarch64.c +++ b/aarch64.c @@ -2,17 +2,6 @@ #include "libco.h" #include "settings.h" -#if !defined(LIBCO_ASSERT) - #include - #define LIBCO_ASSERT(...) assert(__VA_ARGS__) -#endif - -#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) - #include - #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) - #define LIBCO_FREE(...) free(__VA_ARGS__) -#endif - #include #ifdef LIBCO_MPROTECT #include diff --git a/amd64.c b/amd64.c index 833b5e2e..fa454809 100644 --- a/amd64.c +++ b/amd64.c @@ -2,17 +2,6 @@ #include "libco.h" #include "settings.h" -#if !defined(LIBCO_ASSERT) - #include - #define LIBCO_ASSERT(...) assert(__VA_ARGS__) -#endif - -#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) - #include - #define LIBCO_MALLOC(...) 
malloc(__VA_ARGS__) - #define LIBCO_FREE(...) free(__VA_ARGS__) -#endif - #ifdef __cplusplus extern "C" { #endif diff --git a/arm.c b/arm.c index 4a4aef51..6f0aa24d 100644 --- a/arm.c +++ b/arm.c @@ -2,17 +2,6 @@ #include "libco.h" #include "settings.h" -#if !defined(LIBCO_ASSERT) - #include - #define LIBCO_ASSERT(...) assert(__VA_ARGS__) -#endif - -#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) - #include - #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) - #define LIBCO_FREE(...) free(__VA_ARGS__) -#endif - #ifdef LIBCO_MPROTECT #include #include diff --git a/ppc.c b/ppc.c index bcbdf966..a39d558e 100644 --- a/ppc.c +++ b/ppc.c @@ -4,12 +4,6 @@ #include "libco.h" #include "settings.h" -#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) - #include - #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) - #define LIBCO_FREE(...) free(__VA_ARGS__) -#endif - #include #include diff --git a/ppc64v2.c b/ppc64v2.c index 1cac8843..d302a28a 100644 --- a/ppc64v2.c +++ b/ppc64v2.c @@ -6,12 +6,6 @@ #include -#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) - #include - #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) - #define LIBCO_FREE(...) free(__VA_ARGS__) -#endif - #ifdef __cplusplus extern "C" { #endif diff --git a/settings.h b/settings.h index d8037bc4..695ebfb7 100644 --- a/settings.h +++ b/settings.h @@ -26,6 +26,17 @@ #define alignas(bytes) #endif +#if !defined(LIBCO_ASSERT) + #include + #define LIBCO_ASSERT assert +#endif + +#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) + #include + #define LIBCO_MALLOC malloc + #define LIBCO_FREE free +#endif + #if defined(_MSC_VER) #define section(name) __declspec(allocate("." #name)) #elif defined(__APPLE__) @@ -34,5 +45,6 @@ #define section(name) __attribute__((section("." #name "#"))) #endif + /* if defined(LIBCO_C) */ #endif diff --git a/x86.c b/x86.c index d2c760ad..d31b4ac3 100644 --- a/x86.c +++ b/x86.c @@ -2,17 +2,6 @@ #include "libco.h" #include "settings.h" -#if !defined(LIBCO_ASSERT) - #include - #define LIBCO_ASSERT(...) 
assert(__VA_ARGS__) -#endif - -#if !defined(LIBCO_MALLOC) || !defined(LIBCO_FREE) - #include - #define LIBCO_MALLOC(...) malloc(__VA_ARGS__) - #define LIBCO_FREE(...) free(__VA_ARGS__) -#endif - #ifdef __cplusplus extern "C" { #endif From 20fa36a7d8f22d39b9ad9b14c8e54773be7ce22e Mon Sep 17 00:00:00 2001 From: Kawa Date: Sat, 6 Jun 2020 16:29:44 +0200 Subject: [PATCH 40/43] Change docs from HTML to MD --- README.md | 10 ++- doc/style.css | 12 --- doc/targets.html | 89 -------------------- doc/targets.md | 68 ++++++++++++++++ doc/{usage.html => usage.md} | 154 ++++++++++++++++++++--------------- 5 files changed, 162 insertions(+), 171 deletions(-) delete mode 100644 doc/style.css delete mode 100644 doc/targets.html create mode 100644 doc/targets.md rename doc/{usage.html => usage.md} (53%) diff --git a/README.md b/README.md index 11e0c34e..fabc5a1c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -libco ------ +# libco libco is a cooperative multithreading library written in C89. @@ -21,7 +20,10 @@ It currently includes backends for: * POSIX platforms (setjmp) * Windows platforms (fibers) -License -======= +See [doc/targets.md](doc/targets.md) for details. + +See [doc/usage.md](doc/usage.md) for documentation. + +## License libco is released under the ISC license. diff --git a/doc/style.css b/doc/style.css deleted file mode 100644 index ab070256..00000000 --- a/doc/style.css +++ /dev/null @@ -1,12 +0,0 @@ -body { - background: #333; - color: #fff; -} - -code { - background: #444; -} - -a { - color: #aaf; -} diff --git a/doc/targets.html b/doc/targets.html deleted file mode 100644 index d6211a15..00000000 --- a/doc/targets.html +++ /dev/null @@ -1,89 +0,0 @@ - - - - - - - -Supported targets:

    - -Note that supported targets are only those that have been tested and confirmed -working. It is quite possible that libco will work on more processors, compilers -and operating systems than those listed below. -
    - -libco.x86
    -Overhead: ~5x
    -Supported processor(s): 32-bit x86
    -Supported compiler(s): any
    -Supported operating system(s):
      -
    • Windows
    • -
    • Mac OS X
    • -
    • Linux
    • -
    • BSD
    • -
    -
    - -libco.amd64
    -Overhead: ~10x (Windows), ~6x (all other platforms)
    -Supported processor(s): 64-bit amd64
    -Supported compiler(s): any
    -Supported operating system(s):
      -
    • Windows
    • -
    • Mac OS X
    • -
    • Linux
    • -
    • BSD
    • -
    -
    - -libco.ppc
    -Overhead: ~20x
    -Supported processor(s): 32-bit PowerPC, 64-bit PowerPC
    -Supported compiler(s): GNU GCC
    -Supported operating system(s):
      -
    -
  • Mac OS X
  • -
  • Linux
  • -
  • BSD
  • -
  • Playstation 3
  • - -
    - -Note: this module contains compiler flags to enable/disable FPU and Altivec -support. - -
    - -libco.fiber
    -Overhead: ~15x
    -Supported processor(s): Processor independent
    -Supported compiler(s): any
    -Supported operating system(s):
      -
    • Windows
    • -
    -
    - -libco.sjlj
    -Overhead: ~30x
    -Supported processor(s): Processor independent
    -Supported compiler(s): any
    -Supported operating system(s):
      -
    • Mac OS X
    • -
    • Linux
    • -
    • BSD
    • -
    • Solaris
    • -
    -
    - -libco.ucontext
    -Overhead: ~300x
    -Supported processor(s): Processor independent
    -Supported compiler(s): any
    -Supported operating system(s):
      -
    • Linux
    • -
    • BSD
    • -
    -
    - - - diff --git a/doc/targets.md b/doc/targets.md new file mode 100644 index 00000000..29400c48 --- /dev/null +++ b/doc/targets.md @@ -0,0 +1,68 @@ +# Supported targets +In the following lists, supported targets are only those that have been tested +and confirmed working. It is quite possible that libco will work on more +processors, compilers and operating systems than those listed below. + +The "Overhead" is the cost of switching co-routines, as compared to an ordinary +C function call. + +## libco.x86 +* **Overhead:** ~5x +* **Supported processor(s):** 32-bit x86 +* **Supported compiler(s):** any +* **Supported operating system(s):** + * Windows + * Mac OS X + * Linux + * BSD + +## libco.amd64 +* **Overhead:** ~10x (Windows), ~6x (all other platforms) +* **Supported processor(s):** 64-bit amd64 +* **Supported compiler(s):** any +* **Supported operating system(s):** + * Windows + * Mac OS X + * Linux + * BSD + +## libco.ppc +* **Overhead:** ~20x +* **Supported processor(s):** 32-bit PowerPC, 64-bit PowerPC +* **Supported compiler(s):** GNU GCC +* **Supported operating system(s):** + * Mac OS X + * Linux + * BSD + * Playstation 3 + +**Note:** this module contains compiler flags to enable/disable FPU and Altivec +support. + +## libco.fiber +This uses Windows' "fibers" API. +* **Overhead:** ~15x +* **Supported processor(s):** Processor independent +* **Supported compiler(s):** any +* **Supported operating system(s):** + * Windows + +## libco.sjlj +This uses the C standard library's `setjmp`/`longjmp` APIs. +* **Overhead:** ~30x +* **Supported processor(s):** Processor independent +* **Supported compiler(s):** any +* **Supported operating system(s):** + * Mac OS X + * Linux + * BSD + * Solaris + +## libco.ucontext +This uses the POSIX "ucontext" API.
+* **Overhead:** ***~300x*** +* **Supported processor(s):** Processor independent +* **Supported compiler(s):** any +* **Supported operating system(s):** + * Linux + * BSD diff --git a/doc/usage.html b/doc/usage.md similarity index 53% rename from doc/usage.html rename to doc/usage.md index 994072f9..cb0d1929 100644 --- a/doc/usage.html +++ b/doc/usage.md @@ -1,108 +1,130 @@ - - - - - - - -License:

    +# License libco is released under the ISC license. -
    -Foreword:

    +# Foreword libco is a cross-platform, permissively licensed implementation of cooperative-multithreading; a feature that is sorely lacking from the ISO C/C++ -standard.
    +standard. + The library is designed for maximum speed and portability, and not for safety or features. If safety or extra functionality is desired, a wrapper API can easily -be written to encapsulate all library functions.
    +be written to encapsulate all library functions. + Behavior of executing operations that are listed as not permitted below result in undefined behavior. They may work anyway, they may cause undesired / unknown -behavior, or they may crash the program entirely.
    +behavior, or they may crash the program entirely. + The goal of this library was to simplify the base API as much as possible, implementing only that which cannot be implemented using pure C. Additional functionality after this would only complicate ports of this library to new platforms. -
    -Porting:

    +# Porting This document is included as a reference for porting libco. Please submit any ports you create to me, so that libco can become more useful. Please note that since libco is permissively licensed, you must submit your code as a work of the public domain in order for it to be included in the official distribution. + Full credit will be given in the source code of the official release. Please do not bother submitting code to me under any other license -- including GPL, LGPL, BSD or CC -- I am not interested in creating a library with multiple different licenses depending on which targets are used. -
    -Synopsis:

    - -typedef void* cothread_t;
    -
    -cothread_t co_active();
    -cothread_t co_create(unsigned int heapsize, void (*coentry)(void));
    -void       co_delete(cothread_t cothread);
    -void       co_switch(cothread_t cothread);
    -
    -
    +Note that there are a variety of compile-time options in `settings.h`, +so if you want to use libco on a platform where it is not supported by default, +you may be able to configure the implementation appropriately without having +to make a whole new port. -Usage: -
    +# Synopsis +```c +typedef void* cothread_t; -typedef void* cothread_t;

    -Handle to cothread.
    -Handle must be of type void*.
    -A value of null (0) indicates an uninitialized or invalid -handle, whereas a non-zero value indicates a valid handle. -
    +cothread_t co_active(); +cothread_t co_create(unsigned int heapsize, void (*coentry)(void)); +void co_delete(cothread_t cothread); +void co_switch(cothread_t cothread); +``` -cothread_t co_active();

    -Return handle to current cothread. Always returns a valid handle, even when -called from the main program thread. -
    +# Usage +## cothread_t +```c +typedef void* cothread_t; +``` +Handle to cothread. -cothread_t co_derive(void* memory, unsigned int heapsize, void (*coentry)(void));

    -Initializes new cothread.
    -This function is identical to co_create, only it attempts to use the provided +Handle must be of type `void*`. + +A value of `null` (0) indicates an uninitialized or invalid handle, whereas a non-zero value indicates a valid handle. + +## co_active +```c +cothread_t co_active(); +``` +Return handle to current cothread. + +Always returns a valid handle, even when called from the main program thread. + +## co_derive +```c +cothread_t co_derive(void* memory, + unsigned int heapsize, + void (*coentry)(void)); +``` +Initializes new cothread. + +This function is identical to `co_create`, only it attempts to use the provided memory instead of allocating new memory on the heap. Please note that certain implementations (currently only Windows Fibers) cannot be created using existing memory, and as such, this function will fail. -
    -cothread_t co_create(unsigned int heapsize, void (*coentry)(void));

    -Create new cothread.
    -Heapsize is the amount of memory allocated for the cothread stack, specified +## co_create +```c +cothread_t co_create(unsigned int heapsize, + void (*coentry)(void)); +``` +Create new cothread. + +`heapsize` is the amount of memory allocated for the cothread stack, specified in bytes. This is unfortunately impossible to make fully portable. It is -recommended to specify sizes using `n * sizeof(void*)'. It is better to err +recommended to specify sizes using `n * sizeof(void*)`. It is better to err on the side of caution and allocate more memory than will be needed to ensure compatibility with other platforms, within reason. A typical heapsize for a -32-bit architecture is ~1MB.
    +32-bit architecture is ~1MB. + When the new cothread is first called, program execution jumps to coentry. This function does not take any arguments, due to portability issues with passing function arguments. However, arguments can be simulated by the use -of global variables, which can be set before the first call to each cothread.
    -coentry() must not return, and should end with an appropriate co_switch() -statement. Behavior is undefined if entry point returns normally.
    +of global variables, which can be set before the first call to each cothread. + +`coentry()` must not return, and should end with an appropriate `co_switch()` +statement. Behavior is undefined if entry point returns normally. + Library is responsible for allocating cothread stack memory, to free the user from needing to allocate special memory capable of being used -as program stack memory on platforms where this is required.
    -User is always responsible for deleting cothreads with co_delete().
    -Return value of null (0) indicates cothread creation failed. -
    +as program stack memory on platforms where this is required. -void co_delete(cothread_t cothread);

    -Delete specified cothread.
    -Null (0) or invalid cothread handle is not allowed.
    -Passing handle of active cothread to this function is not allowed.
    -Passing handle of primary cothread is not allowed. -
    +User is always responsible for deleting cothreads with `co_delete()`. + +Return value of `null` (0) indicates cothread creation failed. + +## co_delete +```c +void co_delete(cothread_t cothread); +``` +Delete specified cothread. + +`null` (0) or invalid cothread handle is not allowed. -void co_switch(cothread_t cothread);

    -Switch to specified cothread.
    -Null (0) or invalid cothread handle is not allowed.
    Passing handle of active cothread to this function is not allowed. -
    - - +Passing handle of primary cothread is not allowed. + +## co_switch +```c +void co_switch(cothread_t cothread); +``` +Switch to specified cothread. + +`null` (0) or invalid cothread handle is not allowed. + +Passing handle of active cothread to this function is not allowed. From d4a67ab8d5741ddc7167515f8f87285d908a7609 Mon Sep 17 00:00:00 2001 From: Naman Dixit Date: Thu, 25 Jun 2020 11:39:40 +0530 Subject: [PATCH 41/43] Added void to function signatures, in order to fix Clang's -Wstrict-prototypes --- libco.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libco.h b/libco.h index 88d00a72..633cd542 100644 --- a/libco.h +++ b/libco.h @@ -13,12 +13,12 @@ extern "C" { typedef void* cothread_t; -cothread_t co_active(); +cothread_t co_active(void); cothread_t co_derive(void*, unsigned int, void (*)(void)); cothread_t co_create(unsigned int, void (*)(void)); void co_delete(cothread_t); void co_switch(cothread_t); -int co_serializable(); +int co_serializable(void); #ifdef __cplusplus } From 096df4242a1062904369a417449c1d12fe40f507 Mon Sep 17 00:00:00 2001 From: Naman Dixit Date: Sat, 6 Jun 2020 15:11:09 +0530 Subject: [PATCH 42/43] Proper cross-platform thread_local and alignas --- settings.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/settings.h b/settings.h index 695ebfb7..3bb77264 100644 --- a/settings.h +++ b/settings.h @@ -10,20 +10,74 @@ do not use this unless you are certain your application won't use SSE */ /* #define LIBCO_NO_SSE */ -#if defined(LIBCO_C) - #if defined(LIBCO_MP) - #define thread_local __thread - #else +#if !defined(thread_local) // User can override thread_local for obscure compilers + #if !defined(LIBCO_MP) // Running in single-threaded environment #define thread_local + #else // Running in multi-threaded environment + #if defined(__STDC_VERSION__) // Compiling as C Language + #if defined(_MSC_VER) // Don't rely on MSVC's C11 support + 
#define thread_local __declspec(thread) + #elif __STDC_VERSION__ < 201112L // If we are on C90/99 + #if defined(__clang__) || defined(__GNUC__) // Clang and GCC + #define thread_local __thread + #else // Otherwise, we ignore the directive (unless user provides their own) + #define thread_local + #endif + #else // C11 and newer define thread_local in threads.h + #include <threads.h> + #endif + #elif defined(__cplusplus) // Compiling as C++ Language + #if __cplusplus < 201103L // thread_local is a C++11 feature + #if defined(_MSC_VER) + #define thread_local __declspec(thread) + #elif defined(__clang__) || defined(__GNUC__) + #define thread_local __thread + #else // Otherwise, we ignore the directive (unless user provides their own) + #define thread_local + #endif + #else // In C++ >= 11, thread_local is a builtin keyword + // Don't do anything + #endif + #endif #endif #endif -#if __STDC_VERSION__ >= 201112L - #if !defined(_MSC_VER) - #include - #endif -#else - #define alignas(bytes) +/* In alignas(a), 'a' should be a power of two that is at least the type's + alignment and at most the implementation's alignment limit. This limit is + 2**13 on MSVC. To be portable to MSVC through at least version 10.0, + 'a' should be an integer constant, as MSVC does not support expressions + such as 1 << 3. + + The following C11 requirements are NOT supported on MSVC: + + - If 'a' is zero, alignas has no effect. + - alignas can be used multiple times; the strictest one wins. + - alignas (TYPE) is equivalent to alignas (alignof (TYPE)).
+*/ +#if !defined(alignas) + #if defined(__STDC_VERSION__) // C Language + #if defined(_MSC_VER) // Don't rely on MSVC's C11 support + #define alignas(bytes) __declspec(align(bytes)) + #elif __STDC_VERSION__ >= 201112L // C11 and above + #include + #elif defined(__clang__) || defined(__GNUC__) // C90/99 on Clang/GCC + #define alignas(bytes) __attribute__ ((aligned (bytes))) + #else // Otherwise, we ignore the directive (user should provide their own) + #define alignas(bytes) + #endif + #elif defined(__cplusplus) // C++ Language + #if __cplusplus < 201103L + #if defined(_MSC_VER) + #define alignas(bytes) __declspec(align(bytes)) + #elif defined(__clang__) || defined(__GNUC__) // C++98/03 on Clang/GCC + #define alignas(bytes) __attribute__ ((aligned (bytes))) + #else // Otherwise, we ignore the directive (unless user provides their own) + #define alignas(bytes) + #endif + #else // C++ >= 11 has alignas keyword + // Do nothing + #endif + #endif // = !defined(__STDC_VERSION__) && !defined(__cplusplus) #endif #if !defined(LIBCO_ASSERT) From d31c6e75a54be12307d65fd80f55adfbe7aa9d67 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Mon, 24 Aug 2020 21:20:02 +1000 Subject: [PATCH 43/43] Add a copy of the ISC license. Apparently there's two versions of the ISC license; the original says "and" while the updated one says "and/or". This was copied from the ares v115 release, so it should be the variant byuu intended by "ISC". Fixes #19. --- LICENSE | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..77190498 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +ISC License (ISC) + +Copyright byuu and the higan team + +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. 
+ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.