bsnes/ruby/video/xshm.cpp
Tim Allen 0b4e7fb5a5 Update to v103r17 release.
byuu says:

Changelog:

  - tomoko: re-hid the video sync option¹
  - tomoko: removed " Settings" duplication on all the individual
    settings tab options
  - ruby/audio/wasapi: finished port to new syntax; adapted to an
    event-driven model; support 32-bit integral audio²
  - ruby/video/sdl: ported to new syntax; disabled driver on FreeBSD³

¹: still contemplating a synchronize submenu of {none, video, audio},
but ... the fact that video can't work on PAL, WonderSwan games is a
real limitation for it

²: this driver actually received a ton of work. There's also a new
ring-buffer queue, and I added special handling for when exclusive mode
fails because the latency requested is lower than the hardware can
support. It'll pick the closest latency to the minimum that is possible
in this case.

On my Audigy Rx, the results for non-exclusive mode are the same. For
exclusive mode, the framerate drops from 60fps to ~50fps for smaller
buffers, and ~55fps for larger buffers (no matter how big, it never hits
60fps.) This is a lot better than before where it was hitting ~15fps,
but unfortunately it's the best I can do.

The event system used by WASAPI is really stupid. It just uses SetEvent
at some arbitrary time, and you have to query to see how many samples
it's waiting on. This makes it unknowable how many samples we should
buffer before calling `WaitForSingleObject(INFINITE)`, and it's also
unclear how we should handle cases where there are more samples available
than our queue has: either we can fill it with zeroes, or we can write
fewer samples. The former should prevent audio looping effects when
running too slowly, whereas the latter could potentially be too
ambitious when the audio could've recovered from a minor stall.

It's shocking to me how there are as many ways to send audio to a sound
card as there are sound card APIs, when all that's needed is a simple
double buffer and a callback event from another thread to do it right.
It's also terrifying how unbelievably shitty nearly all sound card
drivers apparently are.

Also, I don't know if cards can output an actual 24-bit mode with three
byte audio samples, or if they always just take 32-bit samples and
ignore the lower 8-bits. Whatever, it's all nonsense for the final
output to be >16-bits anyway (hi, `double[]` input from ruby.)

³: unfortunately, this driver always crashes on FreeBSD (even before
the rewrite), so I'll need someone on Linux to test it and make sure it
actually works. I'll also need testing for a lot of the other drivers as
well, once they're ported over (I don't have X-video, PulseAudio, ALSA,
or udev.)

Note that I forgot to set `_ready=true` at the end of `initialize()`,
and `_ready=false` in `terminate()`, but it shouldn't actually matter
beyond showing you a false warning message on startup about it failing
to initialize.
2017-07-19 23:14:00 +10:00

198 lines
5.5 KiB
C++

//XShm driver for Xorg
//Note that on composited displays, the alpha bits will allow translucency underneath the active window
//As this is not a feature of ruby, this driver must always set the alpha bits on clear() and refresh()
//Linear interpolation is only applied horizontally for performance reasons, although Nearest is still much faster
#include <sys/shm.h>
#include <X11/extensions/XShm.h>
//Software video driver that scales a 32-bit input frame into an X shared-memory image
//and blits it to a child window created inside the context window.
//Every pixel written to the output has its top byte forced to 255 (see file header note on alpha).
struct VideoXShm : Video {
  VideoXShm() { initialize(); }

  ~VideoXShm() {
    terminate();
    //the input buffer is owned by this object (allocated in lock()) and must be released here
    delete[] _inputBuffer;
    _inputBuffer = nullptr;
  }

  //true only while the display, window and shared-memory image are all set up
  auto ready() -> bool { return _ready; }
  auto context() -> uintptr { return _context; }
  auto smooth() -> bool { return _smooth; }

  //context: handle of the parent X window to render into; 0 tears the driver down
  //returns: result of re-initialization (true immediately if the context is unchanged)
  auto setContext(uintptr context) -> bool {
    if(_context == context) return true;
    _context = context;
    return initialize();
  }

  //smooth: true selects horizontal linear interpolation, false selects nearest-neighbor
  auto setSmooth(bool smooth) -> bool {
    _smooth = smooth;
    return true;
  }

  //fills the input frame (if one has been allocated) with opaque black and presents it
  auto clear() -> void {
    if(!_ready) return;
    if(_inputBuffer) {
      auto dp = _inputBuffer;
      uint length = _inputWidth * _inputHeight;
      while(length--) *dp++ = 255u << 24;
    }
    output();  //output() paints opaque black by itself when there is no input frame yet
  }

  //hands the caller a width*height buffer of 32-bit pixels to draw the next frame into
  //data:  receives the buffer address
  //pitch: receives the row length in bytes
  //the buffer is reallocated only when the requested dimensions change
  auto lock(uint32_t*& data, uint& pitch, uint width, uint height) -> bool {
    if(!_inputBuffer || _inputWidth != width || _inputHeight != height) {
      delete[] _inputBuffer;
      _inputWidth = width;
      _inputHeight = height;
      //+16 is padding for linear interpolation, which reads one pixel past the sampled one;
      //value-initialize so that the padding never holds indeterminate data
      _inputBuffer = new uint32_t[width * height + 16]();
    }

    data = _inputBuffer;
    pitch = _inputWidth * sizeof(uint32_t);
    return true;
  }

  auto unlock() -> void {
  }

  //scales the input frame to the current window size and blits it
  auto output() -> void {
    if(!_ready) return;
    //size() (re)allocates the shared image when the window size changed;
    //on failure there is no output buffer, so skip this frame instead of writing through null
    if(!size()) return;

    if(!_inputBuffer) {
      //nothing has been locked yet: present an opaque black frame
      auto dp = _outputBuffer;
      uint length = _outputWidth * _outputHeight;
      while(length--) *dp++ = 255u << 24;
    } else {
      float xratio = (float)_inputWidth / (float)_outputWidth;
      float yratio = (float)_inputHeight / (float)_outputHeight;

      //rows are independent of each other, so they can be scaled in parallel
      #pragma omp parallel for
      for(uint y = 0; y < _outputHeight; y++) {
        float ystep = y * yratio;
        float xstep = 0;

        uint32_t* sp = _inputBuffer + (uint)ystep * _inputWidth;
        uint32_t* dp = _outputBuffer + y * _outputWidth;

        if(!_smooth) {
          for(uint x = 0; x < _outputWidth; x++) {
            *dp++ = 255u << 24 | sp[(uint)xstep];
            xstep += xratio;
          }
        } else {
          for(uint x = 0; x < _outputWidth; x++) {
            //blend the sampled pixel with its right-hand neighbor by the fractional position
            *dp++ = 255u << 24 | interpolate(xstep - (uint)xstep, sp[(uint)xstep], sp[(uint)xstep + 1]);
            xstep += xratio;
          }
        }
      }
    }

    GC gc = XCreateGC(_display, _window, 0, 0);
    XShmPutImage(_display, _window, gc, _image, 0, 0, 0, 0, _outputWidth, _outputHeight, False);
    XFreeGC(_display, gc);
    XFlush(_display);
  }

private:
  //tears down any previous state, then opens a display connection, creates the child window
  //inside the context window and allocates the shared-memory image
  //returns false (leaving the driver not ready) if any step fails
  auto initialize() -> bool {
    terminate();
    if(!_context) return false;

    _display = XOpenDisplay(0);
    if(!_display) return false;
    _screen = DefaultScreen(_display);

    //without the MIT-SHM extension none of the XShm* calls below can work
    if(!XShmQueryExtension(_display)) {
      terminate();
      return false;
    }

    XWindowAttributes getAttributes;
    if(!XGetWindowAttributes(_display, (Window)_context, &getAttributes)) {
      terminate();
      return false;
    }
    _depth = getAttributes.depth;
    _visual = getAttributes.visual;
    //driver only supports 32-bit pixels
    //note that even on 15-bit and 16-bit displays, the window visual's depth should be 32
    if(_depth < 24 || _depth > 32) {
      terminate();
      return false;
    }

    XSetWindowAttributes setAttributes = {0};
    setAttributes.border_pixel = 0;
    //initial 256x256 size is a placeholder; size() resizes the window to match the context window
    _window = XCreateWindow(_display, (Window)_context,
      0, 0, 256, 256, 0,
      getAttributes.depth, InputOutput, getAttributes.visual,
      CWBorderPixel, &setAttributes
    );
    XSetWindowBackground(_display, _window, 0);
    XMapWindow(_display, _window);
    XFlush(_display);

    //drain any events already queued on this connection
    while(XPending(_display)) {
      XEvent event;
      XNextEvent(_display, &event);
    }

    if(!size()) {
      terminate();
      return false;
    }
    return _ready = true;
  }

  //releases the shared-memory image and the display connection; safe to call repeatedly
  auto terminate() -> void {
    //clear first so that output()/clear() become no-ops once the display is gone
    _ready = false;
    free();
    if(_display) {
      XCloseDisplay(_display);
      _display = nullptr;
      _window = 0;  //handle belonged to the connection that was just closed
    }
  }

  //ensures the output image matches the context window's current size
  //returns true when a usable output buffer exists afterwards
  auto size() -> bool {
    XWindowAttributes windowAttributes;
    if(!XGetWindowAttributes(_display, (Window)_context, &windowAttributes)) return false;
    if(windowAttributes.width <= 0 || windowAttributes.height <= 0) return false;

    if(_outputBuffer
    && _outputWidth == (uint)windowAttributes.width
    && _outputHeight == (uint)windowAttributes.height
    ) return true;

    _outputWidth = windowAttributes.width;
    _outputHeight = windowAttributes.height;
    XResizeWindow(_display, _window, _outputWidth, _outputHeight);
    free();

    _shmInfo.shmid = shmget(IPC_PRIVATE, _outputWidth * _outputHeight * sizeof(uint32_t), IPC_CREAT | 0777);
    if(_shmInfo.shmid < 0) return false;

    _shmInfo.shmaddr = (char*)shmat(_shmInfo.shmid, 0, 0);
    if(_shmInfo.shmaddr == (char*)-1) {
      //shmat() signals failure with (void*)-1, not null
      shmctl(_shmInfo.shmid, IPC_RMID, 0);
      return false;
    }
    _shmInfo.readOnly = False;

    if(!XShmAttach(_display, &_shmInfo)) {
      shmdt(_shmInfo.shmaddr);
      shmctl(_shmInfo.shmid, IPC_RMID, 0);
      return false;
    }

    _image = XShmCreateImage(_display, _visual, _depth, ZPixmap, _shmInfo.shmaddr, &_shmInfo, _outputWidth, _outputHeight);
    if(!_image) {
      XShmDetach(_display, &_shmInfo);
      shmdt(_shmInfo.shmaddr);
      shmctl(_shmInfo.shmid, IPC_RMID, 0);
      return false;
    }

    //publish the buffer only once everything succeeded; free() keys off this pointer
    _outputBuffer = (uint32_t*)_shmInfo.shmaddr;
    return true;
  }

  //releases the shared-memory image, if one is currently allocated
  auto free() -> void {
    if(_outputBuffer) {
      _outputBuffer = nullptr;
      XShmDetach(_display, &_shmInfo);
      XDestroyImage(_image);
      _image = nullptr;
      shmdt(_shmInfo.shmaddr);
      shmctl(_shmInfo.shmid, IPC_RMID, 0);
    }
  }

  //blends the red, green and blue channels of a and b; mu=0 yields a, mu=1 yields b
  //the top byte of the result is zero; the caller sets it
  alwaysinline auto interpolate(float mu, uint32_t a, uint32_t b) -> uint32_t {
    uint8_t ar = a >> 16, ag = a >> 8, ab = a >> 0;
    uint8_t br = b >> 16, bg = b >> 8, bb = b >> 0;
    uint8_t cr = ar * (1.0 - mu) + br * mu;
    uint8_t cg = ag * (1.0 - mu) + bg * mu;
    uint8_t cb = ab * (1.0 - mu) + bb * mu;
    return cr << 16 | cg << 8 | cb << 0;
  }

  bool _ready = false;
  uintptr _context = 0;
  bool _smooth = true;

  //input frame, written by the caller between lock() and unlock()
  uint32_t* _inputBuffer = nullptr;
  uint _inputWidth = 0;
  uint _inputHeight = 0;

  Display* _display = nullptr;
  int _screen = 0;
  int _depth = 0;
  Visual* _visual = nullptr;
  Window _window = 0;

  //output frame, backed by the shared-memory segment described by _shmInfo
  XShmSegmentInfo _shmInfo;
  XImage* _image = nullptr;
  uint32_t* _outputBuffer = nullptr;
  uint _outputWidth = 0;
  uint _outputHeight = 0;
};