diff --git a/src/core.h b/src/core.h
index 12d1709..b5f43d7 100644
--- a/src/core.h
+++ b/src/core.h
@@ -132,6 +132,8 @@
     #define _GAPI_GXM 1
 
     #undef OS_PTHREAD_MT
+
+    //#define USE_LIBVORBIS // TODO: left disabled, enabling it crashes on this platform
 #elif __SWITCH__
     #define _OS_SWITCH 1
     #define _GAPI_GL   1
@@ -259,10 +261,10 @@ namespace Core {
     struct Mutex {
         void *obj;
     
-        Mutex()       { obj = osMutexInit(); }
-        ~Mutex()      { osMutexFree(obj);    }
-        void lock()   { osMutexLock(obj);    }
-        void unlock() { osMutexUnlock(obj);  }
+        Mutex()       { obj = osMutexInit();          } // obj keeps whatever handle osMutexInit() returns
+        ~Mutex()      { if (obj) osMutexFree(obj);    } // nothing to free when obj is null
+        void lock()   { if (obj) osMutexLock(obj);    } // NOTE(review): with a null obj this is a silent no-op, i.e. no mutual exclusion
+        void unlock() { if (obj) osMutexUnlock(obj);  } // no-op when obj is null, mirrors lock()
     };
     
     struct Lock {
@@ -1009,7 +1011,7 @@ namespace Core {
     #ifdef _OS_PSV
         settings.detail.setFilter   (Core::Settings::HIGH);
         settings.detail.setLighting (Core::Settings::LOW);
-        settings.detail.setShadows  (Core::Settings::LOW);
+        settings.detail.setShadows  (Core::Settings::MEDIUM);
         settings.detail.setWater    (Core::Settings::MEDIUM);
     #endif
 
@@ -1069,7 +1071,7 @@ namespace Core {
             GAPI::discardTarget(!(active.targetOp & RT_STORE_COLOR), !(active.targetOp & RT_STORE_DEPTH));
 
             GAPI::Texture *target = reqTarget.texture;
-            uint32  face          = reqTarget.face;
+            uint32 face           = reqTarget.face;
 
             if (target != active.target || face != active.targetFace) {
                 Core::stats.rt++;
diff --git a/src/debug.h b/src/debug.h
index e7583cf..9e7a9cc 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -548,6 +548,9 @@ namespace Debug {
             for (int i = 0; i < level.roomsCount; i++)
                 for (int j = 0; j < level.rooms[i].lightsCount; j++) {
                     TR::Room::Light &l = level.rooms[i].lights[j];
+
+                    if (!level.rooms[i].flags.visible) continue;
+
                     vec3 p = vec3(float(l.x), float(l.y), float(l.z));
                     vec4 color = vec4(l.color.r, l.color.g, l.color.b, 255) * (1.0f / 255.0f);
 
diff --git a/src/gapi/gxm.h b/src/gapi/gxm.h
index 5fa3264..519c438 100644
--- a/src/gapi/gxm.h
+++ b/src/gapi/gxm.h
@@ -767,7 +767,6 @@ namespace GAPI {
             bool isCube     = (opt & OPT_CUBEMAP) != 0;
             bool isTarget   = (opt & OPT_TARGET)  != 0;
             bool isDynamic  = (opt & OPT_DYNAMIC) != 0;
-            bool isShadow   = fmt == FMT_SHADOW;
             bool isTiled    = isTarget;
             bool isSwizzled = !isDynamic && !isTiled && filter;
 
@@ -814,7 +813,7 @@ namespace GAPI {
                 size *= 6;
             }
 
-            SceGxmMemoryAttribFlags flags = (isTarget || mipCount > 1) ? SCE_GXM_MEMORY_ATTRIB_RW : SCE_GXM_MEMORY_ATTRIB_READ;
+            SceGxmMemoryAttribFlags flags = (isTarget || isDynamic || mipCount > 1) ? SCE_GXM_MEMORY_ATTRIB_RW : SCE_GXM_MEMORY_ATTRIB_READ;
             this->data = (uint8*)Context::allocGPU(SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW, size, flags, &uid);
 
             if (data && this->data) {
@@ -858,7 +857,7 @@ namespace GAPI {
             if (opt & OPT_REPEAT) {
                 addrMode = SCE_GXM_TEXTURE_ADDR_REPEAT;
             } else {
-                addrMode = (isShadow && support.texBorder) ? SCE_GXM_TEXTURE_ADDR_CLAMP_FULL_BORDER : SCE_GXM_TEXTURE_ADDR_CLAMP;
+                addrMode = SCE_GXM_TEXTURE_ADDR_CLAMP;
             }
 
             sceGxmTextureSetUAddrMode(&ID, addrMode);
@@ -1365,6 +1364,7 @@ namespace GAPI {
     }
 
     void clear(bool color, bool depth) {
+        // TODO save and restore states
         int  oColorMask  = colorMask;
         int  oBlendMode  = blendMode;
         bool oDepthTest  = depthTest;
diff --git a/src/lara.h b/src/lara.h
index 34e3a12..d00dd6b 100644
--- a/src/lara.h
+++ b/src/lara.h
@@ -2304,7 +2304,7 @@ struct Lara : Character {
                 if (stand == STAND_UNDERWATER || stand == STAND_ONWATER)
                     return stand;
                 if (stand == STAND_AIR) {
-                    if (velocity.y > 0.0f && pos.y - waterLevel > 300.0) {
+                    if (velocity.y > 0.0f && pos.y - waterLevel > 300.0f) {
                         stopScreaming();
                         return STAND_UNDERWATER;
                     }
@@ -2326,7 +2326,7 @@ struct Lara : Character {
                     return STAND_AIR;
 
                 if (stand == STAND_AIR) {
-                    if (velocity.y > 0.0f && pos.y - waterLevel > 300.0) {
+                    if (velocity.y > 0.0f && pos.y - waterLevel > 300.0f) {
                         waterSplash();
                         pos.y = waterLevel + waterDepth;
                         game->playSound(TR::SND_WATER_SPLASH, pos, Sound::PAN);
diff --git a/src/platform/psv/main.cpp b/src/platform/psv/main.cpp
index b0de426..60150ad 100644
--- a/src/platform/psv/main.cpp
+++ b/src/platform/psv/main.cpp
@@ -221,6 +221,8 @@ int checkLanguage()
     return str - STR_LANG_EN;
 }
 
+extern "C" int32_t sceKernelChangeThreadVfpException(int32_t clear, int32_t set);
+
 int main()
 {
     psvDebugScreenInit();
@@ -236,6 +238,8 @@ int main()
         sceAppUtilInit(&initParam, &bootParam);
     }
 
+    sceKernelChangeThreadVfpException(0x0800009FU, 0x0);
+
     cacheDir[0] = saveDir[0] = contentDir[0] = 0;
     strcpy(cacheDir,    "ux0:data/OpenLara/");
     strcpy(saveDir,     "ux0:data/OpenLara/");
@@ -265,6 +269,8 @@ int main()
         Game::render();
 
         GAPI::present();
+
+        sceKernelPowerTick(SCE_KERNEL_POWER_TICK_DEFAULT);
     }
 
     sndFree();
diff --git a/src/platform/win/main.cpp b/src/platform/win/main.cpp
index 2a1a7a9..b9265e9 100644
--- a/src/platform/win/main.cpp
+++ b/src/platform/win/main.cpp
@@ -203,8 +203,8 @@ void joyUpdate() {
             if (_XInputGetState(j, &state) == ERROR_SUCCESS) {
                 //osJoyVibrate(j, state.Gamepad.bLeftTrigger / 255.0f, state.Gamepad.bRightTrigger / 255.0f); // vibration test
 
-                Input::setJoyPos(j, jkL,   joyDir(joyAxis( state.Gamepad.sThumbLX,  -32768, 32767),
-                                                  joyAxis(-state.Gamepad.sThumbLY,  -32768, 32767)));
+                Input::setJoyPos(j, jkL,   joyDir(joyAxis( state.Gamepad.sThumbLX, -32768, 32767),
+                                                  joyAxis(-state.Gamepad.sThumbLY, -32768, 32767)));
                 Input::setJoyPos(j, jkR,   joyDir(joyAxis( state.Gamepad.sThumbRX, -32768, 32767),
                                                   joyAxis(-state.Gamepad.sThumbRY, -32768, 32767)));
                 Input::setJoyPos(j, jkLT,  vec2(state.Gamepad.bLeftTrigger / 255.0f, 0.0f));
diff --git a/src/shaders/common.hlsl b/src/shaders/common.hlsl
index e545b45..a1d93cb 100644
--- a/src/shaders/common.hlsl
+++ b/src/shaders/common.hlsl
@@ -65,16 +65,16 @@ struct VS_INPUT {
 	Texture2D    sReflect     : register(t2);
 	Texture2D    sShadow      : register(t3);
 	Texture2D    sMask        : register(t4);
-	
+
 	#define SAMPLE_2D(T,uv)             T.Sample(smpDefault,     uv)
 	#define SAMPLE_2D_POINT(T,uv)       T.Sample(smpPoint,       uv)
 	#define SAMPLE_2D_POINT_WRAP(T,uv)  T.Sample(smpPointWrap,   uv)
 	#define SAMPLE_2D_LINEAR(T,uv)      T.Sample(smpLinear,      uv)
 	#define SAMPLE_2D_LINEAR_WRAP(T,uv) T.Sample(smpLinearWrap,  uv)
-	#define SAMPLE_2D_CMP(T,uv)         T.SampleCmp(smpCmp,      uv.xy, uv.z)
+	#define SAMPLE_SHADOW(T,uv)         T.SampleCmpLevelZero(smpCmp, uv.xy/uv.w, uv.z/uv.w)
 	#define SAMPLE_2D_LOD0(T,uv)        T.SampleLevel(smpLinear, uv, 0)
 	#define SAMPLE_3D(T,uv)             T.SampleLevel(smpLinearWrap, uv, 0)
-	#define SAMPLE_CUBE(T,uv)           T.Sample(smpLinear,      uv)	
+	#define SAMPLE_CUBE(T,uv)           T.Sample(smpLinear,      uv)
 
 	#define POSITION    SV_POSITION
 #else
@@ -95,7 +95,13 @@ struct VS_INPUT {
 	#define SAMPLE_2D_LINEAR(T,uv)      tex2D(T, uv)
 	#define SAMPLE_2D_LINEAR_WRAP(T,uv) tex2D(T, uv)
 	#define SAMPLE_2D_LOD0(T,uv)        tex2Dlod(T, float4(uv.xy, 0, 0))
-	#define SAMPLE_2D_CMP(T,uv)         ((tex2D(T, uv.xy) => uv.z) ? 1 : 0)
+
+	#if defined(_GAPI_GXM)
+		#define SAMPLE_SHADOW(T,uv)     f1tex2Dproj(T, uv)
+	#else // manual depth test: 1 when the stored depth is >= the projected depth, else 0
+		#define SAMPLE_SHADOW(T,uv)     ((tex2D(T, uv.xy/uv.w).x >= uv.z/uv.w) ? 1 : 0)
+	#endif
+
 	#define SAMPLE_3D(T,uv)             tex3D(T, uv)
 	#define SAMPLE_CUBE(T,uv)           texCUBE(T, uv)
 
@@ -122,7 +128,7 @@ float4      uContacts[MAX_CONTACTS] : register( c98 );
 // options for compose, shadow, ambient passes
 #ifdef _GAPI_GXM
 	//#define OPT_AMBIENT
-	//#define OPT_SHADOW
+	#define OPT_SHADOW
 	//#define OPT_CONTACT
 	//#define OPT_CAUSTICS
 #else
@@ -220,16 +226,10 @@ float getShadowValue(float3 lightVec, float4 lightProj) {
 */
 	float factor = step(0.0, lightProj.w); //float((sMin > 0.0f) && (sMax < lightProj.w)); // 
 	lightProj.xyz *= factor;
-	lightProj.xyz /= lightProj.w;
-	lightProj.z -= SHADOW_CONST_BIAS * SHADOW_TEXEL;
+	lightProj.z -= SHADOW_CONST_BIAS * SHADOW_TEXEL * lightProj.w;
 
-#ifdef _GAPI_GXM
-	float rShadow = f1tex2Dproj(sShadow, lightProj);
-#elif _GAPI_D3D11
-	float rShadow = sShadow.SampleCmpLevelZero(smpCmp, lightProj.xy, lightProj.z);
-#else
-	float rShadow = 1.0;
-#endif
+
+	float rShadow = SAMPLE_SHADOW(sShadow, lightProj);
 
 	float fade = saturate(dot(lightVec, lightVec));
 	return rShadow + (1.0 - rShadow) * fade;
diff --git a/src/shaders/compose_mirror.hlsl b/src/shaders/compose_mirror.hlsl
index df1a487..aeeb7ea 100644
--- a/src/shaders/compose_mirror.hlsl
+++ b/src/shaders/compose_mirror.hlsl
@@ -22,9 +22,9 @@ VS_OUTPUT main(VS_INPUT In) {
 
 	Out.normal.xyz = mulQuat(rBasisRot, normalize(In.aNormal.xyz));
 	Out.normal.w = saturate(1.0 / exp(length(Out.viewVec.xyz)));
-	
+
 	Out.pos = mul(uViewProj, float4(coord, rBasisPos.w));
-	
+
 	return Out;
 }
 
@@ -35,9 +35,9 @@ float4 main(VS_OUTPUT In) : COLOR0 {
 	float4 color = SAMPLE_CUBE(sDiffuse, normalize(rv));
 
 	color *= uMaterial;
-    color.xyz = saturate(color.xyz);
-    
-    applyFog(color.xyz, In.normal.w);
+	color.xyz = saturate(color.xyz);
+
+	applyFog(color.xyz, In.normal.w);
 
 	return color;
 }
diff --git a/src/sound.h b/src/sound.h
index e7aa94e..db60d57 100644
--- a/src/sound.h
+++ b/src/sound.h
@@ -306,20 +306,21 @@ namespace Sound {
     };
 
 #ifdef DECODE_ADPCM
-    struct ADPCM : Decoder { // https://wiki.multimedia.cx/?title=Microsoft_ADPCM
+    struct ADPCM : Decoder // https://wiki.multimedia.cx/?title=Microsoft_ADPCM
+    {
         int size, block;
 
-        ADPCM(Stream *stream, int channels, int freq, int size, int block) : Decoder(stream, channels, freq), size(size), block(block) {}
-
-        struct Channel {
+        struct Channel
+        {
             int16 c1, c2;
             int16 delta;
             int16 sample1;
             int16 sample2;
 
-            int predicate(uint8 nibble) {
+            int predicate(uint8 nibble)
+            {
                 static const int table[] = { 230, 230, 230, 230, 307, 409, 512, 614, 768, 614, 512, 409, 307, 230, 230, 230 };
-    
+
                 int8 ns = nibble;
                 if (ns & 8) ns -= 16;
 
@@ -332,15 +333,23 @@ namespace Sound {
             }
         } channel[2];
 
-        virtual int decode(Frame *frames, int count) {
+        ADPCM(Stream *stream, int channels, int freq, int size, int block) : Decoder(stream, channels, freq), size(size), block(block)
+        {
+            memset(channel, 0, sizeof(channel));
+        }
+
+        virtual int decode(Frame *frames, int count)
+        {
             static const int coeff1[] = { 256, 512, 0, 192, 240, 460, 392 };
             static const int coeff2[] = { 0, -256, 0, 64, 0, -208, -232 };
             
             int seek = stream->pos - offset;
             if (seek >= size) return 0;
 
-            if (seek % block == 0) {
-                for (int i = 0; i < channels; i++) {
+            if (seek % block == 0)
+            {
+                for (int i = 0; i < channels; i++)
+                {
                     uint8 index;
                     stream->read(index);
                     channel[i].c1 = coeff1[index];
@@ -350,8 +359,10 @@ namespace Sound {
                 for (int i = 0; i < channels; i++) stream->read(channel[i].sample1);
                 for (int i = 0; i < channels; i++) stream->read(channel[i].sample2);
 
-                if (channels == 1) {
-                    if (freq == 22050) {
+                if (channels == 1)
+                {
+                    if (freq == 22050)
+                    {
                         ASSERT(count >= 4);
                         frames[0].L = frames[0].R =
                         frames[1].L = frames[1].R = channel[0].sample2;
@@ -378,8 +389,10 @@ namespace Sound {
                 stream->read(value);
                 uint8 n1 = value >> 4, n2 = value & 0xF;
 
-                if (channels == 1) {
-                    if (freq == 22050) {
+                if (channels == 1)
+                {
+                    if (freq == 22050)
+                    {
                         ASSERT(count >= 4);
                         frames[0].L = frames[0].R =
                         frames[1].L = frames[1].R = channel[0].predicate(n1);
@@ -459,8 +472,8 @@ namespace Sound {
             uint8 n;
             stream->read(n);
 
-            int a = getSample(n >> 4,   state[0]);
-            int b = getSample(n & 0x0F, state[1 % channels]);
+            int a = getSample(n >> 4, state[0]);
+            int b = getSample(n,      state[1 % channels]);
 
             Frame frame;
             if (channels == 2) {
@@ -484,7 +497,9 @@ namespace Sound {
         Frame buffer[28 * 4];
         int bufferSize;
 
-        VAG(Stream *stream) : Decoder(stream, 1, 11025), s1(0), s2(0), bufferSize(0) {}
+        VAG(Stream *stream) : Decoder(stream, 1, 11025), s1(0), s2(0), bufferSize(0) {
+            memset(buffer, 0, sizeof(buffer));
+        }
 
         void predicate(short value) {
             int s = (s1 * SPU_POS[pred] + s2 * SPU_NEG[pred]) >> 6;
@@ -551,29 +566,34 @@ namespace Sound {
 
 #ifdef DECODE_XA
     // http://problemkaputt.de/psx-spx.htm#cdromxaaudioadpcmcompression
-    struct XA : Decoder {
-        uint8 pred, shift, flags;
-        int s1, s2;
+    struct XA : Decoder
+    {
+        typedef bool (Callback)(void* userData);
 
-        Frame buffer[18 * 112];
-        int   pos;
-
-        struct Group {
+        struct Group
+        {
             uint8 params[16];
             uint8 data[112];
-        } groups[18];
+        };
 
-        Frame  prevFrames[2];
+        Frame     buffer[18 * 112];
+        Frame     prevFrames[2];
+        Frame     lerpFrames[32];
 
-        Frame  lerpFrames[32];
-        uint32 lerpPos;
+        int32     pos;
+        int32     lerpPos;
+        int32     frameIndex;
+        void*     userData;
+        Callback* nextSectorCallback;
 
-        XA(Stream *stream) : Decoder(stream, 1, 11025), s1(0), s2(0), pos(COUNT(buffer)), lerpPos(0) {
+        XA(void* userData, Callback* nextSectorCallback) : Decoder(NULL, 1, 11025), pos(COUNT(buffer)), lerpPos(0), frameIndex(7), userData(userData), nextSectorCallback(nextSectorCallback)
+        {
             memset(prevFrames, 0, sizeof(prevFrames));
             memset(lerpFrames, 0, sizeof(lerpFrames));
         }
 
-        void decode28(Group &group, int block, int channel) {
+        void decode28(Group &group, int block, int channel)
+        {
             int16 *dst   = channel ? &buffer[pos].R  : &buffer[pos].L;
             int16 &old   = channel ? prevFrames[0].R : prevFrames[0].L; 
             int16 &older = channel ? prevFrames[1].R : prevFrames[1].L; 
@@ -584,10 +604,10 @@ namespace Sound {
             int f0 = SPU_POS[filter];
             int f1 = SPU_NEG[filter];
 
-            for (int i = 0; i < 28; i++) {
+            for (int i = 0; i < 28; i++)
+            {
                 int t = (group.data[block + i * 4] >> (channel * 4)) & 0x0F;
-                if (t & 8) 
-                    t -= 16;
+                if (t & 8) t -= 16;
                 int s = (t << shift) + ((old * f0 + older * f1 + 32) / 64);
                 s = clamp(s, -32768, 32767);
                 older  = old;
@@ -597,30 +617,33 @@ namespace Sound {
             }
         }
 
-        void processBlock() {
-            if (stream->pos >= stream->size)
-                return;
-
-            stream->raw(groups, sizeof(groups));
+        void processSector(void* data) // runs decode28 over every group of one sector, then rewinds pos to 0
+        {
+            Group* groups = (Group*)data; // data is read as an array of 18 Group records (not validated here)
 
             pos = 0;
 
-            for (int i = 0; i < COUNT(groups); i++)
-                for (int j = 0; j < 4; j++) {
+            for (int i = 0; i < 18; i++) // 18 groups, matching the buffer[18 * 112] capacity
+            {
+                for (int j = 0; j < 4; j++) // 4 blocks per group, pos advances 28 frames per block
+                {
                     decode28(groups[i], j, 0);
                     decode28(groups[i], j, 1);
                     pos += 28;
                 }
+            }
 
             pos = 0;
         }
 
-        void ZigZagOut(Frame &frame, uint8 p, const int16 *LUT) {
+        void ZigZagOut(Frame &frame, int32 p, const int16 *LUT)
+        {
             FrameHI sum;
             sum.L = sum.R = 0;
 
-            for (uint8 i = 1; i < 30; i++) {
-                Frame &f = lerpFrames[uint8(p - i) & 0x1F];
+            for (int32 i = 1; i < 30; i++)
+            {
+                Frame &f = lerpFrames[(p - i) & 31];
                 sum.L += f.L * LUT[i];
                 sum.R += f.R * LUT[i];
             }
@@ -630,40 +653,43 @@ namespace Sound {
         }
 
         virtual int decode(Frame *frames, int count) {
-            if (pos >= COUNT(buffer))
-                processBlock();
-
-            ASSERT((int(COUNT(buffer)) - pos) % 6 == 0)
-            ASSERT(count % 7 == 0)
-
-            count = min(count, (int(COUNT(buffer)) - pos) / 6 * 7);
-
+        #if _OS_PSV // TODO: XA decoding crashes on this platform, output silence for now
+            memset(frames, 0, count * sizeof(Frame));
+            return count;
+        #endif
             int i = 0;
+
             while (i < count) {
-                ASSERT(pos < COUNT(buffer));
-                lerpFrames[lerpPos++ & 0x1F] = buffer[pos++];
-                lerpFrames[lerpPos++ & 0x1F] = buffer[pos++];
-                lerpFrames[lerpPos++ & 0x1F] = buffer[pos++];
-                lerpFrames[lerpPos++ & 0x1F] = buffer[pos++];
-                lerpFrames[lerpPos++ & 0x1F] = buffer[pos++];
-                lerpFrames[lerpPos++ & 0x1F] = buffer[pos++];
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[0]);
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[1]);
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[2]);
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[3]);
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[4]);
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[5]);
-                ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[6]);
+                if (frameIndex == 7) { // 7 marks that the next 6 input frames must be loaded first
+                    if (pos >= COUNT(buffer)) { // decoded sector is used up, ask for the next one
+                        if (!nextSectorCallback || !nextSectorCallback(userData)) {
+                            break; // frameIndex stays 7 so the next call retries the refill instead of replaying stale frames
+                        }
+                    }
+
+                    frameIndex = 0;
+
+                    ASSERT(pos <= (COUNT(buffer) - 6));
+
+                    lerpFrames[lerpPos++ & 31] = buffer[pos++];
+                    lerpFrames[lerpPos++ & 31] = buffer[pos++];
+                    lerpFrames[lerpPos++ & 31] = buffer[pos++];
+                    lerpFrames[lerpPos++ & 31] = buffer[pos++];
+                    lerpFrames[lerpPos++ & 31] = buffer[pos++];
+                    lerpFrames[lerpPos++ & 31] = buffer[pos++];
+                } else {
+                    ZigZagOut(frames[i++], lerpPos, SPU_ZIG_ZAG[frameIndex++]);
+                }
             }
 
-            ASSERT(i == count);
-
-            return count;
+            return i; // can be less than count (including 0) when no further sector is available
         }
 
-        virtual void replay() {
-            stream->setPos(0);
-            s1 = s2 = 0;
+        virtual void replay()
+        {
+            pos = COUNT(buffer); // mark the decoded buffer as used up so decode() asks the callback for a sector
+            lerpPos = 0;
+            frameIndex = 7; // makes decode() load input frames before emitting any output
         }
     };
 #endif
@@ -807,14 +833,17 @@ namespace Sound {
 
     Core::Mutex lock;
 
-    struct Listener {
+    struct Listener
+    {
         mat4 matrix;
         bool underwater;
-    } listener[2];
+    };
 
-    int listenersCount;
+    Listener listener[2];
+    int      listenersCount;
 
-    Listener& getListener(const vec3 &pos) {
+    Listener& getListener(const vec3 &pos)
+    {
         if (listenersCount == 1 || (listener[0].matrix.getPos() - pos).length2() < (listener[1].matrix.getPos() - pos).length2())
             return listener[0];
         return listener[1];
@@ -832,7 +861,8 @@ namespace Sound {
 
     bool flipped;
 
-    struct Sample {
+    struct Sample
+    {
         const vec3 *uniquePtr;
         Decoder *decoder;
         vec3    pos;
@@ -846,19 +876,22 @@ namespace Sound {
         bool    isPaused;
         bool    stopAfterFade;
 
-        Sample(Decoder *decoder, float volume, float pitch, int flags, int id) : uniquePtr(NULL), decoder(decoder), volume(volume), volumeTarget(volume), volumeDelta(0.0f), pitch(pitch), flags(flags), id(id) {
+        Sample(Decoder *decoder, float volume, float pitch, int flags, int id) : uniquePtr(NULL), decoder(decoder), volume(volume), volumeTarget(volume), volumeDelta(0.0f), pitch(pitch), flags(flags), id(id)
+        {
             isPlaying = decoder != NULL;
             isPaused  = false;
+            stopAfterFade = true;
         }
 
-        Sample(Stream *stream, const vec3 *pos, float volume, float pitch, int flags, int id) : uniquePtr(pos), decoder(NULL), volume(volume), volumeTarget(volume), volumeDelta(0.0f), pitch(pitch), flags(flags), id(id) {
+        Sample(Stream *stream, const vec3 *pos, float volume, float pitch, int flags, int id) : uniquePtr(pos), decoder(NULL), volume(volume), volumeTarget(volume), volumeDelta(0.0f), pitch(pitch), flags(flags), id(id)
+        {
             this->pos = pos ? *pos : vec3(0.0f);
 
         #ifndef NO_SOUND
             uint32 fourcc;
             stream->read(fourcc);
-            if (fourcc == FOURCC("RIFF")) { // wav
-
+            if (fourcc == FOURCC("RIFF")) // wav
+            {
                 struct {
                     uint16  format;
                     uint16  channels;
@@ -866,7 +899,7 @@ namespace Sound {
                     uint32  bytesPerSec;
                     uint16  block;
                     uint16  sampleBits;
-                } waveFmt;
+                } waveFmt = {};
 
                 stream->seek(8);
                 while (stream->pos < stream->size) {
@@ -878,29 +911,26 @@ namespace Sound {
                         stream->seek(size - sizeof(waveFmt));
                     } else if (type == FOURCC("data")) {
                         if (waveFmt.format == 1) decoder = new PCM(stream, waveFmt.channels, waveFmt.samplesPerSec, size, waveFmt.sampleBits);
-                        #ifdef DECODE_ADPCM
+                    #ifdef DECODE_ADPCM
                         if (waveFmt.format == 2) decoder = new ADPCM(stream, waveFmt.channels, waveFmt.samplesPerSec, size, waveFmt.block);
-                        #endif
+                    #endif
                         break;
-                    } else
+                    } else {
                         stream->seek(size);
+                    }
                 }
-            } 
-            else if (fourcc == FOURCC("OggS")) { // ogg
+            } else if (fourcc == FOURCC("OggS")) { // ogg
                 stream->seek(-4);
                 #ifdef DECODE_OGG
                     decoder = new OGG(stream, 2);
                 #endif 
-            }
-            else if (fourcc == FOURCC("ID3\3")) { // mp3
+            } else if (fourcc == FOURCC("ID3\3")) { // mp3
                 #ifdef DECODE_MP3
                     decoder = new MP3(stream, 2);
                 #endif
-            }
-            else if (fourcc == FOURCC("SEGA")) { // Sega Saturn PCM mono signed 8-bit 11025 Hz
+            } else if (fourcc == FOURCC("SEGA")) { // Sega Saturn PCM mono signed 8-bit 11025 Hz
                 decoder = new PCM(stream, 1, 11025, stream->size, -8);
-            }
-            else { // vag
+            } else { // vag
                 stream->setPos(0);
                 #ifdef DECODE_VAG
                     decoder = new VAG(stream);
@@ -909,32 +939,41 @@ namespace Sound {
         #endif
 
             if (!decoder)
+            {
                 delete stream;
+            }
 
             isPlaying = decoder != NULL;
             isPaused  = false;
         }
 
-        ~Sample() {
+        ~Sample()
+        {
             delete decoder;
         }
 
-        void setVolume(float value, float time) {
+        void setVolume(float value, float time)
+        {
             if (value < 0.0f) {
                 stopAfterFade = true;
                 value = 0.0f;
-            } else
+            } else {
                 stopAfterFade = false;
+            }
 
             volumeTarget = value;
             volumeDelta  = volumeTarget - volume;
+
             if (time > 0.0f)
+            {
                 volumeDelta /= 44100.0f * time;
+            }
         }
 
-        vec2 getPan() {
-            if (!(flags & PAN))
-                return vec2(1.0f);
+        vec2 getPan()
+        {
+            if (!(flags & PAN)) return vec2(1.0f);
+
             mat4  m = Sound::getListener(pos).matrix;
             vec3  v = pos - m.offset().xyz();
             vec3  n = v.normal();
@@ -949,27 +988,35 @@ namespace Sound {
             return (value * SND_PAN_FACTOR + (1.0f - SND_PAN_FACTOR)) * facing * dist;
         }
 
-        bool render(Frame *frames, int count) {
+        bool render(Frame *frames, int count)
+        {
             if (!isPlaying) return false;
 
-            if (isPaused) {
+            if (isPaused)
+            {
                 memset(frames, 0, sizeof(Frame) * count);
                 return true;
             }
 
         // decode
             int i = 0;
-            while (i < count) {
-                int res = decoder->decode(&frames[i], count - i);
-                if (res == 0) {
-                    if (!(flags & LOOP)) {
+            while (i < count)
+            {
+                int ret = decoder->decode(&frames[i], count - i);
+
+                if (ret == 0)
+                {
+                    if (!(flags & LOOP))
+                    {
                         isPlaying = false;
                         break;
-                    } else
-                        decoder->replay();
+                    }
+                    decoder->replay();
                 }
-                i += res;
+
+                i += ret;
             }
+
         // apply volume
             #define VOL_CONV(x) (1.0f - sqrtf(1.0f - x * x));
 
@@ -977,19 +1024,27 @@ namespace Sound {
             float v = volume * m;
             vec2 pan = getPan();
             vec2 vol = pan * VOL_CONV(v);
-            for (int j = 0; j < i; j++) {
-                if (volumeDelta != 0.0f) { // increase / decrease channel volume
+            for (int j = 0; j < i; j++)
+            {
+                if (volumeDelta != 0.0f) // increase / decrease channel volume
+                {
                     volume += volumeDelta;
+
                     if ((volumeDelta < 0.0f && volume < volumeTarget) ||
-                        (volumeDelta > 0.0f && volume > volumeTarget)) {
+                        (volumeDelta > 0.0f && volume > volumeTarget))
+                    {
                         volume = volumeTarget;
                         volumeDelta = 0.0f;
                         if (stopAfterFade)
+                        {
                             isPlaying = false;
+                        }
                     }
+
                     v   = volume * m;
                     vol = pan * VOL_CONV(v);
                 }
+
                 frames[j].L = int(frames[j].L * vol.x);
                 frames[j].R = int(frames[j].R * vol.y);
             }
@@ -998,19 +1053,23 @@ namespace Sound {
             return true;
         }
 
-        void stop() {
+        void stop()
+        {
             isPlaying = false;
         }
 
-        void replay() {
+        void replay()
+        {
             decoder->replay();
         }
 
-        void pause() {
+        void pause()
+        {
             isPaused = true;
         }
 
-        void resume() {
+        void resume()
+        {
             isPaused = false;
         }
     } *channels[SND_CHANNELS_MAX];
@@ -1026,7 +1085,8 @@ namespace Sound {
     Filter::Reverberation reverb;
     Filter::LowPass       lowPass;
 
-    void init() {
+    void init()
+    {
         flipped = false;
         channelsCount = 0;
         callback = NULL;
@@ -1037,9 +1097,12 @@ namespace Sound {
     #endif
     }
 
-    void deinit() {
+    void deinit()
+    {
         for (int i = 0; i < channelsCount; i++)
+        {
             delete channels[i];
+        }
     #ifdef DECODE_MP3
         mp3_decode_free();
     #endif
@@ -1047,39 +1110,52 @@ namespace Sound {
         delete[] result;
     }
 
-    void renderChannels(FrameHI *result, int count, bool music) {
+    void renderChannels(FrameHI *result, int count, bool music)
+    {
         PROFILE_CPU_TIMING(stats.render[music]);
 
         int bufSize = count + count / 2 + 4;
-        if (!buffer) buffer = new Frame[bufSize]; // + 50% for pitch
+        if (!buffer) {
+            buffer = new Frame[bufSize]; // + 50% for pitch
+        }
 
-        for (int i = 0; i < channelsCount; i++) {
-            if (music != ((channels[i]->flags & MUSIC) != 0))
+        for (int i = 0; i < channelsCount; i++)
+        {
+            if (music != ((channels[i]->flags & MUSIC) != 0)) {
                 continue;
-            
-            if (channels[i]->flags & (FLIPPED | UNFLIPPED)) {
-                if (!(channels[i]->flags & (flipped ? FLIPPED : UNFLIPPED)))
-                    continue;
-
-                vec3 d = channels[i]->pos - getListener(channels[i]->pos).matrix.getPos();
-                if (fabsf(d.x) > SND_FADEOFF_DIST || fabsf(d.y) > SND_FADEOFF_DIST || fabsf(d.z) > SND_FADEOFF_DIST)
-                    continue;
             }
 
-            if ((channels[i]->flags & LOOP) && channels[i]->volume < EPS && channels[i]->volumeTarget < EPS)
+            if (channels[i]->flags & (FLIPPED | UNFLIPPED)) {
+                if (!(channels[i]->flags & (flipped ? FLIPPED : UNFLIPPED))) {
+                    continue;
+                }
+
+                vec3 d = channels[i]->pos - getListener(channels[i]->pos).matrix.getPos();
+                if (fabsf(d.x) > SND_FADEOFF_DIST || fabsf(d.y) > SND_FADEOFF_DIST || fabsf(d.z) > SND_FADEOFF_DIST) {
+                    continue;
+                }
+            }
+
+            if ((channels[i]->flags & LOOP) && channels[i]->volume < EPS && channels[i]->volumeTarget < EPS) {
                 continue;
+            }
 
             memset(buffer, 0, sizeof(Frame) * bufSize);
             channels[i]->render(buffer, (int(count * channels[i]->pitch) + 3) / 4 * 4);
 
             if (channels[i]->pitch == 1.0f) { // no pitch
-                for (int j = 0; j < count; j++) {
+
+                for (int j = 0; j < count; j++)
+                {
                     result[j].L += buffer[j].L;
                     result[j].R += buffer[j].R;
                 }
+
             } else { // has pitch (interpolate values for smooth wave)
                 float t = 0.0f;
-                for (int j = 0; j < count; j++, t += channels[i]->pitch) {
+
+                for (int j = 0; j < count; j++, t += channels[i]->pitch)
+                {
                     int idxA = int(t);
                     int idxB = (j == (count - 1)) ? idxA : (idxA + 1);
                     int st = int((t - idxA) * DSP_SCALE);
@@ -1089,67 +1165,95 @@ namespace Sound {
                     result[j].L += a.L + ((b.L - a.L) * st >> DSP_SCALE_BIT);
                     result[j].R += a.R + ((b.R - a.R) * st >> DSP_SCALE_BIT);
                 }
+
             }
         }
     }
 
-    void convFrames(FrameHI *from, Frame *to, int count) {
-        for (int i = 0; i < count; i++) {
+    void convFrames(FrameHI *from, Frame *to, int count)
+    {
+        for (int i = 0; i < count; i++)
+        {
             to[i].L = clamp(from[i].L, -32767, 32767);
             to[i].R = clamp(from[i].R, -32767, 32767);
         }
     }
 
-    void fill(Frame *frames, int count) {
+    void fill(Frame *frames, int count)
+    {
         OS_LOCK(lock);
         PROFILE_CPU_TIMING(stats.mixer);
 
         if (!channelsCount) {
             if (result && (Core::settings.audio.music != 0 || Core::settings.audio.sound != 0)) {
                 memset(result, 0, sizeof(FrameHI) * count);
+
                 if (Core::settings.audio.reverb)
+                {
                     reverb.process(result, count);
+                }
+
                 convFrames(result, frames, count);
-            } else
+            } else {
                 memset(frames, 0, sizeof(frames[0]) * count);
+            }
             return;
         }
 
-        if (!result) result = new FrameHI[count];
+        if (!result)
+        {
+            result = new FrameHI[count];
+        }
+
         memset(result, 0, sizeof(FrameHI) * count);
 
-        if (Core::settings.audio.sound != 0) {
+        if (Core::settings.audio.sound != 0)
+        {
             renderChannels(result, count, false);
 
-            if (Core::settings.audio.reverb) {
-                if (listener[0].underwater) {
+            if (Core::settings.audio.reverb)
+            {
+                if (listener[0].underwater)
+                {
                     lowPass.process(result, count, SND_LOWPASS_FREQ);
                 }
                 reverb.process(result, count);
             }
         }
 
-        if (Core::settings.audio.music != 0) {
+        if (Core::settings.audio.music != 0)
+        {
             renderChannels(result, count, true);
         }
 
         convFrames(result, frames, count);
 
-        for (int i = 0; i < channelsCount; i++) 
-            if (!channels[i]->isPlaying) {
-                if (callback) callback(channels[i]);
+        for (int i = 0; i < channelsCount; i++)
+        {
+            if (!channels[i]->isPlaying)
+            {
+                if (callback)
+                {
+                    callback(channels[i]);
+                }
+
                 delete channels[i];
                 channels[i] = channels[--channelsCount];
                 i--;
             }
+        }
     }
 
-    Stream *openCDAudioWAD(const char *name, int index = -1) {
+    Stream *openCDAudioWAD(const char *name, int index = -1)
+    {
         if (!Stream::existsContent(name))
+        {
             return NULL;
+        }
 
         Stream *stream = new Stream(name);
-        if (stream->size) {
+        if (stream->size)
+        {
             struct Item {
                 char name[260];
                 int  size;
@@ -1165,21 +1269,30 @@ namespace Sound {
         return NULL;
     }
 
-    Stream *openCDAudioMP3(const char *dat, const char *name, int index = -1) {
+    Stream *openCDAudioMP3(const char *dat, const char *name, int index = -1)
+    {
         if (!Stream::existsContent(dat) || !Stream::existsContent(name))
+        {
             return NULL;
+        }
         Stream *stream = new Stream(name);
         return stream;
     }
 
-    Sample* getChannel(int id, const vec3 *pos) {
+    Sample* getChannel(int id, const vec3 *pos)
+    {
         for (int i = 0; i < channelsCount; i++)
+        {
             if (channels[i]->id == id && channels[i]->uniquePtr == pos)
+            {
                 return channels[i];
+            }
+        }
         return NULL;
     }
 
-    Sample* play(Stream *stream, const vec3 *pos = NULL, float volume = 1.0f, float pitch = 0.0f, int flags = 0, int id = - 1) {
+    Sample* play(Stream *stream, const vec3 *pos = NULL, float volume = 1.0f, float pitch = 0.0f, int flags = 0, int id = - 1)
+    {
     #ifndef NO_SOUND
         OS_LOCK(lock);
 
@@ -1196,16 +1309,23 @@ namespace Sound {
                 }
             }
 
-            if (flags & (UNIQUE | REPLAY)) {
+            if (flags & (UNIQUE | REPLAY))
+            {
                 Sample *ch = getChannel(id, pos);
 
-                if (ch) {
+                if (ch)
+                {
                     if (pos)
+                    {
                         ch->pos = *pos;
+                    }
+
                     ch->pitch = pitch;
 
                     if (flags & REPLAY)
+                    {
                         ch->replay();
+                    }
 
                     delete stream;
                     return ch;
@@ -1213,7 +1333,9 @@ namespace Sound {
             }
 
             if (channelsCount < SND_CHANNELS_MAX)
+            {
                 return channels[channelsCount++] = new Sample(stream, pos, volume, pitch, flags, id);
+            }
 
             LOG("! no free channels\n");
         }
@@ -1222,28 +1344,39 @@ namespace Sound {
         return NULL;
     }
 
-    Sample* play(Decoder *decoder) {
+    Sample* play(Decoder *decoder)
+    {
         OS_LOCK(lock);
 
         if (channelsCount < SND_CHANNELS_MAX)
+        {
             return channels[channelsCount++] = new Sample(decoder, 1.0f, 1.0f, MUSIC, -1);
+        }
         return NULL;
     }
 
-    void stop(int id = -1) {
+    void stop(int id = -1)
+    {
         OS_LOCK(lock);
 
         for (int i = 0; i < channelsCount; i++)
+        {
             if (id == -1 || channels[i]->id == id)
+            {
                 channels[i]->stop();
+            }
+        }
     }
 
-    void stopAll() {
+    void stopAll()
+    {
         OS_LOCK(lock);
         reverb.clear();
 
         for (int i = 0; i < channelsCount; i++)
+        {
             delete channels[i];
+        }
         channelsCount = 0;
     }
 }
diff --git a/src/ui.h b/src/ui.h
index 6a24cf4..73fd1c2 100644
--- a/src/ui.h
+++ b/src/ui.h
@@ -9,7 +9,7 @@
 #define SUBTITLES_SPEED  0.1f
 #define TEXT_LINE_HEIGHT 18
 
-#if defined(_OS_PSV) || defined(_OS_TNS)
+#if defined(_OS_TNS)
     #define UI_SHOW_FPS
 #endif
 
@@ -768,6 +768,7 @@ namespace UI {
 
         game->setShader(Core::passCompose, Shader::ENTITY, false, false);
         Core::setMaterial(1.0f, 0.0f, 0.0f, 1.0f);
+        Core::setFog(FOG_NONE);
 
         vec4 o = vec4(offset, 0.0f);
 
diff --git a/src/video.h b/src/video.h
index 552fcb7..53c1843 100644
--- a/src/video.h
+++ b/src/video.h
@@ -134,7 +134,7 @@ static const AC_ENTRY STR_AC[] = {
     { 0X3E , 27 , 1  , 17 }, // 00111110
 };
 
-static const uint8 STR_ZIG_ZAG[] = {
+static const uint8 STR_ZSCAN[] = {
      0,  1,  8, 16,  9,  2,  3, 10,
     17, 24, 32, 25, 18, 11,  4,  5,
     12, 19, 26, 33, 40, 48, 41, 34,
@@ -145,25 +145,17 @@ static const uint8 STR_ZIG_ZAG[] = {
     53, 60, 61, 54, 47, 55, 62, 63,
 };
 
-static const uint8 STR_QUANTIZATION[] = {
-     2, 16, 19, 22, 26, 27, 29, 34,
-    16, 16, 22, 24, 27, 29, 34, 37,
-    19, 22, 26, 27, 29, 34, 34, 38,
-    22, 22, 26, 27, 29, 34, 37, 40,
-    22, 26, 27, 29, 32, 35, 40, 48,
-    26, 27, 29, 32, 35, 40, 48, 58,
-    26, 27, 29, 34, 38, 46, 56, 69,
-    27, 29, 35, 38, 46, 56, 69, 83
+static const int32 STR_QTABLE[] = {
+    0x00020000, 0x00163150, 0x00163150, 0x0018D321, 0x001EC830, 0x0018D321, 0x0019DE84, 0x0027DEA0,
+    0x0027DEA0, 0x0019DE84, 0x00160000, 0x0023E1B0, 0x002C6282, 0x002724C0, 0x001A0000, 0x001536B1,
+    0x00257337, 0x00297B55, 0x0027F206, 0x00241022, 0x00146D8E, 0x000E1238, 0x001D6CAF, 0x002346F9,
+    0x00255528, 0x0025E3EF, 0x001F9AA9, 0x000FB1DC, 0x00096162, 0x001985B6, 0x0022E73A, 0x002219AE,
+    0x002219AE, 0x001DC539, 0x00144489, 0x000772FB, 0x000B1918, 0x00148191, 0x001D9060, 0x00200000,
+    0x001F6966, 0x00180AAA, 0x000E28D8, 0x000DB2B0, 0x00178BD2, 0x001B7FC9, 0x001B7FC9, 0x0015A314,
+    0x000C9DD8, 0x000C53EE, 0x001490C8, 0x0018B140, 0x0015A5E0, 0x000CFA08, 0x000D3E30, 0x00146910,
+    0x00138F5A, 0x000CB0EE, 0x000C2390, 0x001066E8, 0x000C928C, 0x000A4DA2, 0x000A4DA2, 0x00065187,
 };
 
-#define STR_IDCT_A 23170
-#define STR_IDCT_B 32138
-#define STR_IDCT_C 27245
-#define STR_IDCT_D 18204
-#define STR_IDCT_E 6392
-#define STR_IDCT_F 30273
-#define STR_IDCT_G 12539
-
 struct Video {
 
     struct Decoder : Sound::Decoder {
@@ -766,12 +758,13 @@ struct Video {
             uint16 chunksCount;
             uint32 frameIndex;
             uint32 chunkSize;
-            uint16 width, height;
+            uint16 width;
+            uint16 height;
             uint16 blocks;
-            uint16 unk1;
+            uint16 unk3800;
             uint16 qscale;
             uint16 version;
-            uint32 unk2;
+            uint32 unk00000000;
         };
 
         struct VideoChunk {
@@ -800,7 +793,7 @@ struct Video {
         int   curVideoChunk;
         int   curAudioChunk;
 
-        Sound::Decoder *audioDecoder;
+        Sound::XA *audioDecoder;
 
         struct {
             uint8 code;
@@ -809,9 +802,13 @@ struct Video {
 
         bool hasSyncHeader;
 
-        STR(Stream *stream) : Decoder(stream), videoChunksCount(0), audioChunksCount(0), curVideoChunk(-1), curAudioChunk(-1), audioDecoder(NULL) {
+        STR(Stream *stream) : Decoder(stream), videoChunksCount(0), audioChunksCount(0), curVideoChunk(-1), curAudioChunk(-1), audioDecoder(NULL)
+        {
+            memset(videoChunks, 0, sizeof(videoChunks));
+            memset(audioChunks, 0, sizeof(audioChunks));
 
-            if (stream->pos >= stream->size) {
+            if (stream->pos >= stream->size)
+            {
                 LOG("Can't load STR format \"%s\"\n", stream->name);
                 ASSERT(false);
                 return;
@@ -831,15 +828,11 @@ struct Video {
 
             hasSyncHeader = syncMagic[0] == 0xFFFFFF00 && syncMagic[1] == 0xFFFFFFFF && syncMagic[2] == 0x00FFFFFF;
             
-            if (!hasSyncHeader) {
+            if (!hasSyncHeader)
+            {
                 LOG("! No sync header found, please use jpsxdec tool to extract FMVs\n");
             }
 
-            for (int i = 0; i < MAX_CHUNKS; i++) {
-                videoChunks[i].size = 0;
-                audioChunks[i].size = 0;
-            }
-
             nextChunk();
 
             VideoChunk &chunk = videoChunks[0];
@@ -853,49 +846,63 @@ struct Video {
         #ifdef NO_SOUND
             audioDecoder = NULL;
         #else
-            audioDecoder = new Sound::XA(NULL);
+            audioDecoder = new Sound::XA(this, audioNextBlockCallback);
         #endif
         }
 
-        virtual ~STR() {
+        virtual ~STR()
+        {
             OS_LOCK(Sound::lock);
-            audioDecoder->stream = NULL;
+            if (audioDecoder) audioDecoder->stream = NULL; // NULL under NO_SOUND or after a failed load
             delete audioDecoder;
         }
 
-        void buildLUT(uint8 *LUT, int start, int end, int shift) {
-            for (int i = start; i < end; i++) {
+        void buildLUT(uint8 *LUT, int start, int end, int shift)
+        {
+            for (int i = start; i < end; i++)
+            {
                 const AC_ENTRY &e = STR_AC[i];
                 uint8 trash = (1 << (8 + shift - e.length + 1));
             // fill the value and all possible endings
                 while (trash--)
+                {
                     LUT[e.code | trash] = i;
+                }
             }
         }
 
-        bool nextChunk() {
+        bool nextChunk()
+        {
             OS_LOCK(Sound::lock);
 
             if (videoChunks[videoChunksCount % MAX_CHUNKS].size > 0)
+            {
                 return false;
+            }
 
             if (stream->pos >= stream->size)
+            {
                 return false;
+            }
 
             bool readingVideo = false;
 
-            while (stream->pos < stream->size) {
-
+            while (stream->pos < stream->size)
+            {
                 if (hasSyncHeader)
+                {
                     stream->seek(24);
+                }
 
                 Sector sector;
                 stream->raw(&sector, sizeof(Sector));
 
-                if (sector.magic == MAGIC_STR) {
+                if (sector.magic == MAGIC_STR)
+                {
                     VideoChunk *chunk = videoChunks + (videoChunksCount % MAX_CHUNKS);
 
-                    if (sector.chunkIndex == 0) {
+                    if (sector.chunkIndex == 0)
+                    {
                         readingVideo  = true;
                         chunk->size   = 0;
                         chunk->width  = sector.width;
@@ -908,9 +915,12 @@ struct Video {
                     chunk->size += VIDEO_SECTOR_SIZE;
 
                     if (hasSyncHeader)
+                    {
                         stream->seek(280);
+                    }
 
-                    if (sector.chunkIndex == sector.chunksCount - 1) {
+                    if (sector.chunkIndex == sector.chunksCount - 1)
+                    {
                         videoChunksCount++;
                         return true;
                     }
@@ -924,19 +934,26 @@ struct Video {
                     stream->seek(24);
 
                     if (!hasSyncHeader)
+                    {
                         stream->seek(2048 - (AUDIO_SECTOR_SIZE + 24));
+                    }
 
                     if (!readingVideo)
+                    {
                         return true;
+                    }
                 };
             }
             return false;
         }
 
         // http://jpsxdec.blogspot.com/2011/06/decoding-mpeg-like-bitstreams.html
-        bool readCode(BitStream &bs, int16 &skip, int16 &ac) {
-            if (bs.readU(1)) {
-                if (bs.readU(1)) {
+        bool readCode(BitStream &bs, int16 &skip, int16 &ac)
+        {
+            if (bs.readU(1))
+            {
+                if (bs.readU(1))
+                {
                     skip = 0;
                     ac   = bs.readU(1) ? -1 : 1;
                     return true;
@@ -946,9 +963,12 @@ struct Video {
 
             int nz = 1;
             while (!bs.readU(1))
+            {
                 nz++;
+            }
 
-            if (nz == 5) { // escape code == 0b1000001
+            if (nz == 5) // escape code == 0b1000001
+            {
                 uint16 esc = bs.readU(16);
                 skip = esc >> 10;
                 ac   = esc & 0x3FF;
@@ -987,49 +1007,208 @@ struct Video {
             return true;
         }
 
-        void IDCT_PASS(int16 *src, int16 *dst, int32 x) { \
-            int32 a0 = src[0 * x] * STR_IDCT_A;
-            int32 b1 = src[1 * x] * STR_IDCT_B;
-            int32 c1 = src[1 * x] * STR_IDCT_C;
-            int32 d1 = src[1 * x] * STR_IDCT_D;
-            int32 e1 = src[1 * x] * STR_IDCT_E;
-            int32 f2 = src[2 * x] * STR_IDCT_F;
-            int32 g2 = src[2 * x] * STR_IDCT_G;
-            int32 b3 = src[3 * x] * STR_IDCT_B;
-            int32 c3 = src[3 * x] * STR_IDCT_C;
-            int32 d3 = src[3 * x] * STR_IDCT_D;
-            int32 e3 = src[3 * x] * STR_IDCT_E;
-            int32 a4 = src[4 * x] * STR_IDCT_A;
-            int32 b5 = src[5 * x] * STR_IDCT_B;
-            int32 c5 = src[5 * x] * STR_IDCT_C;
-            int32 d5 = src[5 * x] * STR_IDCT_D;
-            int32 e5 = src[5 * x] * STR_IDCT_E;
-            int32 f6 = src[6 * x] * STR_IDCT_F;
-            int32 g6 = src[6 * x] * STR_IDCT_G;
-            int32 b7 = src[7 * x] * STR_IDCT_B;
-            int32 c7 = src[7 * x] * STR_IDCT_C;
-            int32 d7 = src[7 * x] * STR_IDCT_D;
-            int32 e7 = src[7 * x] * STR_IDCT_E;
-            dst[0 * x] = ( a0 + b1 + f2 + c3 + a4 + d5 + g6 + e7 ) >> 16;
-            dst[1 * x] = ( a0 + c1 + g2 - e3 - a4 - b5 - f6 - d7 ) >> 16;
-            dst[2 * x] = ( a0 + d1 - g2 - b3 - a4 + e5 + f6 + c7 ) >> 16;
-            dst[3 * x] = ( a0 + e1 - f2 - d3 + a4 + c5 - g6 - b7 ) >> 16;
-            dst[4 * x] = ( a0 - e1 - f2 + d3 + a4 - c5 - g6 + b7 ) >> 16;
-            dst[5 * x] = ( a0 - d1 - g2 + b3 - a4 - e5 + f6 - c7 ) >> 16;
-            dst[6 * x] = ( a0 - c1 + g2 + e3 - a4 + b5 - f6 + d7 ) >> 16;
-            dst[7 * x] = ( a0 - b1 + f2 - c3 + a4 - d5 + g6 - e7 ) >> 16;
+        // https://psx-spx.consoledev.net/macroblockdecodermdec/
+        // https://github.com/grumpycoders/pcsx-redux/
+        #define AAN_CONST_BITS 12
+        #define AAN_PRESCALE_BITS 16
+
+        #define AAN_EXTRA 12
+        #define SCALE(x, n) ((x) >> (n))
+        #define SCALER(x, n) (((x) + ((1 << (n)) >> 1)) >> (n))
+        #define RLE_RUN(a) ((a) >> 10)
+        #define RLE_VAL(a) (((int)(a) << (sizeof(int) * 8 - 10)) >> (sizeof(int) * 8 - 10))
+        #define MULS(var, c) (SCALE((var) * (c), AAN_CONST_BITS))
+
+        #define AAN_CONST_SIZE 24
+        #define AAN_CONST_SCALE (AAN_CONST_SIZE - AAN_CONST_BITS)
+
+        #define MULR(a) ((1434 * (a)))
+        #define MULB(a) ((1807 * (a)))
+        #define MULG2(a, b) ((-351 * (a)-728 * (b)))
+        #define MULY(a) ((a) << 10)
+
+        #define MAKERGB15(r, g, b, a) ((a | ((b) << 10) | ((g) << 5) | (r)))
+        #define SCALE8(c) SCALER(c, 20)
+        #define SCALE5(c) SCALER(c, 23)
+
+        #define CLAMP5(c) (((c) < -16) ? 0 : (((c) > (31 - 16)) ? 31 : ((c) + 16)))
+        #define CLAMP8(c) (((c) < -128) ? 0 : (((c) > (255 - 128)) ? 255 : ((c) + 128)))
+
+        #define CLAMP_SCALE8(a) (CLAMP8(SCALE8(a)))
+        #define CLAMP_SCALE5(a) (CLAMP5(SCALE5(a)))
+
+        static inline void fillCol(int *blk, int val)
+        {
+            blk[0 * 8] = blk[1 * 8] = blk[2 * 8] = blk[3 * 8] = blk[4 * 8] = blk[5 * 8] = blk[6 * 8] = blk[7 * 8] = val;
         }
 
-        void IDCT(int16 *b) {
-            int16 t[64];
-            for (int i = 0; i < 8 * 1; i += 1) IDCT_PASS(b + i, t + i, 8);
-            for (int i = 0; i < 8 * 8; i += 8) IDCT_PASS(t + i, b + i, 1);
+        static inline void fillRow(int *blk, int val)
+        {
+            blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = val;
         }
 
-        virtual bool decodeVideo(Color32 *pixels) {
+        static void IDCT(int *block, int used_col)
+        {
+            #define FIX_1_082392200 4433
+            #define FIX_1_414213562 5793
+            #define FIX_1_847759065 7568
+            #define FIX_2_613125930 10703
+
+            int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+            int z5, z10, z11, z12, z13;
+            int *ptr;
+            int i;
+
+            if (used_col == -1)
+            {
+                int v = block[0];
+                for (i = 0; i < 64; i++)
+                {
+                    block[i] = v;
+                }
+                return;
+            }
+
+            ptr = block;
+            for (i = 0; i < 8; i++, ptr++)
+            {
+                if ((used_col & (1 << i)) == 0)
+                {
+                    if (ptr[8 * 0])
+                    {
+                        fillCol(ptr, ptr[0]);
+                        used_col |= (1 << i);
+                    }
+                    continue;
+                }
+
+                z10 = ptr[8 * 0] + ptr[8 * 4];
+                z11 = ptr[8 * 0] - ptr[8 * 4];
+                z13 = ptr[8 * 2] + ptr[8 * 6];
+                z12 = MULS(ptr[8 * 2] - ptr[8 * 6], FIX_1_414213562) - z13;
+
+                tmp0 = z10 + z13;
+                tmp3 = z10 - z13;
+                tmp1 = z11 + z12;
+                tmp2 = z11 - z12;
+
+                z13 = ptr[8 * 3] + ptr[8 * 5];
+                z10 = ptr[8 * 3] - ptr[8 * 5];
+                z11 = ptr[8 * 1] + ptr[8 * 7];
+                z12 = ptr[8 * 1] - ptr[8 * 7];
+
+                tmp7 = z11 + z13;
+
+                z5 = (z12 - z10) * (FIX_1_847759065);
+                tmp6 = SCALE(z10 * (FIX_2_613125930) + z5, AAN_CONST_BITS) - tmp7;
+                tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
+                tmp4 = SCALE(z12 * (FIX_1_082392200)-z5, AAN_CONST_BITS) + tmp5;
+
+                ptr[8 * 0] = (tmp0 + tmp7);
+                ptr[8 * 7] = (tmp0 - tmp7);
+                ptr[8 * 1] = (tmp1 + tmp6);
+                ptr[8 * 6] = (tmp1 - tmp6);
+                ptr[8 * 2] = (tmp2 + tmp5);
+                ptr[8 * 5] = (tmp2 - tmp5);
+                ptr[8 * 4] = (tmp3 + tmp4);
+                ptr[8 * 3] = (tmp3 - tmp4);
+            }
+
+            ptr = block;
+            if (used_col == 1)
+            {
+                for (i = 0; i < 8; i++) 
+                {
+                    fillRow(block + 8 * i, block[8 * i]);
+                }
+            } else {
+                for (i = 0; i < 8; i++, ptr += 8)
+                {
+                    z10 = ptr[0] + ptr[4];
+                    z11 = ptr[0] - ptr[4];
+                    z13 = ptr[2] + ptr[6];
+                    z12 = MULS(ptr[2] - ptr[6], FIX_1_414213562) - z13;
+
+                    tmp0 = z10 + z13;
+                    tmp3 = z10 - z13;
+                    tmp1 = z11 + z12;
+                    tmp2 = z11 - z12;
+
+                    z13 = ptr[3] + ptr[5];
+                    z10 = ptr[3] - ptr[5];
+                    z11 = ptr[1] + ptr[7];
+                    z12 = ptr[1] - ptr[7];
+
+                    tmp7 = z11 + z13;
+                    z5 = (z12 - z10) * FIX_1_847759065;
+                    tmp6 = SCALE(z10 * FIX_2_613125930 + z5, AAN_CONST_BITS) - tmp7;
+                    tmp5 = MULS(z11 - z13, FIX_1_414213562) - tmp6;
+                    tmp4 = SCALE(z12 * FIX_1_082392200 - z5, AAN_CONST_BITS) + tmp5;
+
+                    ptr[0] = tmp0 + tmp7;
+
+                    ptr[7] = tmp0 - tmp7;
+                    ptr[1] = tmp1 + tmp6;
+                    ptr[6] = tmp1 - tmp6;
+                    ptr[2] = tmp2 + tmp5;
+                    ptr[5] = tmp2 - tmp5;
+                    ptr[4] = tmp3 + tmp4;
+                    ptr[3] = tmp3 - tmp4;
+                }
+            }
+        }
+
+        static inline void putQuadRGB24(uint8 *image, int *Yblk, int Cr, int Cb)
+        {
+            int Y, R, G, B;
+
+            R = MULR(Cr);
+            G = MULG2(Cb, Cr);
+            B = MULB(Cb);
+
+            Y = MULY(Yblk[0]);
+            image[0 * 3 + 0] = CLAMP_SCALE8(Y + R);
+            image[0 * 3 + 1] = CLAMP_SCALE8(Y + G);
+            image[0 * 3 + 2] = CLAMP_SCALE8(Y + B);
+            Y = MULY(Yblk[1]);
+            image[1 * 3 + 0] = CLAMP_SCALE8(Y + R);
+            image[1 * 3 + 1] = CLAMP_SCALE8(Y + G);
+            image[1 * 3 + 2] = CLAMP_SCALE8(Y + B);
+            Y = MULY(Yblk[8]);
+            image[16 * 3 + 0] = CLAMP_SCALE8(Y + R);
+            image[16 * 3 + 1] = CLAMP_SCALE8(Y + G);
+            image[16 * 3 + 2] = CLAMP_SCALE8(Y + B);
+            Y = MULY(Yblk[9]);
+            image[17 * 3 + 0] = CLAMP_SCALE8(Y + R);
+            image[17 * 3 + 1] = CLAMP_SCALE8(Y + G);
+            image[17 * 3 + 2] = CLAMP_SCALE8(Y + B);
+        }
+
+        inline void YUV2RGB24(int32 *blk, uint8 *image)
+        {
+            int x, y;
+            int *Yblk = blk + 64 * 2;
+            int *Crblk = blk;
+            int *Cbblk = blk + 64;
+
+            for (y = 0; y < 16; y += 2, Crblk += 4, Cbblk += 4, Yblk += 8, image += 8 * 3 * 3)
+            {
+                if (y == 8) Yblk += 64;
+                for (x = 0; x < 4; x++, image += 6, Crblk++, Cbblk++, Yblk += 2)
+                {
+                    putQuadRGB24(image, Yblk, *Crblk, *Cbblk);
+                    putQuadRGB24(image + 8 * 3, Yblk + 64, *(Crblk + 4), *(Cbblk + 4));
+                }
+            }
+        }
+
+        virtual bool decodeVideo(Color32 *pixels)
+        {
             curVideoChunk++;
-            while (curVideoChunk >= videoChunksCount) {
-                if (!nextChunk()) {
+            while (curVideoChunk >= videoChunksCount)
+            {
+                if (!nextChunk())
+                {
                     return false;
                 }
             }
@@ -1038,51 +1217,55 @@ struct Video {
 
             BitStream bs(chunk->data + 8, chunk->size - 8); // make bitstream without frame header
 
-            int16 block[6][64]; // Cr, Cb, YTL, YTR, YBL, YBR
-            for (int bX = 0; bX < width / 16; bX++) {
-                for (int bY = 0; bY < height / 16; bY++) {
-                    memset(block, 0, sizeof(block));
+            int32 qscale = chunk->qscale;
 
-                    for (int i = 0; i < 6; i++) {
-                        bool nonZero = false;
+            int32 blocks[64 * 6]; // Cr, Cb, YTL, YTR, YBL, YBR
+            for (int32 bX = 0; bX < width / 16; bX++)
+            {
+                for (int32 bY = 0; bY < height / 16; bY++)
+                {
+                    memset(blocks, 0, sizeof(blocks));
 
-                        int16 *channel = block[i];
-                        channel[0] = bs.readU(10);
-                        if (channel[0]) {
-                            if (channel[0] & 0x200) {
-                                channel[0] -= 0x400;
-                            }
-                            channel[0] = channel[0] * STR_QUANTIZATION[0]; // DC
-                            nonZero = true;
+                    for (int i = 0; i < 6; i++)
+                    {
+                        int32* block = blocks + i * 64;
+
+                        int16 dc = bs.readU(10);
+                        if (dc & 0x200) {
+                            dc -= 0x400;
                         }
-                        
+
+                        block[0] = SCALER(dc * STR_QTABLE[0], AAN_EXTRA - 3);
+
+                        int32 used_col = 0;
+
                         int16 skip, ac;
-                        int index = 0;
-                        while (readCode(bs, skip, ac)) {
-                            index += 1 + skip;
+                        int32 index = 0;
+                        while (readCode(bs, skip, ac))
+                        {
+                            index += skip + 1;
                             ASSERT(index < 64);
-                            int zIndex = STR_ZIG_ZAG[index];
-                            channel[zIndex] = (ac * STR_QUANTIZATION[zIndex] * chunk->qscale + 4) >> 3;
-                            nonZero = true;
+                            block[STR_ZSCAN[index]] = SCALER(ac * STR_QTABLE[index] * qscale, AAN_EXTRA);
+
+                            used_col |= (STR_ZSCAN[index] > 7) ? 1 << (STR_ZSCAN[index] & 7) : 0;
                         }
 
-                        if (nonZero)
-                            IDCT(channel);
+                        if (index == 0) used_col = -1;
+
+                        IDCT(block, used_col);
                     }
 
+                    Color24 pix[16 * 16];
+                    YUV2RGB24(blocks, (uint8*)pix);
+
+                    int32 i = 0;
                     Color32 *blockPixels = pixels + (width * bY * 16 + bX * 16);
-
-                    for (uint32 i = 0; i < 8 * 8; i++) {
-                        int x = (i % 8) * 2;
-                        int y = (i / 8) * 2;
-                        int j = (x & 7) + (y & 7) * 8;
-
-                        Color32 *c = blockPixels + (width * y + x);
-
-                        int16 *b = block[(x < 8) ? ((y < 8) ? 2 : 4) : ((y < 8) ? 3 : 5)];
-
-                        Color32::YCbCr_T871_420(b[j] + 128, b[j + 1] + 128, b[j + 8] + 128, b[j + 8 + 1] + 128, block[1][i], block[0][i], 4,
-                                                c[0], c[1], c[width], c[width + 1]);
+                    for (int y = 0; y < 16; y++)
+                    {
+                        for (int x = 0; x < 16; x++)
+                        {
+                            blockPixels[y * width + x] = pix[i++];
+                        }
                     }
                 }
             }
@@ -1092,34 +1275,40 @@ struct Video {
             return true;
         }
 
-        virtual int decode(Sound::Frame *frames, int count) {
+        bool getNextAudioStream()
+        {
+            curAudioChunk++;
+            while (curAudioChunk >= audioChunksCount)
+            {
+                if (!nextChunk())
+                {
+                    curAudioChunk--;
+                    return false;
+                }
+            }
+
+            AudioChunk *chunk = audioChunks + (curAudioChunk % MAX_CHUNKS);
+            ASSERT(chunk->size > 0);
+            audioDecoder->processSector(chunk->data);
+            return true;
+        }
+
+        static bool audioNextBlockCallback(void* userData)
+        {
+            return ((STR*)userData)->getNextAudioStream();
+        }
+
+        virtual int decode(Sound::Frame *frames, int count)
+        {
         #ifdef NO_VIDEO
             return 0;
         #else
             if (!audioDecoder) return 0;
-            Sound::XA *xa = (Sound::XA*)audioDecoder;
 
-            int i = 0;
-            while (i < count) {
-                if (xa->pos >= COUNT(xa->buffer)) {
-                    curAudioChunk++;
-                    while (curAudioChunk >= audioChunksCount) {
-                        if (!nextChunk()) {
-                            curAudioChunk--;
-                            memset(frames, 0, count * sizeof(Sound::Frame));
-                            return count;
-                        }
-                    }
-                }
-
-                AudioChunk *chunk = audioChunks + (curAudioChunk % MAX_CHUNKS);
-                ASSERT(chunk->size > 0);
-                Stream *memStream = new Stream(NULL, chunk->data, AUDIO_SECTOR_SIZE);
-                audioDecoder->stream = memStream;
-
-                i += audioDecoder->decode(&frames[i], count - i);
-
-                delete memStream;
+            int ret = audioDecoder->decode(frames, count);
+            // fewer frames than requested: pad the remainder with silence
+            if (ret < count) {
+                memset(frames + ret, 0, (count - ret) * sizeof(Sound::Frame));
             }
 
             return count;
@@ -1298,7 +1487,7 @@ struct Video {
         }
     }
 
-    Video(Stream *stream, TR::LevelID id) : sample(NULL), decoder(NULL), stepTimer(0.0f), time(0.0f), isPlaying(false) {
+    Video(Stream *stream, TR::LevelID id) : sample(NULL), decoder(NULL), stepTimer(0.0f), time(0.0f), isPlaying(false), needUpdate(false) {
         frameTex[0] = frameTex[1] = NULL;
 
         if (!stream) return;
@@ -1323,12 +1512,16 @@ struct Video {
         frameData = new Color32[decoder->width * decoder->height];
         memset(frameData, 0, decoder->width * decoder->height * sizeof(Color32));
 
-        for (int i = 0; i < 2; i++)
+        for (int i = 0; i < 2; i++) {
             frameTex[i] = new Texture(decoder->width, decoder->height, 1, FMT_RGBA, OPT_DYNAMIC, frameData);
+        }
 
         if (!TR::getVideoTrack(id, playAsync, this)) {
             sample = Sound::play(decoder);
-            sample->pitch = pitch;
+            // Sound::play(Decoder*) returns NULL when every channel is in use
+            if (sample) {
+                sample->pitch = pitch;
+            }
         }
 
         step      = 1.0f / decoder->fps;