diff --git a/.gitignore b/.gitignore
index fcae9bf..9db2f13 100644
--- a/.gitignore
+++ b/.gitignore
@@ -183,15 +183,10 @@ packages/*
 
 Dependencies/*
 GUI.NET/*
-Windows/*
 Linux/*
 Libretro/*
 Docs/*
-SevenZip/*
 Lua/*
 TestHelper/*
 UpdateHelper/*
-Utilities/*
 PGOHelper/*
-DependencyPacker/*
-InteropDLL/*
\ No newline at end of file
diff --git a/DependencyPacker/DependencyPacker.csproj b/DependencyPacker/DependencyPacker.csproj
new file mode 100644
index 0000000..3fc7adf
--- /dev/null
+++ b/DependencyPacker/DependencyPacker.csproj
@@ -0,0 +1,186 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Console tool project; its Program.cs zips the Dependencies folder -->
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{AABB5225-3A49-47FF-8A48-031673CADCE9}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>DependencyPacker</RootNamespace>
+    <AssemblyName>DependencyPacker</AssemblyName>
+    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>..\bin\Any CPU\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <Prefer32Bit>false</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>..\bin\Any CPU\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <Prefer32Bit>false</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
+    <DebugSymbols>true</DebugSymbols>
+    <OutputPath>..\bin\x64\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
+    <OutputPath>..\bin\x64\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
+    <DebugSymbols>true</DebugSymbols>
+    <OutputPath>..\bin\x86\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DebugType>full</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
+    <OutputPath>..\bin\x86\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'PGO Profile|AnyCPU'">
+    <OutputPath>..\bin\Any CPU\PGO Profile\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'PGO Profile|x64'">
+    <OutputPath>..\bin\x64\PGO Profile\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'PGO Profile|x86'">
+    <OutputPath>..\bin\x86\PGO Profile\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'PGO Optimize|AnyCPU'"> <!-- PGO Optimize configurations write to the same "PGO Profile" folders as PGO Profile -->
+    <OutputPath>..\bin\Any CPU\PGO Profile\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'PGO Optimize|x64'">
+    <OutputPath>..\bin\x64\PGO Profile\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'PGO Optimize|x86'">
+    <OutputPath>..\bin\x86\PGO Profile\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Libretro|AnyCPU'"> <!-- NOTE(review): Libretro output paths omit the "..\" prefix used by all other configurations; confirm this is intended -->
+    <OutputPath>bin\Libretro\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>AnyCPU</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Libretro|x64'">
+    <OutputPath>bin\x64\Libretro\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x64</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Libretro|x86'">
+    <OutputPath>bin\x86\Libretro\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <Optimize>true</Optimize>
+    <DebugType>pdbonly</DebugType>
+    <PlatformTarget>x86</PlatformTarget>
+    <ErrorReport>prompt</ErrorReport>
+    <CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <Prefer32Bit>true</Prefer32Bit>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.IO.Compression" /> <!-- System.IO.Compression* references: Program.cs calls ZipFile.CreateFromDirectory -->
+    <Reference Include="System.IO.Compression.FileSystem" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file
diff --git a/DependencyPacker/Program.cs b/DependencyPacker/Program.cs
new file mode 100644
index 0000000..ded5c35
--- /dev/null
+++ b/DependencyPacker/Program.cs
@@ -0,0 +1,21 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.IO;
+using System.IO.Compression;
+
+namespace DependencyPacker
+{
+	class Program
+	{
+		// Rebuilds Dependencies.zip from the Dependencies folder; both paths are relative to the working directory.
+		static void Main(string[] args)
+		{
+			const string archivePath = "Dependencies.zip";
+			if(File.Exists(archivePath)) { File.Delete(archivePath); } // CreateFromDirectory throws if the target already exists
+			ZipFile.CreateFromDirectory("Dependencies", archivePath);
+		}
+	}
+}
diff --git a/DependencyPacker/Properties/AssemblyInfo.cs b/DependencyPacker/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..4804ea4
--- /dev/null
+++ b/DependencyPacker/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+﻿using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("DependencyPacker")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("Mesen")]
+[assembly: AssemblyProduct("DependencyPacker")]
+[assembly: AssemblyCopyright("Copyright © Mesen 2019")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("2cbd9a92-f7b8-4d71-bec3-2fd8810871f5")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/InteropDLL/DebugApiWrapper.cpp b/InteropDLL/DebugApiWrapper.cpp
new file mode 100644
index 0000000..4e5ce9c
--- /dev/null
+++ b/InteropDLL/DebugApiWrapper.cpp
@@ -0,0 +1,48 @@
+#include "stdafx.h"
+#include "../Core/Console.h"
+#include "../Core/Debugger.h"
+#include "../Core/TraceLogger.h"
+
+extern shared_ptr<Console> _console; // defined in EmuApiWrapper.cpp
+shared_ptr<Debugger> _debugger; // cached by GetDebugger(), cleared by ReleaseDebugger()
+
+shared_ptr<Debugger> GetDebugger()
+{
+	// Return the cached debugger if set; otherwise fetch it from the console and cache it.
+	if(_debugger) {
+		return _debugger;
+	}
+	return _debugger = _console->GetDebugger();
+}
+
+extern "C"
+{
+	//Debugger wrapper
+	// Exported entry points that forward to the debugger returned by GetDebugger().
+	DllExport void __stdcall InitializeDebugger() { GetDebugger(); }
+
+	// Drops only the reference cached by this wrapper; the console-side stop call stays disabled.
+	DllExport void __stdcall ReleaseDebugger()
+	{
+		_debugger.reset();
+		//_console->StopDebugger();
+	}
+
+	// False when _console is empty or when it reports no debugger, so a missing console is never dereferenced.
+	// NOTE(review): the "false" argument presumably prevents auto-creation - confirm against Console::GetDebugger.
+	DllExport bool __stdcall IsDebuggerRunning() { return _console && _console->GetDebugger(false).get() != nullptr; }
+
+	// Execution control, forwarded unchanged to the debugger.
+	DllExport bool __stdcall IsExecutionStopped() { return GetDebugger()->IsExecutionStopped(); }
+	DllExport void __stdcall ResumeExecution() { GetDebugger()->Run(); }
+	DllExport void __stdcall Step(uint32_t count) { GetDebugger()->Step(count); }
+	//DllExport const char* __stdcall DebugGetCode(uint32_t &length) { return GetDebugger()->GetCode(length); }
+
+	// Trace logger access. GetExecutionTrace now carries __stdcall like every other export here.
+	DllExport void __stdcall SetTraceOptions(TraceLoggerOptions options) { GetDebugger()->GetTraceLogger()->SetOptions(options); }
+	DllExport void __stdcall StartTraceLogger(char* filename) { GetDebugger()->GetTraceLogger()->StartLogging(filename); }
+	DllExport void __stdcall StopTraceLogger() { GetDebugger()->GetTraceLogger()->StopLogging(); }
+	DllExport const char* __stdcall GetExecutionTrace(uint32_t lineCount) { return GetDebugger()->GetTraceLogger()->GetExecutionTrace(lineCount); }
+
+	DllExport void __stdcall GetState(DebugState *state) { GetDebugger()->GetState(state); }
+};
\ No newline at end of file
diff --git a/InteropDLL/EmuApiWrapper.cpp b/InteropDLL/EmuApiWrapper.cpp
new file mode 100644
index 0000000..2488312
--- /dev/null
+++ b/InteropDLL/EmuApiWrapper.cpp
@@ -0,0 +1,223 @@
+#include "stdafx.h"
+#include "../Core/Console.h"
+#include "../Core/MessageManager.h"
+#include "../Core/INotificationListener.h"
+#include "../Core/KeyManager.h"
+#include "../Utilities/SimpleLock.h"
+#include "../Utilities/ArchiveReader.h"
+
+#ifdef _WIN32
+	#include "../Windows/Renderer.h"
+	#include "../Windows/SoundManager.h"
+	#include "../Windows/WindowsKeyManager.h"
+#else
+	#include "../Linux/SdlRenderer.h"
+	#include "../Linux/SdlSoundManager.h"
+	#include "../Linux/LinuxKeyManager.h"
+#endif
+
+unique_ptr<IRenderingDevice> _renderer;
+unique_ptr<IAudioDevice> _soundManager;
+unique_ptr<IKeyManager> _keyManager; // its raw pointer is also handed to KeyManager::RegisterKeyManager in InitializeEmu
+//unique_ptr<ShortcutKeyHandler> _shortcutKeyHandler;
+
+void* _windowHandle = nullptr; // set by InitializeEmu only when both handles are non-null
+void* _viewerHandle = nullptr;
+string _returnString; // backs the const char* returned by GetArchiveRomList and GetKeyName
+string _logString; // backs the const char* returned by GetLog
+shared_ptr<Console> _console; // created by InitDll, cleared by Release
+SimpleLock _externalNotificationListenerLock; // acquired while _externalNotificationListeners is modified
+vector<shared_ptr<INotificationListener>> _externalNotificationListeners;
+
+typedef void (__stdcall *NotificationListenerCallback)(int, void*); // called with (notification type as int, parameter)
+
+namespace InteropEmu {
+	// Adapts a callback function pointer received through the interop API to the
+	// INotificationListener interface by relaying each notification to it.
+	class InteropNotificationListener : public INotificationListener
+	{
+		NotificationListenerCallback _callback;
+
+	public:
+		// The pointer is stored as given; it is not checked for null here or when invoked.
+		InteropNotificationListener(NotificationListenerCallback callback) : _callback(callback) {}
+
+		virtual ~InteropNotificationListener() {}
+
+		// Calls the stored callback with the notification type converted to int.
+		void ProcessNotification(ConsoleNotificationType type, void* parameter)
+		{
+			_callback(static_cast<int>(type), parameter);
+		}
+	};
+
+	extern "C" {
+		// Always returns true (presumably a load/probe check for the caller).
+		DllExport bool __stdcall TestDll() { return true; }
+
+		// Reports the fixed version constant 0x00000100.
+		DllExport uint32_t __stdcall GetMesenVersion()
+		{
+			return 0x00000100;
+		}
+
+		// Replaces whatever _console held with a newly constructed Console.
+		DllExport void __stdcall InitDll() { _console.reset(new Console()); }
+
+		// Sets the home folder, then creates the platform renderer, sound manager and key manager unless disabled by the no* flags.
+		DllExport void __stdcall InitializeEmu(const char* homeFolder, void *windowHandle, void *viewerHandle, bool noAudio, bool noVideo, bool noInput)
+		{
+			FolderUtilities::SetHomeFolder(homeFolder);
+			//_shortcutKeyHandler.reset(new ShortcutKeyHandler(_console));
+			// Without both handles nothing below is created, whatever the flags say.
+			if(windowHandle == nullptr || viewerHandle == nullptr) { return; }
+			_windowHandle = windowHandle;
+			_viewerHandle = viewerHandle;
+
+			if(!noVideo) {
+				#ifdef _WIN32
+					_renderer.reset(new Renderer(_console, static_cast<HWND>(_viewerHandle), true));
+				#else
+					_renderer.reset(new SdlRenderer(_console, _viewerHandle, true));
+				#endif
+			}
+
+			if(!noAudio) {
+				#ifdef _WIN32
+					_soundManager.reset(new SoundManager(_console, static_cast<HWND>(_windowHandle)));
+				#else
+					_soundManager.reset(new SdlSoundManager(_console));
+				#endif
+			}
+
+			if(!noInput) {
+				#ifdef _WIN32
+					_keyManager.reset(new WindowsKeyManager(_console, static_cast<HWND>(_windowHandle)));
+				#else
+					_keyManager.reset(new LinuxKeyManager(_console));
+				#endif
+				// KeyManager receives a raw pointer; _keyManager keeps ownership.
+				KeyManager::RegisterKeyManager(_keyManager.get());
+			}
+		}
+
+		// Forwards the fullscreen request to the renderer; does nothing when no renderer exists.
+		DllExport void __stdcall SetFullscreenMode(bool fullscreen, void *windowHandle, uint32_t monitorWidth, uint32_t monitorHeight)
+		{
+			if(!_renderer) { return; }
+			_renderer->SetFullscreenMode(fullscreen, windowHandle, monitorWidth, monitorHeight);
+		}
+
+		// Wraps both paths in VirtualFile and passes them to the console. NOTE(review): _console and patchFile are not null-checked here.
+		DllExport void __stdcall LoadRom(char* filename, char* patchFile) { _console->LoadRom((VirtualFile)filename, (VirtualFile)patchFile); }
+		//DllExport void __stdcall AddKnownGameFolder(char* folder) { FolderUtilities::AddKnownGameFolder(folder); }
+		//DllExport void __stdcall SetFolderOverrides(char* saveFolder, char* saveStateFolder, char* screenshotFolder) { FolderUtilities::SetFolderOverrides(saveFolder, saveStateFolder, screenshotFolder); }
+
+		// Lists the archive's ".sfc" entries, each followed by "[!|!]"; the pointer refers to the shared _returnString buffer.
+		DllExport const char* __stdcall GetArchiveRomList(char* filename) {
+			std::ostringstream out;
+			shared_ptr<ArchiveReader> reader = ArchiveReader::GetReader(filename);
+			if(reader) {
+				for(const string& romName : reader->GetFileList({ ".sfc" })) { out << romName << "[!|!]"; } // const reference: no per-name copy
+			}
+			_returnString = out.str();
+			return _returnString.c_str();
+		}
+
+		// Mouse input is handed straight to the static KeyManager helpers.
+		DllExport void __stdcall SetMousePosition(double x, double y) { KeyManager::SetMousePosition(x, y); }
+		DllExport void __stdcall SetMouseMovement(int16_t x, int16_t y) { KeyManager::SetMouseMovement(x, y); }
+
+		// Every export below that touches _keyManager does nothing while no key manager exists.
+		DllExport void __stdcall UpdateInputDevices() { if(_keyManager) { _keyManager->UpdateDevices(); } }
+
+		// Copies at most the first 3 pressed key codes into keyBuffer; any remaining slots are not written.
+		DllExport void __stdcall GetPressedKeys(uint32_t *keyBuffer) {
+			const vector<uint32_t> keys = KeyManager::GetPressedKeys();
+			const size_t count = keys.size() < 3 ? keys.size() : 3;
+			for(size_t i = 0; i < count; i++) { keyBuffer[i] = keys[i]; }
+		}
+
+		DllExport void __stdcall DisableAllKeys(bool disabled) { if(_keyManager) { _keyManager->SetDisabled(disabled); } }
+		DllExport void __stdcall SetKeyState(int32_t scanCode, bool state) {
+			if(!_keyManager) { return; }
+			_keyManager->SetKeyState(scanCode, state);
+			//_shortcutKeyHandler->ProcessKeys();
+		}
+
+		DllExport void __stdcall ResetKeyState() { if(_keyManager) { _keyManager->ResetKeyState(); } }
+
+		// The name is copied into _returnString; the returned pointer refers to that shared buffer.
+		DllExport const char* __stdcall GetKeyName(uint32_t keyCode)
+		{
+			_returnString = KeyManager::GetKeyName(keyCode);
+			return _returnString.c_str();
+		}
+
+		// Looks up the code for a key name; a null name yields 0.
+		DllExport uint32_t __stdcall GetKeyCode(char* keyName) {
+			return keyName ? KeyManager::GetKeyCode(keyName) : 0;
+		}
+
+		// Run and Stop forward to the console when one exists;
+		// both return immediately while _console is empty.
+		DllExport void __stdcall Run()
+		{
+			if(!_console) { return; }
+			_console->Run();
+		}
+
+		DllExport void __stdcall Stop()
+		{
+			if(!_console) { return; }
+			_console->Stop();
+		}
+		
+		// Shuts down: stops the console if present, destroys renderer/sound/key manager, then drops the console.
+		DllExport void __stdcall Release()
+		{
+			//_shortcutKeyHandler.reset();
+			// Guarded like Run()/Stop(): _console is empty before InitDll() and after a previous Release().
+			if(_console) { _console->Stop(); }
+
+			_renderer.reset();
+			_soundManager.reset();
+			_keyManager.reset();
+
+			//_console->Release(true);
+			_console.reset();
+			//_shortcutKeyHandler.reset();
+		}
+
+		// Wraps the callback in an InteropNotificationListener that is kept alive by
+		// _externalNotificationListeners; the returned raw pointer is the handle for unregistering.
+		DllExport INotificationListener* __stdcall RegisterNotificationCallback(NotificationListenerCallback callback)
+		{
+			auto guard = _externalNotificationListenerLock.AcquireSafe();
+			shared_ptr<INotificationListener> listener = std::make_shared<InteropNotificationListener>(callback);
+			_externalNotificationListeners.push_back(listener);
+			//_console->GetNotificationManager()->RegisterNotificationListener(listener);
+			return listener.get();
+		}
+
+		// Removes every stored listener whose address matches; an unknown pointer removes nothing.
+		DllExport void __stdcall UnregisterNotificationCallback(INotificationListener *listener)
+		{
+			auto guard = _externalNotificationListenerLock.AcquireSafe();
+			auto& listeners = _externalNotificationListeners;
+			auto matchesHandle = [listener](const shared_ptr<INotificationListener>& entry) {
+				return entry.get() == listener;
+			};
+			listeners.erase(std::remove_if(listeners.begin(), listeners.end(), matchesHandle), listeners.end());
+		}
+
+		// A null param1 is replaced by an empty string; title and message are passed through unchecked.
+		DllExport void __stdcall DisplayMessage(char* title, char* message, char* param1) { MessageManager::DisplayMessage(title, message, param1 != nullptr ? param1 : ""); }
+
+		// Copies the log into _logString and returns a pointer into that shared buffer.
+		DllExport const char* __stdcall GetLog() { _logString = MessageManager::GetLog(); return _logString.c_str(); }
+
+		// Passes the message to MessageManager::Log.
+		DllExport void __stdcall WriteLogEntry(char* message) { MessageManager::Log(message); }
+	}
+}
\ No newline at end of file
diff --git a/InteropDLL/InteropDLL.vcxproj b/InteropDLL/InteropDLL.vcxproj
new file mode 100644
index 0000000..c543ee5
--- /dev/null
+++ b/InteropDLL/InteropDLL.vcxproj
@@ -0,0 +1,472 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|Win32">
+      <Configuration>Libretro</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|x64">
+      <Configuration>Libretro</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|Win32">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|x64">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|Win32">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|x64">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{37749BB2-FA78-4EC9-8990-5628FC0BBA19}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>InteropDLL</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>MesenSCore</TargetName>
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <CallingConvention>Cdecl</CallingConvention>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <CallingConvention>Cdecl</CallingConvention>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>PGO;WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <LinkTimeCodeGeneration>PGInstrument</LinkTimeCodeGeneration>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>PGO;WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <LinkTimeCodeGeneration>PGOptimization</LinkTimeCodeGeneration>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>PGO;WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <LinkTimeCodeGeneration>PGInstrument</LinkTimeCodeGeneration>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>PGO;WIN32;NDEBUG;_WINDOWS;_USRDLL;INTEROPDLL_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>dinput8.lib;Xinput9_1_0.lib;d3d11.lib;d3dcompiler.lib;dxguid.lib;winmm.lib;comctl32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;</AdditionalDependencies>
+      <LinkTimeCodeGeneration>PGOptimization</LinkTimeCodeGeneration>
+      <AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="EmuApiWrapper.cpp" />
+    <ClCompile Include="DebugApiWrapper.cpp" />
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/InteropDLL/InteropDLL.vcxproj.filters b/InteropDLL/InteropDLL.vcxproj.filters
new file mode 100644
index 0000000..2206318
--- /dev/null
+++ b/InteropDLL/InteropDLL.vcxproj.filters
@@ -0,0 +1,29 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="stdafx.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="DebugApiWrapper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="EmuApiWrapper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/InteropDLL/stdafx.cpp b/InteropDLL/stdafx.cpp
new file mode 100644
index 0000000..5de39fc
--- /dev/null
+++ b/InteropDLL/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// InteropDLL.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/InteropDLL/stdafx.h b/InteropDLL/stdafx.h
new file mode 100644
index 0000000..43d156d
--- /dev/null
+++ b/InteropDLL/stdafx.h
@@ -0,0 +1,47 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#if _WIN32 || _WIN64 // Windows targets: _WIN64 selects the 64-bit marker, otherwise the 32-bit one
+	#if _WIN64
+		#define ENVIRONMENT64
+	#else
+		#define ENVIRONMENT32
+	#endif
+#endif
+
+#if __GNUC__ // GCC-compatible compilers: only x86-64 and ppc64 are classified as 64-bit here
+	#if __x86_64__ || __ppc64__
+		#define ENVIRONMENT64
+	#else
+		#define ENVIRONMENT32
+	#endif
+#endif
+
+#ifdef _DEBUG // build-flavour component of the DirectXTK library name used below
+	#define MESEN_LIBRARY_DEBUG_SUFFIX "Debug"
+#else 
+	#define MESEN_LIBRARY_DEBUG_SUFFIX "Release"
+#endif
+
+#ifdef ENVIRONMENT32 // architecture component of the DirectXTK library name used below
+	#define MESEN_LIBRARY_SUFFIX "x86.lib"
+#else 
+	#define MESEN_LIBRARY_SUFFIX "x64.lib"
+#endif
+
+#if _WIN32 || _WIN64 // Windows: request the dependent libraries from the linker and export via __declspec
+	#pragma comment(lib, "Core.lib")
+	#pragma comment(lib, "Utilities.lib")
+	#pragma comment(lib, "Windows.lib")
+	#pragma comment(lib, "SevenZip.lib")
+	#pragma comment(lib, "Lua.lib")
+	#pragma comment(lib, "../Dependencies/DirectXTK." MESEN_LIBRARY_DEBUG_SUFFIX ".Static." MESEN_LIBRARY_SUFFIX)
+	#define DllExport __declspec(dllexport)
+#else // elsewhere: __stdcall expands to nothing and exports use default symbol visibility
+	#define __stdcall
+	#define DllExport __attribute__((visibility("default")))
+#endif
\ No newline at end of file
diff --git a/SevenZip/7z.h b/SevenZip/7z.h
new file mode 100644
index 0000000..4768151
--- /dev/null
+++ b/SevenZip/7z.h
@@ -0,0 +1,202 @@
+/* 7z.h -- 7z interface
+2015-11-18 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_H
+#define __7Z_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define k7zStartHeaderSize 0x20
+#define k7zSignatureSize 6
+
+extern const Byte k7zSignature[k7zSignatureSize];
+
+typedef struct
+{
+  const Byte *Data;
+  size_t Size;
+} CSzData;
+
+/* CSzCoderInfo & CSzFolder support only default methods */
+
+typedef struct
+{
+  size_t PropsOffset;
+  UInt32 MethodID;
+  Byte NumStreams;
+  Byte PropsSize;
+} CSzCoderInfo;
+
+typedef struct
+{
+  UInt32 InIndex;
+  UInt32 OutIndex;
+} CSzBond;
+
+#define SZ_NUM_CODERS_IN_FOLDER_MAX 4 /* capacity of CSzFolder.Coders */
+#define SZ_NUM_BONDS_IN_FOLDER_MAX 3 /* capacity of CSzFolder.Bonds */
+#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4 /* capacity of CSzFolder.PackStreams */
+
+typedef struct
+{
+  UInt32 NumCoders; /* presumably the number of used entries in Coders - confirm against the parser */
+  UInt32 NumBonds; /* presumably the number of used entries in Bonds - confirm against the parser */
+  UInt32 NumPackStreams; /* presumably the number of used entries in PackStreams - confirm against the parser */
+  UInt32 UnpackStream;
+  UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX];
+  CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX];
+  CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX];
+} CSzFolder; /* fixed-size record: all three arrays are embedded, nothing is heap-allocated by the struct itself */
+
+
+SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd);
+
+typedef struct
+{
+  UInt32 Low;
+  UInt32 High;
+} CNtfsFileTime;
+
+typedef struct
+{
+  Byte *Defs; /* MSB 0 bit numbering */
+  UInt32 *Vals;
+} CSzBitUi32s;
+
+typedef struct
+{
+  Byte *Defs; /* MSB 0 bit numbering */
+  // UInt64 *Vals;
+  CNtfsFileTime *Vals;
+} CSzBitUi64s;
+
+#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) /* 1 if bit i is set; bit 0 is the MSB of byte 0; i is evaluated twice */
+
+#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) /* same test on (p)->Defs; yields 0 when Defs is NULL */
+
+typedef struct
+{
+  UInt32 NumPackStreams;
+  UInt32 NumFolders;
+
+  UInt64 *PackPositions;          // NumPackStreams + 1
+  CSzBitUi32s FolderCRCs;         // NumFolders
+
+  size_t *FoCodersOffsets;        // NumFolders + 1
+  UInt32 *FoStartPackStreamIndex; // NumFolders + 1
+  UInt32 *FoToCoderUnpackSizes;   // NumFolders + 1
+  Byte *FoToMainUnpackSizeIndex;  // NumFolders
+  UInt64 *CoderUnpackSizes;       // for all coders in all folders
+
+  Byte *CodersData;
+} CSzAr;
+
+UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
+
+SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
+    ILookInStream *stream, UInt64 startPos,
+    Byte *outBuffer, size_t outSize,
+    ISzAlloc *allocMain);
+
+typedef struct
+{
+  CSzAr db;
+
+  UInt64 startPosAfterHeader;
+  UInt64 dataPos;
+  
+  UInt32 NumFiles;
+
+  UInt64 *UnpackPositions;  // NumFiles + 1
+  // Byte *IsEmptyFiles;
+  Byte *IsDirs;
+  CSzBitUi32s CRCs;
+
+  CSzBitUi32s Attribs;
+  // CSzBitUi32s Parents;
+  CSzBitUi64s MTime;
+  CSzBitUi64s CTime;
+
+  UInt32 *FolderToFile;   // NumFolders + 1
+  UInt32 *FileToFolder;   // NumFiles
+
+  size_t *FileNameOffsets; /* in 2-byte steps */
+  Byte *FileNames;  /* UTF-16-LE */
+} CSzArEx;
+
+#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i)) /* tests bit i of the IsDirs bit array (MSB-first) */
+
+#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i]) /* difference of consecutive UnpackPositions entries; p and i are evaluated twice */
+
+void SzArEx_Init(CSzArEx *p);
+void SzArEx_Free(CSzArEx *p, ISzAlloc *alloc);
+UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder);
+int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize);
+
+/*
+if dest == NULL, the return value specifies the required size of the buffer,
+  in 16-bit characters, including the null-terminating character.
+if dest != NULL, the return value specifies the number of 16-bit characters that
+  are written to the dest, including the null-terminating character. */
+
+size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
+
+/*
+size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex);
+UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
+*/
+
+
+
+/*
+  SzArEx_Extract extracts file from archive
+
+  *outBuffer must be 0 before first call for each new archive.
+
+  Extracting cache:
+    If you need to decompress more than one file, you can send
+    these values from previous call:
+      *blockIndex,
+      *outBuffer,
+      *outBufferSize
+    You can consider "*outBuffer" as cache of solid block. If your archive is solid,
+    it will increase decompression speed.
+  
+    If you use external function, you can declare these 3 cache variables
+    (blockIndex, outBuffer, outBufferSize) as static in that external function.
+    
+    Free *outBuffer and set *outBuffer to 0, if you want to flush cache.
+*/
+
+SRes SzArEx_Extract(
+    const CSzArEx *db,
+    ILookInStream *inStream,
+    UInt32 fileIndex,         /* index of file */
+    UInt32 *blockIndex,       /* index of solid block */
+    Byte **outBuffer,         /* pointer to pointer to output buffer (allocated with allocMain) */
+    size_t *outBufferSize,    /* buffer size for output buffer */
+    size_t *offset,           /* offset of stream for required file in *outBuffer */
+    size_t *outSizeProcessed, /* size of file in *outBuffer */
+    ISzAlloc *allocMain,
+    ISzAlloc *allocTemp);
+
+
+/*
+SzArEx_Open Errors:
+SZ_ERROR_NO_ARCHIVE
+SZ_ERROR_ARCHIVE
+SZ_ERROR_UNSUPPORTED
+SZ_ERROR_MEM
+SZ_ERROR_CRC
+SZ_ERROR_INPUT_EOF
+SZ_ERROR_FAIL
+*/
+
+SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
+    ISzAlloc *allocMain, ISzAlloc *allocTemp);
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/7zAlloc.c b/SevenZip/7zAlloc.c
new file mode 100644
index 0000000..3e848c9
--- /dev/null
+++ b/SevenZip/7zAlloc.c
@@ -0,0 +1,78 @@
+/* 7zAlloc.c -- Allocation functions
+2015-11-09 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "7zAlloc.h"
+
+/* #define _SZ_ALLOC_DEBUG */
+/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
+
+#ifdef _SZ_ALLOC_DEBUG
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#include <stdio.h>
+int g_allocCount = 0;
+int g_allocCountTemp = 0;
+
+#endif
+
+void *SzAlloc(void *p, size_t size)
+{
+  UNUSED_VAR(p); /* the context pointer is not consulted */
+  if (size != 0) { /* a zero-byte request falls through and yields a null pointer */
+    #ifdef _SZ_ALLOC_DEBUG
+    fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount++);
+    #endif
+    return malloc(size);
+  }
+  return 0;
+}
+
+/* Passes address to free(); the context pointer p is ignored.
+   When _SZ_ALLOC_DEBUG is defined, a non-null address first decrements
+   g_allocCount and the new count is written to stderr. */
+void SzFree(void *p, void *address)
+{
+  UNUSED_VAR(p);
+  #ifdef _SZ_ALLOC_DEBUG
+  if (address)
+    fprintf(stderr, "\nFree; count = %10d", --g_allocCount);
+  #endif
+  free(address);
+}
+
+void *SzAllocTemp(void *p, size_t size)
+{
+  UNUSED_VAR(p); /* the context pointer is not consulted */
+  if (size != 0) { /* a zero-byte request falls through and yields a null pointer */
+    #ifdef _SZ_ALLOC_DEBUG
+    fprintf(stderr, "\nAlloc_temp %10u bytes;  count = %10d", (unsigned)size, g_allocCountTemp++);
+    #ifdef _WIN32
+    return HeapAlloc(GetProcessHeap(), 0, size); /* debug builds on Windows take memory from the process heap */
+    #endif
+    #endif
+    return malloc(size);
+  }
+  return 0;
+}
+
+/* Releases a temporary block; the context pointer p is ignored. Builds with
+   both _SZ_ALLOC_DEBUG and _WIN32 release through HeapFree on the process heap,
+   every other build through free(). The debug counter only moves for non-null input. */
+void SzFreeTemp(void *p, void *address)
+{
+  UNUSED_VAR(p);
+  #ifdef _SZ_ALLOC_DEBUG
+  if (address)
+    fprintf(stderr, "\nFree_temp; count = %10d", --g_allocCountTemp);
+  #endif
+  #if defined(_SZ_ALLOC_DEBUG) && defined(_WIN32)
+  HeapFree(GetProcessHeap(), 0, address);
+  #else
+  free(address);
+  #endif
+}
diff --git a/SevenZip/7zAlloc.h b/SevenZip/7zAlloc.h
new file mode 100644
index 0000000..2fd5bdb
--- /dev/null
+++ b/SevenZip/7zAlloc.h
@@ -0,0 +1,23 @@
+/* 7zAlloc.h -- Allocation functions
+2013-03-25 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_ALLOC_H
+#define __7Z_ALLOC_H
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *SzAlloc(void *p, size_t size); /* returns 0 for size == 0, otherwise malloc(size); p is unused */
+void SzFree(void *p, void *address); /* free(address); p is unused */
+
+void *SzAllocTemp(void *p, size_t size); /* as SzAlloc; with _SZ_ALLOC_DEBUG on _WIN32 it uses HeapAlloc instead */
+void SzFreeTemp(void *p, void *address); /* as SzFree; with _SZ_ALLOC_DEBUG on _WIN32 it uses HeapFree instead */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/SevenZip/7zArcIn.c b/SevenZip/7zArcIn.c
new file mode 100644
index 0000000..2beed3d
--- /dev/null
+++ b/SevenZip/7zArcIn.c
@@ -0,0 +1,1771 @@
+/* 7zArcIn.c -- 7z Input functions
+2016-05-16 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "7z.h"
+#include "7zBuf.h"
+#include "7zCrc.h"
+#include "CpuArch.h"
+
+#define MY_ALLOC(T, p, size, alloc) { /* allocate `size` items of T into p; on failure the ENCLOSING function returns SZ_ERROR_MEM */ \
+  if ((p = (T *)IAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; }
+/* MY_ALLOC_ZE: as MY_ALLOC, except that a zero item count stores NULL without calling the allocator. */
+#define MY_ALLOC_ZE(T, p, size, alloc) { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) }
+/* MY_ALLOC_AND_CPY: allocate `size` bytes into `to`, then copy `size` bytes from `from`. */
+#define MY_ALLOC_AND_CPY(to, size, from, alloc) \
+  { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); }
+/* MY_ALLOC_ZE_AND_CPY: zero-size-tolerant copy. The empty case clears `to` itself; it must never touch a caller variable that merely happens to be named `p`. */
+#define MY_ALLOC_ZE_AND_CPY(to, size, from, alloc) \
+  { if ((size) == 0) to = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } }
+
+#define k7zMajorVersion 0
+/* Tags compared against values obtained with ReadID. No enumerator has an explicit value, so each one equals its position in the list (shown in hex). */
+enum EIdEnum
+{
+  k7zIdEnd,                    /* 0x00: terminates the tag loops in this file */
+  k7zIdHeader,                 /* 0x01 */
+  k7zIdArchiveProperties,      /* 0x02 */
+  k7zIdAdditionalStreamsInfo,  /* 0x03 */
+  k7zIdMainStreamsInfo,        /* 0x04 */
+  k7zIdFilesInfo,              /* 0x05 */
+  k7zIdPackInfo,               /* 0x06 */
+  k7zIdUnpackInfo,             /* 0x07 */
+  k7zIdSubStreamsInfo,         /* 0x08 */
+  k7zIdSize,                   /* 0x09 */
+  k7zIdCRC,                    /* 0x0A */
+  k7zIdFolder,                 /* 0x0B */
+  k7zIdCodersUnpackSize,       /* 0x0C */
+  k7zIdNumUnpackStream,        /* 0x0D */
+  k7zIdEmptyStream,            /* 0x0E */
+  k7zIdEmptyFile,              /* 0x0F */
+  k7zIdAnti,                   /* 0x10 */
+  k7zIdName,                   /* 0x11 */
+  k7zIdCTime,                  /* 0x12 */
+  k7zIdATime,                  /* 0x13 */
+  k7zIdMTime,                  /* 0x14 */
+  k7zIdWinAttrib,              /* 0x15 */
+  k7zIdComment,                /* 0x16 */
+  k7zIdEncodedHeader,          /* 0x17 */
+  k7zIdStartPos,               /* 0x18 */
+  k7zIdDummy                   /* 0x19 */
+  // k7zNtSecure,
+  // k7zParent,
+  // k7zIsReal
+};
+
+const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};  /* byte pattern that TestSignatureCandidate compares against */
+
+#define SzBitUi32s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }  /* empty set: neither the flag bits nor the values are allocated */
+
+static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAlloc *alloc)
+{
+  /* Sizes both arrays for `num` items: one flag bit per item (rounded up to whole bytes)
+     plus one UInt32 per item. MY_ALLOC returns SZ_ERROR_MEM from here on failure. */
+  if (num != 0)
+  {
+    const size_t numDefBytes = (num + 7) >> 3;
+    MY_ALLOC(Byte, p->Defs, numDefBytes, alloc);
+    MY_ALLOC(UInt32, p->Vals, num, alloc);
+    return SZ_OK;
+  }
+  p->Defs = NULL; p->Vals = NULL;   /* empty set: no storage at all */
+  return SZ_OK;
+}
+
+void SzBitUi32s_Free(CSzBitUi32s *p, ISzAlloc *alloc)
+{ /* Releases both arrays and nulls the pointers, so a repeated call hands only NULL to the allocator. */
+  IAlloc_Free(alloc, p->Defs); IAlloc_Free(alloc, p->Vals);
+  SzBitUi32s_Init(p);
+}
+
+#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }  /* empty set: neither the flag bits nor the values are allocated */
+
+void SzBitUi64s_Free(CSzBitUi64s *p, ISzAlloc *alloc)
+{ /* Releases both arrays and nulls the pointers, so a repeated call hands only NULL to the allocator. */
+  IAlloc_Free(alloc, p->Defs); IAlloc_Free(alloc, p->Vals);
+  SzBitUi64s_Init(p);
+}
+
+
+static void SzAr_Init(CSzAr *p)
+{
+  /* Put the database into its empty state: zero counts, every owned array pointer NULL. */
+  p->NumFolders = p->NumPackStreams = 0;
+
+  /* pack-stream table and the per-folder CRC set */
+  p->PackPositions = NULL;
+  p->FolderCRCs.Defs = NULL; p->FolderCRCs.Vals = NULL;
+
+  /* coder bytes, coder sizes and the per-folder index arrays */
+  p->CodersData = NULL;
+  p->CoderUnpackSizes = NULL;
+  p->FoToMainUnpackSizeIndex = NULL;
+  p->FoToCoderUnpackSizes = NULL;
+  p->FoStartPackStreamIndex = NULL; p->FoCodersOffsets = NULL;
+}
+
+static void SzAr_Free(CSzAr *p, ISzAlloc *alloc)
+{
+  /* Hand every array owned by the database back to `alloc`, then reset the struct
+     through SzAr_Init so that all of its pointers are NULL again. */
+  IAlloc_Free(alloc, p->PackPositions);
+  SzBitUi32s_Free(&p->FolderCRCs, alloc);
+
+  /* per-folder index tables */
+  IAlloc_Free(alloc, p->FoCodersOffsets); IAlloc_Free(alloc, p->FoStartPackStreamIndex);
+  IAlloc_Free(alloc, p->FoToCoderUnpackSizes); IAlloc_Free(alloc, p->FoToMainUnpackSizeIndex);
+
+  /* coder sizes and the copied coder bytes */
+  IAlloc_Free(alloc, p->CoderUnpackSizes); IAlloc_Free(alloc, p->CodersData);
+  SzAr_Init(p);
+}
+
+
+void SzArEx_Init(CSzArEx *p)
+{
+  /* Establish the empty state: no files, data position 0, every owned pointer NULL.
+     SzArEx_Free finishes by calling this function. */
+  SzAr_Init(&p->db);
+
+  /* scalars */
+  p->dataPos = 0;
+  p->NumFiles = 0;
+
+  /* per-file and per-folder tables */
+  p->UnpackPositions = NULL; p->IsDirs = NULL;
+  p->FolderToFile = NULL; p->FileToFolder = NULL;
+  p->FileNameOffsets = NULL; p->FileNames = NULL;
+
+  /* optional per-file properties (flag bits + values) */
+  SzBitUi32s_Init(&p->CRCs);
+  SzBitUi32s_Init(&p->Attribs);
+  // SzBitUi32s_Init(&p->Parents);
+  SzBitUi64s_Init(&p->MTime);
+  SzBitUi64s_Init(&p->CTime);
+}
+
+void SzArEx_Free(CSzArEx *p, ISzAlloc *alloc)
+{
+  /* Release everything the archive database owns through `alloc`, then re-initialise
+     it, which leaves every pointer NULL and every count zero. */
+  /* per-file and per-folder tables */
+  IAlloc_Free(alloc, p->UnpackPositions); IAlloc_Free(alloc, p->IsDirs);
+  IAlloc_Free(alloc, p->FolderToFile); IAlloc_Free(alloc, p->FileToFolder);
+  IAlloc_Free(alloc, p->FileNameOffsets); IAlloc_Free(alloc, p->FileNames);
+
+  /* optional per-file properties */
+  SzBitUi32s_Free(&p->CRCs, alloc);
+  SzBitUi32s_Free(&p->Attribs, alloc);
+  // SzBitUi32s_Free(&p->Parents, alloc);
+  SzBitUi64s_Free(&p->MTime, alloc);
+  SzBitUi64s_Free(&p->CTime, alloc);
+
+  /* the embedded low-level database, then the reset */
+  SzAr_Free(&p->db, alloc);
+  SzArEx_Init(p);
+}
+
+
+static int TestSignatureCandidate(const Byte *testBytes)
+{ /* 1 if the bytes at testBytes begin with the whole k7zSignature; 0 at the first differing byte. */
+  const Byte *sig = k7zSignature;
+  unsigned left;
+  for (left = k7zSignatureSize; left != 0; left--)
+    if (*testBytes++ != *sig++) return 0;
+  return 1;
+}
+
+#define SzData_Clear(p) { (p)->Data = NULL; (p)->Size = 0; }  /* empty cursor: no data, zero length */
+/* Byte readers. Each expands to SEVERAL statements and returns SZ_ERROR_ARCHIVE from the enclosing function when no input is left. SZ_READ_BYTE needs a pointer named `sd` in scope, SZ_READ_BYTE_2 a struct named `sd`. */
+#define SZ_READ_BYTE_SD(_sd_, dest) if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; (_sd_)->Size--; dest = *(_sd_)->Data++;
+#define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest)
+#define SZ_READ_BYTE_2(dest) if (sd.Size == 0) return SZ_ERROR_ARCHIVE; sd.Size--; dest = *sd.Data++;
+/* Cursor advance with NO bounds check: the caller compares against Size first. SKIP_DATA takes a pointer, SKIP_DATA2 a struct. */
+#define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); }
+#define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); }
+/* Reads a 32-bit value through GetUi32 from a struct named `sd`; returns SZ_ERROR_ARCHIVE from the enclosing function if fewer than 4 bytes remain. */
+#define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \
+   dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4);
+
+static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value)
+{  /* Decodes one variable-length unsigned number (1 to 9 bytes) from sd into *value. On SZ_ERROR_ARCHIVE *value may already hold a partial result. */
+  Byte firstByte, mask;
+  unsigned i;
+  UInt32 v;
+  /* The leading 1-bits of firstByte count the extra bytes that follow; SZ_READ_BYTE returns SZ_ERROR_ARCHIVE when the input runs out. */
+  SZ_READ_BYTE(firstByte);
+  if ((firstByte & 0x80) == 0)  /* no extra bytes: the byte is the value */
+  {
+    *value = firstByte;
+    return SZ_OK;
+  }
+  SZ_READ_BYTE(v);
+  if ((firstByte & 0x40) == 0)  /* one extra byte: the low 6 bits of firstByte become bits 8..13 */
+  {
+    *value = (((UInt32)firstByte & 0x3F) << 8) | v;
+    return SZ_OK;
+  }
+  SZ_READ_BYTE(mask);  /* mask doubles as scratch storage for the second extra byte */
+  *value = v | ((UInt32)mask << 8);
+  mask = 0x20;
+  for (i = 2; i < 8; i++)  /* i = extra bytes consumed so far; they are stored least-significant first */
+  {
+    Byte b;
+    if ((firstByte & mask) == 0)
+    {
+      UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1);  /* bits of firstByte below its first 0-bit */
+      *value |= (highPart << (8 * i));
+      return SZ_OK;
+    }
+    SZ_READ_BYTE(b);
+    *value |= ((UInt64)b << (8 * i));
+    mask >>= 1;
+  }
+  return SZ_OK;  /* firstByte was 0xFF: the eight extra bytes hold the whole 64-bit value */
+}
+
+
+static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value)
+{
+  /* Reads one variable-length number and narrows it to 32 bits.
+     Values of 0x7FFFFFFF and above, or at/above 2^((sizeof(size_t)-1)*8+4), are
+     rejected with SZ_ERROR_UNSUPPORTED; an empty input yields SZ_ERROR_ARCHIVE. */
+  const UInt64 limit32 = (UInt32)0x80000000 - 1;
+  const UInt64 limitSizeT = (UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4);
+  UInt64 wide;
+  if (sd->Size == 0)
+    return SZ_ERROR_ARCHIVE;
+  if ((*sd->Data & 0x80) == 0)
+  {
+    /* fast path: a single byte with the top bit clear is the value itself */
+    *value = *sd->Data++;
+    sd->Size--;
+    return SZ_OK;
+  }
+  RINOK(ReadNumber(sd, &wide));
+  if (wide >= limit32 || wide >= limitSizeT)
+    return SZ_ERROR_UNSUPPORTED;
+  *value = (UInt32)wide; return SZ_OK;
+}
+
+#define ReadID(sd, value) ReadNumber(sd, value)  /* tags are read with the same variable-length decoder as every other number */
+
+static SRes SkipData(CSzData *sd)
+{ /* Skips one length-prefixed record; SZ_ERROR_ARCHIVE if the stated length overruns the input. */
+  UInt64 recordLen;
+  RINOK(ReadNumber(sd, &recordLen));
+  if (recordLen > sd->Size)
+    return SZ_ERROR_ARCHIVE;
+  sd->Data += (size_t)recordLen; sd->Size -= (size_t)recordLen;
+  return SZ_OK;
+}
+
+static SRes WaitId(CSzData *sd, UInt32 id)
+{
+  /* Skips whole records until tag `id` has been read (SZ_OK); meeting k7zIdEnd first is SZ_ERROR_ARCHIVE. */
+  UInt64 tag;
+  RINOK(ReadID(sd, &tag));
+  while (tag != id)
+  {
+    if (tag == k7zIdEnd) return SZ_ERROR_ARCHIVE;
+    RINOK(SkipData(sd));
+    RINOK(ReadID(sd, &tag));
+  }
+  return SZ_OK;
+}
+
+static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v)
+{ /* Borrows (does not copy) a packed bit vector of numItems bits from the input and steps past it. */
+  const UInt32 byteCount = (numItems + 7) >> 3;
+  if (sd->Size < byteCount)
+    return SZ_ERROR_ARCHIVE;
+  *v = sd->Data;
+  sd->Data += (size_t)byteCount; sd->Size -= (size_t)byteCount;
+  return SZ_OK;
+}
+
+static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
+{
+  /* Counts the set bits among the first numItems bits of a packed vector.
+     Bits are taken most-significant first within each byte; a byte is only
+     loaded when one of its bits is actually needed. */
+  UInt32 idx, total = 0;
+  Byte cur = 0;
+  for (idx = 0; idx != numItems; idx++)
+  {
+    const unsigned bitPos = (unsigned)(idx & 7);
+    if (bitPos == 0)
+      cur = bits[idx >> 3];
+    if ((cur & (0x80 >> bitPos)) != 0)
+      total++;
+  }
+  return total;
+}
+
+static MY_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAlloc *alloc)
+{
+  /* Produces a freshly allocated packed bit vector for numItems items in *v. A leading
+     flag byte selects the source: 0 means the bits follow in the input and are copied,
+     anything else means "all defined". *v stays NULL when numItems is 0. */
+  const UInt32 byteCount = (numItems + 7) >> 3;
+  const unsigned tailBits = (unsigned)numItems & 7;
+  Byte allDefinedFlag, *bitsOut;
+  *v = NULL;
+  SZ_READ_BYTE(allDefinedFlag);
+  if (byteCount == 0)
+    return SZ_OK;
+  if (allDefinedFlag != 0)
+  {
+    MY_ALLOC(Byte, *v, byteCount, alloc);
+    bitsOut = *v;
+    memset(bitsOut, 0xFF, (size_t)byteCount);
+    if (tailBits != 0)  /* keep only the top tailBits bits of the last byte set */
+      bitsOut[byteCount - 1] = (Byte)((((UInt32)1 << tailBits) - 1) << (8 - tailBits));
+    return SZ_OK;
+  }
+  if (byteCount > sd->Size)
+    return SZ_ERROR_ARCHIVE;
+  MY_ALLOC_AND_CPY(*v, byteCount, sd->Data, alloc);
+  SKIP_DATA(sd, byteCount);
+  return SZ_OK;
+}
+
+static MY_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAlloc *alloc)
+{  /* Fills crcs->Vals (numItems entries) from sd2, guided by the bit vector already stored in crcs->Defs. */
+  UInt32 i;
+  CSzData sd;
+  UInt32 *vals;
+  const Byte *defs;
+  MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc);  /* returns SZ_ERROR_MEM from this function on failure */
+  sd = *sd2;  /* work on a local cursor; the caller's cursor is written back only on success */
+  defs = crcs->Defs;
+  vals = crcs->Vals;
+  for (i = 0; i < numItems; i++)
+    if (SzBitArray_Check(defs, i))
+    {
+      SZ_READ_32(vals[i]);  /* value present in the input; SZ_ERROR_ARCHIVE if fewer than 4 bytes remain */
+    }
+    else
+      vals[i] = 0;  /* items without a stored value read as zero */
+  *sd2 = sd;
+  return SZ_OK;
+}
+
+static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAlloc *alloc)
+{ /* Frees whatever *crcs held, then reads a definition vector followed by the defined values. */
+  SRes res;
+  SzBitUi32s_Free(crcs, alloc); res = ReadBitVector(sd, numItems, &crcs->Defs, alloc);
+  return (res == SZ_OK) ? ReadUi32s(sd, numItems, crcs, alloc) : res;
+}
+
+static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems)
+{
+  /* Steps over an optional-UInt32 list without storing it: a flag byte, an explicit
+     bit vector when the flag is 0, then 4 bytes for every item marked as defined. */
+  Byte allDefinedFlag;
+  UInt32 definedCount = numItems;
+  SZ_READ_BYTE(allDefinedFlag);
+  if (allDefinedFlag == 0)
+  {
+    const size_t vecBytes = (numItems + 7) >> 3;
+    if (sd->Size < vecBytes) return SZ_ERROR_ARCHIVE;
+    definedCount = CountDefinedBits(sd->Data, numItems);
+    sd->Data += vecBytes; sd->Size -= vecBytes;
+  }
+  if ((sd->Size >> 2) < definedCount) return SZ_ERROR_ARCHIVE;
+  SKIP_DATA(sd, (size_t)definedCount * 4);
+  return SZ_OK;
+}
+
+static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAlloc *alloc)
+{
+  /* Parses the pack-info section: the number of pack streams, then (after the
+     k7zIdSize tag) one size per stream. Sizes are stored as running offsets, so
+     PackPositions gets NumPackStreams + 1 entries and the last one is the total.
+     Remaining records are consumed up to k7zIdEnd. */
+  UInt64 offset = 0;
+  UInt64 tag;
+  UInt32 idx, streamCount;
+
+  RINOK(SzReadNumber32(sd, &p->NumPackStreams));
+  RINOK(WaitId(sd, k7zIdSize));
+  MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc);
+
+  /* entry idx is the sum of the sizes of all streams before idx */
+  streamCount = p->NumPackStreams;
+  for (idx = 0; idx != streamCount; idx++)
+  {
+    UInt64 packedLen;
+    p->PackPositions[idx] = offset;
+    RINOK(ReadNumber(sd, &packedLen));
+    offset += packedLen;
+    if (offset < packedLen)  /* the 64-bit running total wrapped around */
+      return SZ_ERROR_ARCHIVE;
+  }
+  p->PackPositions[idx] = offset;
+
+  for (;;)
+  {
+    RINOK(ReadID(sd, &tag));
+    if (tag == k7zIdEnd)
+      break;
+    /* CRC of packed streams is unused now, so it is skipped rather than stored */
+    RINOK(tag == k7zIdCRC ? SkipBitUi32s(sd, p->NumPackStreams) : SkipData(sd));
+  }
+  return SZ_OK;
+}
+
+/*
+static SRes SzReadSwitch(CSzData *sd)
+{
+  Byte external;
+  RINOK(SzReadByte(sd, &external));
+  return (external == 0) ? SZ_OK: SZ_ERROR_UNSUPPORTED;
+}
+*/
+
+#define k_NumCodersStreams_in_Folder_MAX (SZ_NUM_BONDS_IN_FOLDER_MAX + SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX)  /* input-stream limit per folder; also the size of streamUsed[] in SzGetNextFolderItem */
+
+SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+{  /* Parses one folder description from sd into *f: its coders, bonds and pack-stream indices. f->NumCoders stays 0 unless the whole folder parses. */
+  UInt32 numCoders, i;
+  UInt32 numInStreams = 0;
+  const Byte *dataStart = sd->Data;  /* PropsOffset values are measured from this position */
+
+  f->NumCoders = 0;
+  f->NumBonds = 0;
+  f->NumPackStreams = 0;
+  f->UnpackStream = 0;
+  
+  RINOK(SzReadNumber32(sd, &numCoders));
+  if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX)
+    return SZ_ERROR_UNSUPPORTED;
+  /* One record per coder: a flag byte, the method ID, then optional stream counts and properties. */
+  for (i = 0; i < numCoders; i++)
+  {
+    Byte mainByte;
+    CSzCoderInfo *coder = f->Coders + i;
+    unsigned idSize, j;
+    UInt64 id;
+    
+    SZ_READ_BYTE(mainByte);
+    if ((mainByte & 0xC0) != 0)  /* the two top flag bits are not supported */
+      return SZ_ERROR_UNSUPPORTED;
+    
+    idSize = (unsigned)(mainByte & 0xF);  /* low nibble: number of method-ID bytes */
+    if (idSize > sizeof(id))
+      return SZ_ERROR_UNSUPPORTED;
+    if (idSize > sd->Size)
+      return SZ_ERROR_ARCHIVE;
+    id = 0;
+    for (j = 0; j < idSize; j++)  /* the ID bytes are most-significant first */
+    {
+      id = ((id << 8) | *sd->Data);
+      sd->Data++;
+      sd->Size--;
+    }
+    if (id > (UInt32)0xFFFFFFFF)  /* MethodID is stored as 32 bits */
+      return SZ_ERROR_UNSUPPORTED;
+    coder->MethodID = (UInt32)id;
+    
+    coder->NumStreams = 1;
+    coder->PropsOffset = 0;
+    coder->PropsSize = 0;
+    
+    if ((mainByte & 0x10) != 0)  /* explicit stream counts follow */
+    {
+      UInt32 numStreams;
+      
+      RINOK(SzReadNumber32(sd, &numStreams));
+      if (numStreams > k_NumCodersStreams_in_Folder_MAX)
+        return SZ_ERROR_UNSUPPORTED;
+      coder->NumStreams = (Byte)numStreams;
+
+      RINOK(SzReadNumber32(sd, &numStreams));  /* second count: only the value 1 is accepted */
+      if (numStreams != 1)
+        return SZ_ERROR_UNSUPPORTED;
+    }
+
+    numInStreams += coder->NumStreams;
+
+    if (numInStreams > k_NumCodersStreams_in_Folder_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+
+    if ((mainByte & 0x20) != 0)  /* coder properties follow; only their location and size are recorded */
+    {
+      UInt32 propsSize = 0;
+      RINOK(SzReadNumber32(sd, &propsSize));
+      if (propsSize > sd->Size)
+        return SZ_ERROR_ARCHIVE;
+      if (propsSize >= 0x80)
+        return SZ_ERROR_UNSUPPORTED;
+      coder->PropsOffset = sd->Data - dataStart;
+      coder->PropsSize = (Byte)propsSize;
+      sd->Data += (size_t)propsSize;
+      sd->Size -= (size_t)propsSize;
+    }
+  }
+
+  /*
+  if (numInStreams == 1 && numCoders == 1)
+  {
+    f->NumPackStreams = 1;
+    f->PackStreams[0] = 0;
+  }
+  else
+  */
+  {
+    Byte streamUsed[k_NumCodersStreams_in_Folder_MAX];
+    UInt32 numBonds, numPackStreams;
+    
+    numBonds = numCoders - 1;  /* the bond count is not stored: it is always one less than the coder count */
+    if (numInStreams < numBonds)
+      return SZ_ERROR_ARCHIVE;
+    if (numBonds > SZ_NUM_BONDS_IN_FOLDER_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+    f->NumBonds = numBonds;
+    
+    numPackStreams = numInStreams - numBonds;  /* input streams not consumed by a bond */
+    if (numPackStreams > SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+    f->NumPackStreams = numPackStreams;
+  
+    for (i = 0; i < numInStreams; i++)
+      streamUsed[i] = False;
+    
+    if (numBonds != 0)
+    {
+      Byte coderUsed[SZ_NUM_CODERS_IN_FOLDER_MAX];
+
+      for (i = 0; i < numCoders; i++)
+        coderUsed[i] = False;
+      /* Each bond names one input stream and one coder; neither index may appear twice. */
+      for (i = 0; i < numBonds; i++)
+      {
+        CSzBond *bp = f->Bonds + i;
+        
+        RINOK(SzReadNumber32(sd, &bp->InIndex));
+        if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex])
+          return SZ_ERROR_ARCHIVE;
+        streamUsed[bp->InIndex] = True;
+        
+        RINOK(SzReadNumber32(sd, &bp->OutIndex));
+        if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex])
+          return SZ_ERROR_ARCHIVE;
+        coderUsed[bp->OutIndex] = True;
+      }
+      /* UnpackStream is the first coder that no bond names as its OutIndex. */
+      for (i = 0; i < numCoders; i++)
+        if (!coderUsed[i])
+        {
+          f->UnpackStream = i;
+          break;
+        }
+      
+      if (i == numCoders)
+        return SZ_ERROR_ARCHIVE;
+    }
+    
+    if (numPackStreams == 1)  /* a single pack stream is implicit: the first input stream no bond used */
+    {
+      for (i = 0; i < numInStreams; i++)
+        if (!streamUsed[i])
+          break;
+      if (i == numInStreams)
+        return SZ_ERROR_ARCHIVE;
+      f->PackStreams[0] = i;
+    }
+    else  /* otherwise the pack-stream indices are listed explicitly and must all be distinct and unused */
+      for (i = 0; i < numPackStreams; i++)
+      {
+        UInt32 index;
+        RINOK(SzReadNumber32(sd, &index));
+        if (index >= numInStreams || streamUsed[index])
+          return SZ_ERROR_ARCHIVE;
+        streamUsed[index] = True;
+        f->PackStreams[i] = index;
+      }
+  }
+
+  f->NumCoders = numCoders;  /* published last, after every check has passed */
+
+  return SZ_OK;
+}
+
+
+static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num)
+{  /* Steps over `num` variable-length numbers without decoding them; *sd2 is updated only on success. */
+  CSzData sd;
+  sd = *sd2;
+  for (; num != 0; num--)
+  {
+    Byte firstByte, mask;
+    unsigned i;
+    SZ_READ_BYTE_2(firstByte);
+    if ((firstByte & 0x80) == 0)  /* single-byte number */
+      continue;
+    if ((firstByte & 0x40) == 0)  /* exactly one extra byte */
+    {
+      if (sd.Size == 0)
+        return SZ_ERROR_ARCHIVE;
+      sd.Size--;
+      sd.Data++;
+      continue;
+    }
+    mask = 0x20;
+    for (i = 2; i < 8 && (firstByte & mask) != 0; i++)  /* i ends up as the number of extra bytes (2..8) */
+      mask >>= 1;
+    if (i > sd.Size)
+      return SZ_ERROR_ARCHIVE;
+    SKIP_DATA2(sd, i);
+  }
+  *sd2 = sd;
+  return SZ_OK;
+}
+
+
+#define k_Scan_NumCoders_MAX 64  /* coder limit per folder in ReadUnpackInfo; also the size of its coderUsed[] array */
+#define k_Scan_NumCodersStreams_in_Folder_MAX 64  /* input-stream limit per folder in ReadUnpackInfo; also the size of its streamUsed[] array */
+
+
+static SRes ReadUnpackInfo(CSzAr *p,
+    CSzData *sd2,
+    UInt32 numFoldersMax,
+    const CBuf *tempBufs, UInt32 numTempBufs,
+    ISzAlloc *alloc)
+{  /* Parses the unpack-info section into p: per-folder offsets into a private copy of the coder bytes, pack-stream start indices, coder unpack sizes and optional folder CRCs. */
+  CSzData sd;
+  
+  UInt32 fo, numFolders, numCodersOutStreams, packStreamIndex;
+  const Byte *startBufPtr;
+  Byte external;
+  
+  RINOK(WaitId(sd2, k7zIdFolder));
+  
+  RINOK(SzReadNumber32(sd2, &numFolders));
+  if (numFolders > numFoldersMax)
+    return SZ_ERROR_UNSUPPORTED;
+  p->NumFolders = numFolders;
+  /* external == 0: folder descriptions follow inline; otherwise they are taken from tempBufs[index]. */
+  SZ_READ_BYTE_SD(sd2, external);
+  if (external == 0)
+    sd = *sd2;
+  else
+  {
+    UInt32 index;
+    RINOK(SzReadNumber32(sd2, &index));
+    if (index >= numTempBufs)
+      return SZ_ERROR_ARCHIVE;
+    sd.Data = tempBufs[index].data;
+    sd.Size = tempBufs[index].size;
+  }
+  /* Index tables; the three arrays sized numFolders + 1 receive one final entry after the loop below. */
+  MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc);
+  MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc);
+  MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc);
+  MY_ALLOC(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc);  /* NOTE(review): for numFolders == 0 this is a zero-byte request; an allocator that returns NULL for size 0 turns it into SZ_ERROR_MEM */
+  
+  startBufPtr = sd.Data;
+  
+  packStreamIndex = 0;
+  numCodersOutStreams = 0;
+  /* Validation pass over every folder: the coder bytes are only measured and checked here, not decoded. */
+  for (fo = 0; fo < numFolders; fo++)
+  {
+    UInt32 numCoders, ci, numInStreams = 0;
+    
+    p->FoCodersOffsets[fo] = sd.Data - startBufPtr;  /* where this folder's description starts */
+    
+    RINOK(SzReadNumber32(&sd, &numCoders));
+    if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
+      return SZ_ERROR_UNSUPPORTED;
+    
+    for (ci = 0; ci < numCoders; ci++)
+    {
+      Byte mainByte;
+      unsigned idSize;
+      UInt32 coderInStreams;
+      
+      SZ_READ_BYTE_2(mainByte);
+      if ((mainByte & 0xC0) != 0)
+        return SZ_ERROR_UNSUPPORTED;
+      idSize = (mainByte & 0xF);
+      if (idSize > 8)
+        return SZ_ERROR_UNSUPPORTED;
+      if (idSize > sd.Size)
+        return SZ_ERROR_ARCHIVE;
+      SKIP_DATA2(sd, idSize);  /* the method ID itself is not needed in this pass */
+      
+      coderInStreams = 1;
+      
+      if ((mainByte & 0x10) != 0)
+      {
+        UInt32 coderOutStreams;
+        RINOK(SzReadNumber32(&sd, &coderInStreams));
+        RINOK(SzReadNumber32(&sd, &coderOutStreams));
+        if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1)
+          return SZ_ERROR_UNSUPPORTED;
+      }
+      
+      numInStreams += coderInStreams;
+
+      if ((mainByte & 0x20) != 0)
+      {
+        UInt32 propsSize;
+        RINOK(SzReadNumber32(&sd, &propsSize));
+        if (propsSize > sd.Size)
+          return SZ_ERROR_ARCHIVE;
+        SKIP_DATA2(sd, propsSize);
+      }
+    }
+    
+    {
+      UInt32 indexOfMainStream = 0;
+      UInt32 numPackStreams = 1;
+      
+      if (numCoders != 1 || numInStreams != 1)  /* the one-coder, one-stream folder carries no bond or pack-stream data */
+      {
+        Byte streamUsed[k_Scan_NumCodersStreams_in_Folder_MAX];
+        Byte coderUsed[k_Scan_NumCoders_MAX];
+    
+        UInt32 i;
+        UInt32 numBonds = numCoders - 1;
+        if (numInStreams < numBonds)
+          return SZ_ERROR_ARCHIVE;
+        
+        if (numInStreams > k_Scan_NumCodersStreams_in_Folder_MAX)  /* must hold before streamUsed[] is indexed */
+          return SZ_ERROR_UNSUPPORTED;
+        
+        for (i = 0; i < numInStreams; i++)
+          streamUsed[i] = False;
+        for (i = 0; i < numCoders; i++)
+          coderUsed[i] = False;
+        
+        for (i = 0; i < numBonds; i++)
+        {
+          UInt32 index;
+          
+          RINOK(SzReadNumber32(&sd, &index));  /* first number of the bond: an input-stream index */
+          if (index >= numInStreams || streamUsed[index])
+            return SZ_ERROR_ARCHIVE;
+          streamUsed[index] = True;
+          
+          RINOK(SzReadNumber32(&sd, &index));  /* second number of the bond: a coder index */
+          if (index >= numCoders || coderUsed[index])
+            return SZ_ERROR_ARCHIVE;
+          coderUsed[index] = True;
+        }
+        
+        numPackStreams = numInStreams - numBonds;
+        
+        if (numPackStreams != 1)  /* explicit pack-stream indices are present only when there is not exactly one */
+          for (i = 0; i < numPackStreams; i++)
+          {
+            UInt32 index;
+            RINOK(SzReadNumber32(&sd, &index));
+            if (index >= numInStreams || streamUsed[index])
+              return SZ_ERROR_ARCHIVE;
+            streamUsed[index] = True;
+          }
+          
+        for (i = 0; i < numCoders; i++)  /* main stream: the first coder that no bond named */
+          if (!coderUsed[i])
+          {
+            indexOfMainStream = i;
+            break;
+          }
+ 
+        if (i == numCoders)
+          return SZ_ERROR_ARCHIVE;
+      }
+      
+      p->FoStartPackStreamIndex[fo] = packStreamIndex;
+      p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
+      p->FoToMainUnpackSizeIndex[fo] = (Byte)indexOfMainStream;
+      numCodersOutStreams += numCoders;
+      if (numCodersOutStreams < numCoders)  /* 32-bit running total wrapped around */
+        return SZ_ERROR_UNSUPPORTED;
+      if (numPackStreams > p->NumPackStreams - packStreamIndex)  /* folders may not claim more pack streams than were declared */
+        return SZ_ERROR_ARCHIVE;
+      packStreamIndex += numPackStreams;
+    }
+  }
+
+  p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;  /* fo == numFolders here: the final entries hold the totals */
+  
+  {
+    size_t dataSize = sd.Data - startBufPtr;
+    p->FoStartPackStreamIndex[fo] = packStreamIndex;
+    p->FoCodersOffsets[fo] = dataSize;
+    MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
+  }
+  
+  if (external != 0)
+  {
+    if (sd.Size != 0)  /* an external buffer must be consumed exactly */
+      return SZ_ERROR_ARCHIVE;
+    sd = *sd2;  /* continue with the main input */
+  }
+  
+  RINOK(WaitId(&sd, k7zIdCodersUnpackSize));
+  /* One unpack size per coder, for all folders, in folder order. */
+  MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc);
+  {
+    UInt32 i;
+    for (i = 0; i < numCodersOutStreams; i++)
+    {
+      RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i));
+    }
+  }
+
+  for (;;)
+  {
+    UInt64 type;
+    RINOK(ReadID(&sd, &type));
+    if (type == k7zIdEnd)
+    {
+      *sd2 = sd;  /* the caller's cursor advances only on this successful exit */
+      return SZ_OK;
+    }
+    if (type == k7zIdCRC)
+    {
+      RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc));
+      continue;
+    }
+    RINOK(SkipData(&sd));
+  }
+}
+
+
+UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex)
+{  /* Looks up CoderUnpackSizes at the folder's first size slot plus the folder's main-stream index; folderIndex is not range-checked. */
+  return p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex] + p->FoToMainUnpackSizeIndex[folderIndex]];
+}
+
+
+typedef struct
+{
+  UInt32 NumTotalSubStreams;  /* sum of the per-folder stream counts, or the folder count when none are stored */
+  UInt32 NumSubDigests;       /* number of items covered by the sub-stream CRC list */
+  CSzData sdNumSubStreams;    /* span of input bytes holding the per-folder stream counts (not copied) */
+  CSzData sdSizes;            /* span of input bytes holding the sub-stream sizes (not copied) */
+  CSzData sdCRCs;             /* span of input bytes holding the sub-stream CRC list (not copied) */
+} CSubStreamInfo;
+
+
+static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+{  /* Scans the sub-streams section: records in *ssi where the counts, sizes and CRCs lie in the input and computes the totals. Nothing is allocated; p is only read. */
+  UInt64 type = 0;
+  UInt32 numSubDigests = 0;
+  UInt32 numFolders = p->NumFolders;
+  UInt32 numUnpackStreams = numFolders;  /* value kept when no k7zIdNumUnpackStream record is present */
+  UInt32 numUnpackSizesInData = 0;
+  /* Phase 1: read records until a CRC, Size or End tag; the stream-count record is the only one interpreted. */
+  for (;;)
+  {
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdNumUnpackStream)
+    {
+      UInt32 i;
+      ssi->sdNumSubStreams.Data = sd->Data;
+      numUnpackStreams = 0;
+      numSubDigests = 0;
+      for (i = 0; i < numFolders; i++)
+      {
+        UInt32 numStreams;
+        RINOK(SzReadNumber32(sd, &numStreams));
+        if (numUnpackStreams > numUnpackStreams + numStreams)  /* 32-bit total would wrap around */
+          return SZ_ERROR_UNSUPPORTED;
+        numUnpackStreams += numStreams;
+        if (numStreams != 0)
+          numUnpackSizesInData += (numStreams - 1);  /* one size per folder is not stored in the data */
+        if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i))  /* a single-stream folder with a folder CRC needs no separate digest */
+          numSubDigests += numStreams;
+      }
+      ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
+      continue;
+    }
+    if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
+      break;
+    RINOK(SkipData(sd));
+  }
+  /* No stream-count record was seen (ssi->sdNumSubStreams.Data is read here, so the caller must have cleared it): one stream per folder. */
+  if (!ssi->sdNumSubStreams.Data)
+  {
+    numSubDigests = numFolders;
+    if (p->FolderCRCs.Defs)
+      numSubDigests = numFolders - CountDefinedBits(p->FolderCRCs.Defs, numFolders);
+  }
+  
+  ssi->NumTotalSubStreams = numUnpackStreams;
+  ssi->NumSubDigests = numSubDigests;
+  /* Phase 2: the optional size list is only measured, not decoded. */
+  if (type == k7zIdSize)
+  {
+    ssi->sdSizes.Data = sd->Data;
+    RINOK(SkipNumbers(sd, numUnpackSizesInData));
+    ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
+    RINOK(ReadID(sd, &type));
+  }
+  /* Phase 3: remaining records up to k7zIdEnd; the span of a CRC list is remembered, everything else is skipped. */
+  for (;;)
+  {
+    if (type == k7zIdEnd)
+      return SZ_OK;
+    if (type == k7zIdCRC)
+    {
+      ssi->sdCRCs.Data = sd->Data;
+      RINOK(SkipBitUi32s(sd, numSubDigests));
+      ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
+    }
+    else
+    {
+      RINOK(SkipData(sd));
+    }
+    RINOK(ReadID(sd, &type));
+  }
+}
+
+static SRes SzReadStreamsInfo(CSzAr *p,
+    CSzData *sd,
+    UInt32 numFoldersMax, const CBuf *tempBufs, UInt32 numTempBufs,
+    UInt64 *dataOffset,
+    CSubStreamInfo *ssi,
+    ISzAlloc *alloc)
+{
+  /* Reads a streams-info block: optional pack info, optional unpack info and optional
+     sub-streams info, in that order, followed by k7zIdEnd. *dataOffset is 0 unless
+     pack info supplies a value. Any other closing tag is SZ_ERROR_UNSUPPORTED. */
+  UInt64 tag;
+
+  ssi->sdNumSubStreams.Data = NULL; ssi->sdNumSubStreams.Size = 0;
+  ssi->sdSizes.Data = NULL; ssi->sdSizes.Size = 0;
+  ssi->sdCRCs.Data = NULL; ssi->sdCRCs.Size = 0;
+  *dataOffset = 0;
+  RINOK(ReadID(sd, &tag));
+  if (tag == k7zIdPackInfo)
+  {
+    RINOK(ReadNumber(sd, dataOffset));
+    RINOK(ReadPackInfo(p, sd, alloc));
+    RINOK(ReadID(sd, &tag));
+  }
+  if (tag == k7zIdUnpackInfo)
+  {
+    RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc));
+    RINOK(ReadID(sd, &tag));
+  }
+  if (tag != k7zIdSubStreamsInfo)
+    ssi->NumTotalSubStreams = p->NumFolders;  /* no sub-streams section: the total is the folder count; NumSubDigests is left untouched */
+  else
+  {
+    RINOK(ReadSubStreamsInfo(p, sd, ssi));
+    RINOK(ReadID(sd, &tag));
+  }
+  if (tag != k7zIdEnd)
+    return SZ_ERROR_UNSUPPORTED;
+  return SZ_OK;
+}
+
+static SRes SzReadAndDecodePackedStreams(
+    ILookInStream *inStream,
+    CSzData *sd,
+    CBuf *tempBufs,
+    UInt32 numFoldersMax,
+    UInt64 baseOffset,
+    CSzAr *p,
+    ISzAlloc *allocTemp)
+{
+  /* Reads the streams-info block describing packed side streams, then decodes each
+     folder into its own buffer: tempBufs[i] receives the unpacked data of folder i.
+     Buffers created before an error return are not released here. */
+  CSubStreamInfo ssi;
+  UInt64 dataStartPos;
+  UInt32 idx;
+
+  RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp));
+  dataStartPos += baseOffset;
+  if (p->NumFolders == 0)
+    return SZ_ERROR_ARCHIVE;
+
+  /* every slot is initialised before the first allocation is attempted */
+  for (idx = 0; idx != p->NumFolders; idx++)
+    Buf_Init(&tempBufs[idx]);
+
+  for (idx = 0; idx != p->NumFolders; idx++)
+  {
+    const UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, idx);
+    /* the size must fit in size_t, and the buffer must actually be obtained */
+    if ((size_t)unpackSize != unpackSize || !Buf_Create(&tempBufs[idx], (size_t)unpackSize, allocTemp))
+      return SZ_ERROR_MEM;
+  }
+
+  for (idx = 0; idx != p->NumFolders; idx++)
+  {
+    RINOK(LookInStream_SeekTo(inStream, dataStartPos));
+    RINOK(SzAr_DecodeFolder(p, idx, inStream, dataStartPos, tempBufs[idx].data, tempBufs[idx].size, allocTemp));
+  }
+
+  return SZ_OK;
+}
+
+static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size_t *offsets)
+{  /* Splits `size` bytes of zero-terminated names made of 2-byte units into numFiles entries; offsets receives numFiles + 1 positions counted in 2-byte units. */
+  size_t pos = 0;
+  *offsets++ = 0;  /* the first name starts at unit 0; this is written even when numFiles is 0 */
+  if (numFiles == 0)
+    return (size == 0) ? SZ_OK : SZ_ERROR_ARCHIVE;
+  if (size < 2)
+    return SZ_ERROR_ARCHIVE;
+  if (data[size - 2] != 0 || data[size - 1] != 0)  /* a final zero unit is what stops the scan below; NOTE(review): this relies on `size` being even, which the visible caller checks first */
+    return SZ_ERROR_ARCHIVE;
+  do
+  {
+    const Byte *p;
+    if (pos == size)  /* fewer names in the data than numFiles */
+      return SZ_ERROR_ARCHIVE;
+    for (p = data + pos;
+      #ifdef _WIN32  /* one 16-bit load per unit on Windows builds */
+      *(const UInt16 *)p != 0
+      #else
+      p[0] != 0 || p[1] != 0
+      #endif
+      ; p += 2);
+    pos = p - data + 2;  /* step past the terminating zero unit */
+    *offsets++ = (pos >> 1);
+  }
+  while (--numFiles);
+  return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE;  /* bytes left over after the last name are an error */
+}
+
+static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
+    CSzData *sd2,
+    const CBuf *tempBufs, UInt32 numTempBufs,
+    ISzAlloc *alloc)
+{  /* Reads an optional-timestamp property for `num` files into p: a Defs bit vector plus one 64-bit value (Low/High halves) per file. Returns SZ_OK, SZ_ERROR_ARCHIVE or SZ_ERROR_MEM. */
+  CSzData sd;
+  UInt32 i;
+  CNtfsFileTime *vals;
+  Byte *defs;
+  Byte external;
+  /* A repeated property must not leak the arrays of the earlier one: ReadBitVector overwrites p->Defs without freeing it. */
+  SzBitUi64s_Free(p, alloc);
+  RINOK(ReadBitVector(sd2, num, &p->Defs, alloc));
+  SZ_READ_BYTE_SD(sd2, external);
+  if (external == 0)
+    sd = *sd2;
+  else
+  {
+    UInt32 index;
+    RINOK(SzReadNumber32(sd2, &index));
+    if (index >= numTempBufs)
+      return SZ_ERROR_ARCHIVE;
+    sd.Data = tempBufs[index].data;
+    sd.Size = tempBufs[index].size;
+  }
+  /* external != 0 means the values are taken from tempBufs[index] instead of the main input */
+  MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc);
+  vals = p->Vals;
+  defs = p->Defs;
+  for (i = 0; i < num; i++)
+    if (SzBitArray_Check(defs, i))
+    {
+      if (sd.Size < 8)
+        return SZ_ERROR_ARCHIVE;
+      vals[i].Low = GetUi32(sd.Data);
+      vals[i].High = GetUi32(sd.Data + 4);
+      SKIP_DATA2(sd, 8);
+    }
+    else
+      vals[i].High = vals[i].Low = 0;
+  /* only inline values move the caller's cursor; for external data it already sits past the index */
+  if (external == 0)
+    *sd2 = sd;
+  
+  return SZ_OK;
+}
+
+
+#define NUM_ADDITIONAL_STREAMS_MAX 8  /* folder limit that SzReadHeader2 passes to SzReadAndDecodePackedStreams */
+
+
+static SRes SzReadHeader2(
+    CSzArEx *p,   /* allocMain */
+    CSzData *sd,
+    ILookInStream *inStream,
+    CBuf *tempBufs, UInt32 *numTempBufs,
+    ISzAlloc *allocMain,
+    ISzAlloc *allocTemp
+    )
+{
+  CSubStreamInfo ssi;
+
+{
+  UInt64 type;
+  
+  SzData_Clear(&ssi.sdSizes);
+  SzData_Clear(&ssi.sdCRCs);
+  SzData_Clear(&ssi.sdNumSubStreams);
+
+  ssi.NumSubDigests = 0;
+  ssi.NumTotalSubStreams = 0;
+
+  RINOK(ReadID(sd, &type));
+
+  if (type == k7zIdArchiveProperties)
+  {
+    for (;;)
+    {
+      UInt64 type2;
+      RINOK(ReadID(sd, &type2));
+      if (type2 == k7zIdEnd)
+        break;
+      RINOK(SkipData(sd));
+    }
+    RINOK(ReadID(sd, &type));
+  }
+
+  if (type == k7zIdAdditionalStreamsInfo)
+  {
+    CSzAr tempAr;
+    SRes res;
+    
+    SzAr_Init(&tempAr);
+    res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
+        p->startPosAfterHeader, &tempAr, allocTemp);
+    *numTempBufs = tempAr.NumFolders;
+    SzAr_Free(&tempAr, allocTemp);
+    
+    if (res != SZ_OK)
+      return res;
+    RINOK(ReadID(sd, &type));
+  }
+
+  if (type == k7zIdMainStreamsInfo)
+  {
+    RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs,
+        &p->dataPos, &ssi, allocMain));
+    p->dataPos += p->startPosAfterHeader;
+    RINOK(ReadID(sd, &type));
+  }
+
+  if (type == k7zIdEnd)
+  {
+    return SZ_OK;
+  }
+
+  if (type != k7zIdFilesInfo)
+    return SZ_ERROR_ARCHIVE;
+}
+
+{
+  UInt32 numFiles = 0;
+  UInt32 numEmptyStreams = 0;
+  const Byte *emptyStreams = NULL;
+  const Byte *emptyFiles = NULL;
+  
+  RINOK(SzReadNumber32(sd, &numFiles));
+  p->NumFiles = numFiles;
+
+  for (;;)
+  {
+    UInt64 type;
+    UInt64 size;
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdEnd)
+      break;
+    RINOK(ReadNumber(sd, &size));
+    if (size > sd->Size)
+      return SZ_ERROR_ARCHIVE;
+    
+    if (type >= ((UInt32)1 << 8))
+    {
+      SKIP_DATA(sd, size);
+    }
+    else switch ((unsigned)type)
+    {
+      case k7zIdName:
+      {
+        size_t namesSize;
+        const Byte *namesData;
+        Byte external;
+
+        SZ_READ_BYTE(external);
+        if (external == 0)
+        {
+          namesSize = (size_t)size - 1;
+          namesData = sd->Data;
+        }
+        else
+        {
+          UInt32 index;
+          RINOK(SzReadNumber32(sd, &index));
+          if (index >= *numTempBufs)
+            return SZ_ERROR_ARCHIVE;
+          namesData = (tempBufs)[index].data;
+          namesSize = (tempBufs)[index].size;
+        }
+
+        if ((namesSize & 1) != 0)
+          return SZ_ERROR_ARCHIVE;
+        MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain);
+        MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain);
+        RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets))
+        if (external == 0)
+        {
+          SKIP_DATA(sd, namesSize);
+        }
+        break;
+      }
+      case k7zIdEmptyStream:
+      {
+        RINOK(RememberBitVector(sd, numFiles, &emptyStreams));
+        numEmptyStreams = CountDefinedBits(emptyStreams, numFiles);
+        emptyFiles = NULL;
+        break;
+      }
+      case k7zIdEmptyFile:
+      {
+        RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles));
+        break;
+      }
+      case k7zIdWinAttrib:
+      {
+        Byte external;
+        CSzData sdSwitch;
+        CSzData *sdPtr;
+        SzBitUi32s_Free(&p->Attribs, allocMain);
+        RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain));
+
+        SZ_READ_BYTE(external);
+        if (external == 0)
+          sdPtr = sd;
+        else
+        {
+          UInt32 index;
+          RINOK(SzReadNumber32(sd, &index));
+          if (index >= *numTempBufs)
+            return SZ_ERROR_ARCHIVE;
+          sdSwitch.Data = (tempBufs)[index].data;
+          sdSwitch.Size = (tempBufs)[index].size;
+          sdPtr = &sdSwitch;
+        }
+        RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain));
+        break;
+      }
+      /*
+      case k7zParent:
+      {
+        SzBitUi32s_Free(&p->Parents, allocMain);
+        RINOK(ReadBitVector(sd, numFiles, &p->Parents.Defs, allocMain));
+        RINOK(SzReadSwitch(sd));
+        RINOK(ReadUi32s(sd, numFiles, &p->Parents, allocMain));
+        break;
+      }
+      */
+      case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break;
+      case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break;
+      default:
+      {
+        SKIP_DATA(sd, size);
+      }
+    }
+  }
+
+  if (numFiles - numEmptyStreams != ssi.NumTotalSubStreams)
+    return SZ_ERROR_ARCHIVE;
+
+  for (;;)
+  {
+    UInt64 type;
+    RINOK(ReadID(sd, &type));
+    if (type == k7zIdEnd)
+      break;
+    RINOK(SkipData(sd));
+  }
+
+  {
+    UInt32 i;
+    UInt32 emptyFileIndex = 0;
+    UInt32 folderIndex = 0;
+    UInt32 remSubStreams = 0;
+    UInt32 numSubStreams = 0;
+    UInt64 unpackPos = 0;
+    const Byte *digestsDefs = NULL;
+    const Byte *digestsVals = NULL;
+    UInt32 digestsValsIndex = 0;
+    UInt32 digestIndex;
+    Byte allDigestsDefined = 0;
+    Byte isDirMask = 0;
+    Byte crcMask = 0;
+    Byte mask = 0x80;
+    
+    MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain);
+    MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain);
+    MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain);
+    MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain);
+
+    RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain));
+
+    if (ssi.sdCRCs.Size != 0)
+    {
+      SZ_READ_BYTE_SD(&ssi.sdCRCs, allDigestsDefined);
+      if (allDigestsDefined)
+        digestsVals = ssi.sdCRCs.Data;
+      else
+      {
+        size_t numBytes = (ssi.NumSubDigests + 7) >> 3;
+        digestsDefs = ssi.sdCRCs.Data;
+        digestsVals = digestsDefs + numBytes;
+      }
+    }
+
+    digestIndex = 0;
+    
+    for (i = 0; i < numFiles; i++, mask >>= 1)
+    {
+      if (mask == 0)
+      {
+        UInt32 byteIndex = (i - 1) >> 3;
+        p->IsDirs[byteIndex] = isDirMask;
+        p->CRCs.Defs[byteIndex] = crcMask;
+        isDirMask = 0;
+        crcMask = 0;
+        mask = 0x80;
+      }
+
+      p->UnpackPositions[i] = unpackPos;
+      p->CRCs.Vals[i] = 0;
+      
+      if (emptyStreams && SzBitArray_Check(emptyStreams, i))
+      {
+        if (emptyFiles)
+        {
+          if (!SzBitArray_Check(emptyFiles, emptyFileIndex))
+            isDirMask |= mask;
+          emptyFileIndex++;
+        }
+        else
+          isDirMask |= mask;
+        if (remSubStreams == 0)
+        {
+          p->FileToFolder[i] = (UInt32)-1;
+          continue;
+        }
+      }
+      
+      if (remSubStreams == 0)
+      {
+        for (;;)
+        {
+          if (folderIndex >= p->db.NumFolders)
+            return SZ_ERROR_ARCHIVE;
+          p->FolderToFile[folderIndex] = i;
+          numSubStreams = 1;
+          if (ssi.sdNumSubStreams.Data)
+          {
+            RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams));
+          }
+          remSubStreams = numSubStreams;
+          if (numSubStreams != 0)
+            break;
+          {
+            UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+            unpackPos += folderUnpackSize;
+            if (unpackPos < folderUnpackSize)
+              return SZ_ERROR_ARCHIVE;
+          }
+
+          folderIndex++;
+        }
+      }
+      
+      p->FileToFolder[i] = folderIndex;
+      
+      if (emptyStreams && SzBitArray_Check(emptyStreams, i))
+        continue;
+      
+      if (--remSubStreams == 0)
+      {
+        UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+        UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]];
+        if (folderUnpackSize < unpackPos - startFolderUnpackPos)
+          return SZ_ERROR_ARCHIVE;
+        unpackPos = startFolderUnpackPos + folderUnpackSize;
+        if (unpackPos < folderUnpackSize)
+          return SZ_ERROR_ARCHIVE;
+
+        if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, i))
+        {
+          p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex];
+          crcMask |= mask;
+        }
+        else if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex)))
+        {
+          p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4);
+          digestsValsIndex++;
+          crcMask |= mask;
+        }
+        
+        folderIndex++;
+      }
+      else
+      {
+        UInt64 v;
+        RINOK(ReadNumber(&ssi.sdSizes, &v));
+        unpackPos += v;
+        if (unpackPos < v)
+          return SZ_ERROR_ARCHIVE;
+        if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex)))
+        {
+          p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4);
+          digestsValsIndex++;
+          crcMask |= mask;
+        }
+      }
+    }
+
+    if (mask != 0x80)
+    {
+      UInt32 byteIndex = (i - 1) >> 3;
+      p->IsDirs[byteIndex] = isDirMask;
+      p->CRCs.Defs[byteIndex] = crcMask;
+    }
+    
+    p->UnpackPositions[i] = unpackPos;
+
+    if (remSubStreams != 0)
+      return SZ_ERROR_ARCHIVE;
+
+    for (;;)
+    {
+      p->FolderToFile[folderIndex] = i;
+      if (folderIndex >= p->db.NumFolders)
+        break;
+      if (!ssi.sdNumSubStreams.Data)
+        return SZ_ERROR_ARCHIVE;
+      RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams));
+      if (numSubStreams != 0)
+        return SZ_ERROR_ARCHIVE;
+      /*
+      {
+        UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+        unpackPos += folderUnpackSize;
+        if (unpackPos < folderUnpackSize)
+          return SZ_ERROR_ARCHIVE;
+      }
+      */
+      folderIndex++;
+    }
+
+    if (ssi.sdNumSubStreams.Data && ssi.sdNumSubStreams.Size != 0)
+      return SZ_ERROR_ARCHIVE;
+  }
+}
+  return SZ_OK;
+}
+
+
+static SRes SzReadHeader(
+    CSzArEx *p,
+    CSzData *sd,
+    ILookInStream *inStream,
+    ISzAlloc *allocMain,
+    ISzAlloc *allocTemp)
+{
+  /* Runs SzReadHeader2 with a local array of temporary buffers and frees that
+     array on every path before the result is examined. */
+  CBuf scratch[NUM_ADDITIONAL_STREAMS_MAX];
+  UInt32 numScratch = 0;
+  UInt32 k;
+  SRes status;
+
+  for (k = 0; k != NUM_ADDITIONAL_STREAMS_MAX; ++k)
+    Buf_Init(&scratch[k]);
+
+  status = SzReadHeader2(p, sd, inStream, scratch, &numScratch, allocMain, allocTemp);
+
+  /* All slots are passed to Buf_Free, including those still in the Buf_Init state. */
+  for (k = 0; k != NUM_ADDITIONAL_STREAMS_MAX; ++k)
+    Buf_Free(&scratch[k], allocTemp);
+
+  if (status != SZ_OK)
+    return status;
+
+  /* Bytes left in sd after a successful parse are reported as SZ_ERROR_FAIL. */
+  return (sd->Size != 0) ? SZ_ERROR_FAIL : SZ_OK;
+}
+
+static SRes SzArEx_Open2(
+    CSzArEx *p,
+    ILookInStream *inStream,
+    ISzAlloc *allocMain,
+    ISzAlloc *allocTemp)
+{
+  Byte header[k7zStartHeaderSize];
+  Int64 startArcPos;
+  UInt64 nextHeaderOffset, nextHeaderSize;
+  size_t nextHeaderSizeT;
+  UInt32 nextHeaderCRC;
+  CBuf buf;
+  SRes res;
+  /* Reads and validates the start header at the current stream position, loads the next header it points to (decoding it first when it is an encoded header) and parses it into p. p is not freed here on failure; the SzArEx_Open wrapper does that. */
+  startArcPos = 0;
+  RINOK(inStream->Seek(inStream, &startArcPos, SZ_SEEK_CUR)); /* NOTE(review): presumably leaves the current stream position in startArcPos - confirm against the ILookInStream contract */
+
+  RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE));
+
+  if (!TestSignatureCandidate(header))
+    return SZ_ERROR_NO_ARCHIVE;
+  if (header[6] != k7zMajorVersion) /* byte 6 of the start header is compared with the supported major version */
+    return SZ_ERROR_UNSUPPORTED;
+
+  nextHeaderOffset = GetUi64(header + 12); /* start header fields: offset at +12, size at +20, CRC at +28 */
+  nextHeaderSize = GetUi64(header + 20);
+  nextHeaderCRC = GetUi32(header + 28);
+
+  p->startPosAfterHeader = startArcPos + k7zStartHeaderSize; /* nextHeaderOffset is applied on top of this position further down */
+  /* header + 8 holds the CRC of the 20 bytes at header + 12 (the three fields read above). */
+  if (CrcCalc(header + 12, 20) != GetUi32(header + 8))
+    return SZ_ERROR_CRC;
+
+  nextHeaderSizeT = (size_t)nextHeaderSize; /* the next header is read into one buffer, so its size has to fit in size_t */
+  if (nextHeaderSizeT != nextHeaderSize)
+    return SZ_ERROR_MEM;
+  if (nextHeaderSizeT == 0)
+    return SZ_OK; /* empty next header: nothing more is read or parsed */
+  if (nextHeaderOffset > nextHeaderOffset + nextHeaderSize || /* reject values whose sums wrap around in UInt64 */
+      nextHeaderOffset > nextHeaderOffset + nextHeaderSize + k7zStartHeaderSize)
+    return SZ_ERROR_NO_ARCHIVE;
+
+  { /* the next header has to lie completely inside the stream */
+    Int64 pos = 0;
+    RINOK(inStream->Seek(inStream, &pos, SZ_SEEK_END)); /* NOTE(review): presumably yields the stream length in pos - confirm */
+    if ((UInt64)pos < startArcPos + nextHeaderOffset ||
+        (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
+        (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
+      return SZ_ERROR_INPUT_EOF;
+  }
+
+  RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
+
+  if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) /* from here on no path returns before the Buf_Free at the end */
+    return SZ_ERROR_MEM;
+
+  res = LookInStream_Read(inStream, buf.data, nextHeaderSizeT);
+  
+  if (res == SZ_OK)
+  {
+    res = SZ_ERROR_ARCHIVE; /* remains the result when the next-header CRC does not match */
+    if (CrcCalc(buf.data, nextHeaderSizeT) == nextHeaderCRC)
+    {
+      CSzData sd;
+      UInt64 type;
+      sd.Data = buf.data;
+      sd.Size = buf.size;
+      /* The first ID tells whether this is a plain header or an encoded one. */
+      res = ReadID(&sd, &type);
+      
+      if (res == SZ_OK && type == k7zIdEncodedHeader)
+      {
+        CSzAr tempAr;
+        CBuf tempBuf;
+        Buf_Init(&tempBuf);
+        
+        SzAr_Init(&tempAr);
+        res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
+        SzAr_Free(&tempAr, allocTemp);
+       
+        if (res != SZ_OK)
+        {
+          Buf_Free(&tempBuf, allocTemp);
+        }
+        else
+        {
+          Buf_Free(&buf, allocTemp); /* drop the encoded bytes ... */
+          buf.data = tempBuf.data;   /* ... and let buf take over tempBuf's memory, so the final Buf_Free releases it */
+          buf.size = tempBuf.size;
+          sd.Data = buf.data;
+          sd.Size = buf.size;
+          res = ReadID(&sd, &type); /* read the ID of the decoded header */
+        }
+      }
+  
+      if (res == SZ_OK)
+      {
+        if (type == k7zIdHeader)
+        {
+          /*
+          CSzData sd2;
+          unsigned ttt;
+          for (ttt = 0; ttt < 40000; ttt++)
+          {
+            SzArEx_Free(p, allocMain);
+            sd2 = sd;
+            res = SzReadHeader(p, &sd2, inStream, allocMain, allocTemp);
+            if (res != SZ_OK)
+              break;
+          }
+          */
+          res = SzReadHeader(p, &sd, inStream, allocMain, allocTemp);
+        }
+        else
+          res = SZ_ERROR_UNSUPPORTED; /* any ID other than k7zIdHeader at this point is rejected */
+      }
+    }
+  }
+ 
+  Buf_Free(&buf, allocTemp);
+  return res;
+}
+
+
+SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
+    ISzAlloc *allocMain, ISzAlloc *allocTemp)
+{
+  const SRes status = SzArEx_Open2(p, inStream, allocMain, allocTemp);
+  if (SZ_OK != status)
+    SzArEx_Free(p, allocMain); /* a failed open hands p to SzArEx_Free before the error is returned */
+  return status;
+}
+
+
+SRes SzArEx_Extract(
+    const CSzArEx *p,
+    ILookInStream *inStream,
+    UInt32 fileIndex,
+    UInt32 *blockIndex,
+    Byte **tempBuf,
+    size_t *outBufferSize,
+    size_t *offset,
+    size_t *outSizeProcessed,
+    ISzAlloc *allocMain,
+    ISzAlloc *allocTemp)
+{
+  UInt32 folderIndex = p->FileToFolder[fileIndex];
+  SRes res = SZ_OK;
+  /* Decodes the folder that holds fileIndex into the cache (*blockIndex, *tempBuf, *outBufferSize) unless that folder is already cached, then reports the file as *outSizeProcessed bytes starting at *tempBuf + *offset and checks its CRC when one is defined. */
+  *offset = 0;
+  *outSizeProcessed = 0;
+  /* FileToFolder is (UInt32)-1 for a file that belongs to no folder: release the cache and report zero bytes. */
+  if (folderIndex == (UInt32)-1)
+  {
+    IAlloc_Free(allocMain, *tempBuf);
+    *blockIndex = folderIndex;
+    *tempBuf = NULL;
+    *outBufferSize = 0;
+    return SZ_OK;
+  }
+
+  if (*tempBuf == NULL || *blockIndex != folderIndex)
+  {
+    UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+    size_t unpackSize = (size_t)unpackSizeSpec;
+
+    if (unpackSize != unpackSizeSpec)
+      return SZ_ERROR_MEM; /* folder does not fit in size_t; the existing cache is left as it was */
+    *blockIndex = folderIndex;
+    IAlloc_Free(allocMain, *tempBuf);
+    *tempBuf = NULL;
+    *outBufferSize = unpackSize;
+
+    if (unpackSize != 0)
+    {
+      *tempBuf = (Byte *)IAlloc_Alloc(allocMain, unpackSize);
+      if (*tempBuf == NULL)
+        res = SZ_ERROR_MEM;
+    }
+
+    if (res == SZ_OK)
+      res = SzAr_DecodeFolder(&p->db, folderIndex,
+          inStream, p->dataPos, *tempBuf, unpackSize, allocTemp);
+
+    if (res != SZ_OK)
+    {
+      /* Do not keep a buffer that was not fully decoded: with *tempBuf == NULL the next call
+         decodes the folder again instead of serving stale bytes from a "cached" block. */
+      IAlloc_Free(allocMain, *tempBuf);
+      *tempBuf = NULL;
+      *outBufferSize = 0;
+    }
+  }
+
+  if (res == SZ_OK)
+  {
+    UInt64 unpackPos = p->UnpackPositions[fileIndex];
+    *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]);
+    *outSizeProcessed = (size_t)(p->UnpackPositions[fileIndex + 1] - unpackPos);
+    /* Range check written so that the sum offset + size cannot wrap around in size_t. */
+    if (*offset > *outBufferSize || *outSizeProcessed > *outBufferSize - *offset)
+      return SZ_ERROR_FAIL;
+    if (SzBitWithVals_Check(&p->CRCs, fileIndex))
+      if (CrcCalc(*tempBuf + *offset, *outSizeProcessed) != p->CRCs.Vals[fileIndex])
+        res = SZ_ERROR_CRC;
+  }
+
+  return res;
+}
+
+
+size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest)
+{
+  /* Returns the number of 16-bit units of the name, taken from two neighbouring
+     FileNameOffsets entries; when dest is non-null the units are also copied out of FileNames. */
+  const size_t first = p->FileNameOffsets[fileIndex];
+  const size_t count = p->FileNameOffsets[fileIndex + 1] - first;
+  size_t k;
+  if (dest == 0)
+    return count;
+  for (k = 0; k < count; k++)
+    dest[k] = GetUi16(p->FileNames + (first + k) * 2);
+  return count;
+}
+
+/*
+size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex)
+{
+  size_t len;
+  if (!p->FileNameOffsets)
+    return 1;
+  len = 0;
+  for (;;)
+  {
+    UInt32 parent = (UInt32)(Int32)-1;
+    len += p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex];
+    if SzBitWithVals_Check(&p->Parents, fileIndex)
+      parent = p->Parents.Vals[fileIndex];
+    if (parent == (UInt32)(Int32)-1)
+      return len;
+    fileIndex = parent;
+  }
+}
+
+UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest)
+{
+  Bool needSlash;
+  if (!p->FileNameOffsets)
+  {
+    *(--dest) = 0;
+    return dest;
+  }
+  needSlash = False;
+  for (;;)
+  {
+    UInt32 parent = (UInt32)(Int32)-1;
+    size_t curLen = p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex];
+    SzArEx_GetFileNameUtf16(p, fileIndex, dest - curLen);
+    if (needSlash)
+      *(dest - 1) = '/';
+    needSlash = True;
+    dest -= curLen;
+
+    if SzBitWithVals_Check(&p->Parents, fileIndex)
+      parent = p->Parents.Vals[fileIndex];
+    if (parent == (UInt32)(Int32)-1)
+      return dest;
+    fileIndex = parent;
+  }
+}
+*/
diff --git a/SevenZip/7zBuf.c b/SevenZip/7zBuf.c
new file mode 100644
index 0000000..089a5c4
--- /dev/null
+++ b/SevenZip/7zBuf.c
@@ -0,0 +1,36 @@
+/* 7zBuf.c -- Byte Buffer
+2013-01-21 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "7zBuf.h"
+
+void Buf_Init(CBuf *p)
+{
+  p->size = 0; /* empty state: zero length and no storage attached */
+  p->data = 0;
+}
+
+int Buf_Create(CBuf *p, size_t size, ISzAlloc *alloc)
+{
+  /* Gives p a fresh block of `size` bytes; size 0 succeeds without calling the allocator.
+     Returns 1 on success, or 0 (with p left empty) when the allocator returns 0. */
+
+  Byte *mem = 0;
+
+  if (size != 0)
+    mem = (Byte *)alloc->Alloc(alloc, size);
+
+  p->data = mem;
+  p->size = (mem != 0) ? size : 0;
+
+  /* Only a non-zero request that produced no memory counts as a failure. */
+  return (size == 0 || mem != 0) ? 1 : 0;
+}
+
+void Buf_Free(CBuf *p, ISzAlloc *alloc)
+{
+  /* Hands p->data to alloc->Free (also when it is 0), then puts p back into the empty state. */
+  alloc->Free(alloc, p->data);
+  Buf_Init(p);
+}
diff --git a/SevenZip/7zBuf.h b/SevenZip/7zBuf.h
new file mode 100644
index 0000000..65f1d7a
--- /dev/null
+++ b/SevenZip/7zBuf.h
@@ -0,0 +1,35 @@
+/* 7zBuf.h -- Byte Buffer
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_BUF_H
+#define __7Z_BUF_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef struct
+{
+  Byte *data; /* storage obtained from an ISzAlloc, or 0 when the buffer is empty */
+  size_t size; /* byte count of data; Buf_Init / Buf_Free set it to 0 */
+} CBuf;
+
+void Buf_Init(CBuf *p); /* puts p into the empty state without freeing anything */
+int Buf_Create(CBuf *p, size_t size, ISzAlloc *alloc); /* returns 1 on success, 0 if the allocation failed */
+void Buf_Free(CBuf *p, ISzAlloc *alloc); /* passes p->data to alloc->Free and resets p */
+
+typedef struct
+{
+  Byte *data;
+  size_t size;
+  size_t pos; /* NOTE(review): presumably the current write position used by DynBuf_Write - the DynBuf_* bodies are not in 7zBuf.c as shown here, confirm */
+} CDynBuf;
+
+void DynBuf_Construct(CDynBuf *p);
+void DynBuf_SeekToBeg(CDynBuf *p);
+int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAlloc *alloc); /* NOTE(review): the int result presumably follows Buf_Create (non-zero on success) - confirm */
+void DynBuf_Free(CDynBuf *p, ISzAlloc *alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/7zCrc.c b/SevenZip/7zCrc.c
new file mode 100644
index 0000000..dc6d6ab
--- /dev/null
+++ b/SevenZip/7zCrc.c
@@ -0,0 +1,128 @@
+/* 7zCrc.c -- CRC32 init
+2015-03-10 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "7zCrc.h"
+#include "CpuArch.h"
+
+#define kCrcPoly 0xEDB88320
+
+#ifdef MY_CPU_LE
+  #define CRC_NUM_TABLES 8
+#else
+  #define CRC_NUM_TABLES 9
+
+  #define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
+
+  UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+  UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
+#endif
+
+#ifndef MY_CPU_BE
+  UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+  UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
+#endif
+
+typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); /* common signature of all CRC update routines */
+
+CRC_FUNC g_CrcUpdateT4; /* assigned by CrcGenerateTable on the paths that pick a 4-byte routine */
+CRC_FUNC g_CrcUpdateT8; /* assigned by CrcGenerateTable when CRC_NUM_TABLES >= 8 on those paths */
+CRC_FUNC g_CrcUpdate; /* routine called by CrcUpdate and CrcCalc; assigned by CrcGenerateTable */
+
+UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; /* CRC_NUM_TABLES tables of 256 entries each, filled by CrcGenerateTable */
+
+UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
+{
+  return (*g_CrcUpdate)(v, data, size, g_CrcTable); /* forward to the routine stored in g_CrcUpdate */
+}
+
+UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
+{
+  return CRC_GET_DIGEST(CrcUpdate(CRC_INIT_VAL, data, size)); /* start from CRC_INIT_VAL, XOR with it again at the end */
+}
+
+#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+
+UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  const Byte *bytes = (const Byte *)data;
+  size_t k; /* one table lookup per input byte, in order */
+  for (k = 0; k < size; k++)
+    v = CRC_UPDATE_BYTE_2(v, bytes[k]);
+  return v;
+}
+
+void MY_FAST_CALL CrcGenerateTable()
+{ /* Fills g_CrcTable and assigns g_CrcUpdate (plus g_CrcUpdateT4 / g_CrcUpdateT8 on most paths) according to byte order. */
+  UInt32 i;
+  for (i = 0; i < 256; i++) /* first 256 entries: eight shift steps per index value */
+  {
+    UInt32 r = i;
+    unsigned j;
+    for (j = 0; j < 8; j++)
+      r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); /* the mask is all ones when the low bit of r is set, zero otherwise */
+    g_CrcTable[i] = r;
+  }
+  for (; i < 256 * CRC_NUM_TABLES; i++) /* every further entry is derived from the entry 256 positions below it */
+  {
+    UInt32 r = g_CrcTable[i - 256];
+    g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
+  }
+
+  #if CRC_NUM_TABLES < 4
+  
+  g_CrcUpdate = CrcUpdateT1; /* not compiled in this file: CRC_NUM_TABLES is defined as 8 or 9 above */
+  
+  #else
+ 
+  #ifdef MY_CPU_LE
+
+    g_CrcUpdateT4 = CrcUpdateT4;
+    g_CrcUpdate = CrcUpdateT4; /* default for little-endian builds */
+
+    #if CRC_NUM_TABLES >= 8
+      g_CrcUpdateT8 = CrcUpdateT8;
+  
+      #ifdef MY_CPU_X86_OR_AMD64
+      if (!CPU_Is_InOrder())
+        g_CrcUpdate = CrcUpdateT8; /* 8-byte routine only when CPU_Is_InOrder() reports false */
+      #endif
+    #endif
+
+  #else
+  {
+    #ifndef MY_CPU_BE
+    UInt32 k = 0x01020304; /* byte order unknown at compile time: inspect the in-memory bytes of a known value */
+    const Byte *p = (const Byte *)&k;
+    if (p[0] == 4 && p[1] == 3) /* lowest byte first: little-endian */
+    {
+      g_CrcUpdateT4 = CrcUpdateT4;
+      g_CrcUpdate = CrcUpdateT4;
+      #if CRC_NUM_TABLES >= 8
+      g_CrcUpdateT8 = CrcUpdateT8;
+      // g_CrcUpdate = CrcUpdateT8;
+      #endif
+    }
+    else if (p[0] != 1 || p[1] != 2) /* neither byte order recognised: byte-wise routine; g_CrcUpdateT4 / T8 are not assigned here */
+      g_CrcUpdate = CrcUpdateT1;
+    else
+    #endif
+    {
+      for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--) /* top-down, so each source entry is read before it is overwritten */
+      {
+        UInt32 x = g_CrcTable[i - 256];
+        g_CrcTable[i] = CRC_UINT32_SWAP(x); /* byte-swapped copy stored 256 entries higher; the Be routines start at table + 0x100 */
+      }
+      g_CrcUpdateT4 = CrcUpdateT1_BeT4;
+      g_CrcUpdate = CrcUpdateT1_BeT4;
+      #if CRC_NUM_TABLES >= 8
+      g_CrcUpdateT8 = CrcUpdateT1_BeT8;
+      // g_CrcUpdate = CrcUpdateT1_BeT8;
+      #endif
+    }
+  }
+  #endif
+
+  #endif
+}
diff --git a/SevenZip/7zCrc.h b/SevenZip/7zCrc.h
new file mode 100644
index 0000000..8fd5795
--- /dev/null
+++ b/SevenZip/7zCrc.h
@@ -0,0 +1,25 @@
+/* 7zCrc.h -- CRC32 calculation
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_CRC_H
+#define __7Z_CRC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+extern UInt32 g_CrcTable[]; /* lookup tables defined in 7zCrc.c; indexed directly by CRC_UPDATE_BYTE */
+
+/* Call CrcGenerateTable once before any other CRC function: it fills g_CrcTable and selects the update routine. */
+void MY_FAST_CALL CrcGenerateTable(void);
+
+#define CRC_INIT_VAL 0xFFFFFFFF
+#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
+#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+
+UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size); /* continues a running value; no initial or final XOR is applied */
+UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size); /* one-shot: starts from CRC_INIT_VAL and XORs the result with CRC_INIT_VAL */
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/7zCrcOpt.c b/SevenZip/7zCrcOpt.c
new file mode 100644
index 0000000..d1e1cd7
--- /dev/null
+++ b/SevenZip/7zCrcOpt.c
@@ -0,0 +1,115 @@
+/* 7zCrcOpt.c -- CRC32 calculation
+2015-03-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+
+#ifndef MY_CPU_BE
+
+#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+
+UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  /* Byte-wise until the pointer is 4-byte aligned, then one 32-bit word per step through four tables, then the byte-wise tail. */
+  const Byte *cur = (const Byte *)data;
+  for (; size != 0 && ((unsigned)(ptrdiff_t)cur & 3) != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2(v, *cur);
+  while (size >= 4)
+  {
+    const UInt32 w = v ^ *(const UInt32 *)cur;
+    v = table[0x300 + (w & 0xFF)] ^ table[0x200 + ((w >> 8) & 0xFF)]
+      ^ table[0x100 + ((w >> 16) & 0xFF)] ^ table[w >> 24];
+    cur += 4;
+    size -= 4;
+  }
+  for (; size != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2(v, *cur);
+  return v;
+}
+
+UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  /* Byte-wise until the pointer is 8-byte aligned, then two 32-bit words per step
+     through eight tables, then the byte-wise tail. */
+  const Byte *cur = (const Byte *)data;
+
+  for (; size != 0 && ((unsigned)(ptrdiff_t)cur & 7) != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2(v, *cur);
+
+  while (size >= 8)
+  {
+    const UInt32 w0 = v ^ *(const UInt32 *)cur;
+    const UInt32 w1 = *((const UInt32 *)cur + 1);
+    v = table[0x700 + (w0 & 0xFF)] ^ table[0x600 + ((w0 >> 8) & 0xFF)]
+      ^ table[0x500 + ((w0 >> 16) & 0xFF)] ^ table[0x400 + (w0 >> 24)]
+      ^ table[0x300 + (w1 & 0xFF)] ^ table[0x200 + ((w1 >> 8) & 0xFF)]
+      ^ table[0x100 + ((w1 >> 16) & 0xFF)] ^ table[w1 >> 24];
+    cur += 8;
+    size -= 8;
+  }
+
+  for (; size != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2(v, *cur);
+  return v;
+}
+
+#endif
+
+
+#ifndef MY_CPU_LE
+
+#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
+
+#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
+
+UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  /* Works on a byte-swapped CRC value with the tables that start 0x100 entries into `table`; the value is swapped back on return. */
+  const Byte *cur = (const Byte *)data;
+  table += 0x100;
+  v = CRC_UINT32_SWAP(v);
+  for (; size != 0 && ((unsigned)(ptrdiff_t)cur & 3) != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
+  while (size >= 4)
+  {
+    const UInt32 w = v ^ *(const UInt32 *)cur;
+    v = table[w & 0xFF] ^ table[0x100 + ((w >> 8) & 0xFF)]
+      ^ table[0x200 + ((w >> 16) & 0xFF)] ^ table[0x300 + (w >> 24)];
+    cur += 4;
+    size -= 4;
+  }
+  for (; size != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
+  return CRC_UINT32_SWAP(v);
+}
+
+UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+  /* Eight-table variant of CrcUpdateT1_BeT4: byte-swapped CRC value, tables starting
+     0x100 entries into `table`, two 32-bit words per step once the pointer is 8-byte aligned. */
+  const Byte *cur = (const Byte *)data;
+
+  table += 0x100;
+  v = CRC_UINT32_SWAP(v);
+  for (; size != 0 && ((unsigned)(ptrdiff_t)cur & 7) != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
+
+  while (size >= 8)
+  {
+    const UInt32 w0 = v ^ *(const UInt32 *)cur;
+    const UInt32 w1 = *((const UInt32 *)cur + 1);
+    v = table[0x400 + (w0 & 0xFF)] ^ table[0x500 + ((w0 >> 8) & 0xFF)]
+      ^ table[0x600 + ((w0 >> 16) & 0xFF)] ^ table[0x700 + (w0 >> 24)]
+      ^ table[w1 & 0xFF] ^ table[0x100 + ((w1 >> 8) & 0xFF)]
+      ^ table[0x200 + ((w1 >> 16) & 0xFF)] ^ table[0x300 + (w1 >> 24)];
+    cur += 8;
+    size -= 8;
+  }
+
+  for (; size != 0; cur++, size--)
+    v = CRC_UPDATE_BYTE_2_BE(v, *cur);
+  return CRC_UINT32_SWAP(v);
+}
+
+#endif
diff --git a/SevenZip/7zDec.c b/SevenZip/7zDec.c
new file mode 100644
index 0000000..c45d6bf
--- /dev/null
+++ b/SevenZip/7zDec.c
@@ -0,0 +1,591 @@
+/* 7zDec.c -- Decoding from 7z folder
+2015-11-18 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define _7ZIP_PPMD_SUPPPORT */
+
+#include "7z.h"
+#include "7zCrc.h"
+
+#include "Bcj2.h"
+#include "Bra.h"
+#include "CpuArch.h"
+#include "Delta.h"
+#include "LzmaDec.h"
+#include "Lzma2Dec.h"
+#ifdef _7ZIP_PPMD_SUPPPORT
+#include "Ppmd7.h"
+#endif
+
+#define k_Copy 0
+#define k_Delta 3
+#define k_LZMA2 0x21
+#define k_LZMA  0x30101
+#define k_BCJ   0x3030103
+#define k_BCJ2  0x303011B
+#define k_PPC   0x3030205
+#define k_IA64  0x3030401
+#define k_ARM   0x3030501
+#define k_ARMT  0x3030701
+#define k_SPARC 0x3030805
+
+
+#ifdef _7ZIP_PPMD_SUPPPORT
+
+#define k_PPMD 0x30401
+
+typedef struct
+{
+  IByteIn p; /* byte-source interface given to the range decoder; ReadByte casts its argument back to CByteInToLook, so this member presumably has to stay first */
+  const Byte *cur; /* next unread byte of the current window */
+  const Byte *end; /* one past the last byte of the current window */
+  const Byte *begin; /* start of the window returned by the last Look call */
+  UInt64 processed; /* bytes consumed from windows that were already skipped */
+  Bool extra; /* set by ReadByte when a byte was requested but none could be supplied */
+  SRes res; /* result of the most recent stream call made by ReadByte */
+  ILookInStream *inStream; /* stream the windows are taken from */
+} CByteInToLook;
+
+static Byte ReadByte(void *pp)
+{ /* IByteIn callback: returns the next input byte, or 0 with p->extra set when the stream failed or is exhausted. */
+  CByteInToLook *p = (CByteInToLook *)pp;
+  if (p->cur != p->end)
+    return *p->cur++;
+  if (p->res == SZ_OK)
+  {
+    size_t size = (1 << 25); /* largest window requested from Look */
+    p->processed += (size_t)(p->cur - p->begin);
+    p->res = p->inStream->Skip(p->inStream, (size_t)(p->cur - p->begin));
+    if (p->res == SZ_OK) /* keep a Skip failure instead of overwriting it with the Look result */
+      p->res = p->inStream->Look(p->inStream, (const void **)&p->begin, &size);
+    p->cur = p->begin;
+    p->end = p->begin + (p->res == SZ_OK ? size : 0); /* expose no bytes after a failed stream call */
+    if (p->cur != p->end)
+      return *p->cur++;
+  }
+  p->extra = True;
+  return 0;
+}
+
+static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
+    Byte *outBuffer, SizeT outSize, ISzAlloc *allocMain)
+{
+  CPpmd7 ppmd;
+  CByteInToLook s;
+  SRes res = SZ_OK;
+  /* Decodes exactly outSize bytes of PPMd7 data from inStream into outBuffer; anything but "outSize bytes produced from exactly inSize input bytes" ends in an error code. */
+  s.p.Read = ReadByte;
+  s.inStream = inStream;
+  s.begin = s.end = s.cur = NULL; /* empty window: the first ReadByte call fetches data from the stream */
+  s.extra = False;
+  s.res = SZ_OK;
+  s.processed = 0;
+
+  if (propsSize != 5) /* properties are one order byte followed by a 32-bit memory size */
+    return SZ_ERROR_UNSUPPORTED;
+
+  {
+    unsigned order = props[0];
+    UInt32 memSize = GetUi32(props + 1);
+    if (order < PPMD7_MIN_ORDER ||
+        order > PPMD7_MAX_ORDER ||
+        memSize < PPMD7_MIN_MEM_SIZE ||
+        memSize > PPMD7_MAX_MEM_SIZE)
+      return SZ_ERROR_UNSUPPORTED;
+    Ppmd7_Construct(&ppmd);
+    if (!Ppmd7_Alloc(&ppmd, memSize, allocMain))
+      return SZ_ERROR_MEM;
+    Ppmd7_Init(&ppmd, order);
+  }
+  {
+    CPpmd7z_RangeDec rc;
+    Ppmd7z_RangeDec_CreateVTable(&rc);
+    rc.Stream = &s.p; /* the range decoder pulls its input through ReadByte */
+    if (!Ppmd7z_RangeDec_Init(&rc))
+      res = SZ_ERROR_DATA;
+    else if (s.extra) /* input ran out during range-decoder init */
+      res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); /* prefer the stream's own error code when there is one */
+    else
+    {
+      SizeT i;
+      for (i = 0; i < outSize; i++)
+      {
+        int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.p);
+        if (s.extra || sym < 0) /* stop before storing: input exhausted or the decoder reported a negative symbol */
+          break;
+        outBuffer[i] = (Byte)sym;
+      }
+      if (i != outSize)
+        res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+      else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) /* the consumed byte count has to equal inSize */
+        res = SZ_ERROR_DATA;
+    }
+  }
+  Ppmd7_Free(&ppmd, allocMain);
+  return res;
+}
+
+#endif
+
+
+static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
+    Byte *outBuffer, SizeT outSize, ISzAlloc *allocMain)
+{
+  CLzmaDec state;
+  SRes res = SZ_OK;
+  /* Decodes one LZMA stream of inSize packed bytes from inStream into outBuffer (outSize bytes), feeding the decoder from Look windows. */
+  LzmaDec_Construct(&state);
+  RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain));
+  state.dic = outBuffer; /* the output buffer itself serves as the decoder's dictionary */
+  state.dicBufSize = outSize;
+  LzmaDec_Init(&state);
+
+  for (;;)
+  {
+    const void *inBuf = NULL;
+    size_t lookahead = (1 << 18); /* request at most 256 KiB per window, capped to the packed bytes left */
+    if (lookahead > inSize)
+      lookahead = (size_t)inSize;
+    res = inStream->Look(inStream, &inBuf, &lookahead);
+    if (res != SZ_OK)
+      break;
+
+    {
+      SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos; /* dicPos: output position before this step, for the no-progress test below */
+      ELzmaStatus status;
+      res = LzmaDec_DecodeToDic(&state, outSize, inBuf, &inProcessed, LZMA_FINISH_END, &status);
+      lookahead -= inProcessed; /* not read again: lookahead is re-initialised at the top of the loop */
+      inSize -= inProcessed;
+      if (res != SZ_OK)
+        break;
+
+      if (status == LZMA_STATUS_FINISHED_WITH_MARK)
+      {
+        if (outSize != state.dicPos || inSize != 0) /* an end mark is accepted only with all output produced and all input consumed */
+          res = SZ_ERROR_DATA;
+        break;
+      }
+
+      if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
+        break; /* success without an end mark */
+
+      if (inProcessed == 0 && dicPos == state.dicPos) /* neither input consumed nor output produced: give up instead of looping forever */
+      {
+        res = SZ_ERROR_DATA;
+        break;
+      }
+
+      res = inStream->Skip((void *)inStream, inProcessed); /* consume only what the decoder actually used */
+      if (res != SZ_OK)
+        break;
+    }
+  }
+
+  LzmaDec_FreeProbs(&state, allocMain);
+  return res;
+}
+
+
+#ifndef _7Z_NO_METHOD_LZMA2
+
+static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
+    Byte *outBuffer, SizeT outSize, ISzAlloc *allocMain)
+{
+  CLzma2Dec state;
+  SRes res = SZ_OK;
+  /* LZMA2 counterpart of SzDecodeLzma: same Look / decode / Skip loop, but success requires LZMA_STATUS_FINISHED_WITH_MARK. */
+  Lzma2Dec_Construct(&state);
+  if (propsSize != 1) /* LZMA2 properties are a single byte */
+    return SZ_ERROR_DATA;
+  RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain));
+  state.decoder.dic = outBuffer; /* the output buffer itself serves as the decoder's dictionary */
+  state.decoder.dicBufSize = outSize;
+  Lzma2Dec_Init(&state);
+
+  for (;;)
+  {
+    const void *inBuf = NULL;
+    size_t lookahead = (1 << 18); /* request at most 256 KiB per window, capped to the packed bytes left */
+    if (lookahead > inSize)
+      lookahead = (size_t)inSize;
+    res = inStream->Look(inStream, &inBuf, &lookahead);
+    if (res != SZ_OK)
+      break;
+
+    {
+      SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos; /* dicPos: output position before this step */
+      ELzmaStatus status;
+      res = Lzma2Dec_DecodeToDic(&state, outSize, inBuf, &inProcessed, LZMA_FINISH_END, &status);
+      lookahead -= inProcessed; /* not read again: lookahead is re-initialised at the top of the loop */
+      inSize -= inProcessed;
+      if (res != SZ_OK)
+        break;
+
+      if (status == LZMA_STATUS_FINISHED_WITH_MARK)
+      {
+        if (outSize != state.decoder.dicPos || inSize != 0) /* all output produced and all input consumed, or the data is bad */
+          res = SZ_ERROR_DATA;
+        break;
+      }
+
+      if (inProcessed == 0 && dicPos == state.decoder.dicPos) /* no progress at all: report bad data instead of looping forever */
+      {
+        res = SZ_ERROR_DATA;
+        break;
+      }
+
+      res = inStream->Skip((void *)inStream, inProcessed); /* consume only what the decoder actually used */
+      if (res != SZ_OK)
+        break;
+    }
+  }
+
+  Lzma2Dec_FreeProbs(&state, allocMain);
+  return res;
+}
+
+#endif
+
+
+static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer)
+{
+  /* Copies inSize bytes from inStream to outBuffer in Look/Skip windows of at most 256 KiB. */
+  Byte *out = outBuffer;
+  while (inSize != 0)
+  {
+    const void *chunk;
+    size_t chunkLen = (inSize < (1 << 18)) ? (size_t)inSize : (size_t)(1 << 18);
+    RINOK(inStream->Look(inStream, &chunk, &chunkLen));
+    if (chunkLen == 0)
+      return SZ_ERROR_INPUT_EOF; /* Look delivered nothing although bytes are still expected */
+    memcpy(out, chunk, chunkLen);
+    out += chunkLen;
+    inSize -= chunkLen;
+    RINOK(inStream->Skip((void *)inStream, chunkLen));
+  }
+  return SZ_OK;
+}
+
+static Bool IS_MAIN_METHOD(UInt32 m)
+{
+  /* True for the method IDs this build handles as a main coder: Copy, LZMA, and LZMA2 / PPMD when compiled in. */
+  if (m == k_Copy || m == k_LZMA)
+    return True;
+  #ifndef _7Z_NO_METHOD_LZMA2
+  if (m == k_LZMA2)
+    return True;
+  #endif
+  #ifdef _7ZIP_PPMD_SUPPPORT
+  if (m == k_PPMD)
+    return True;
+  #endif
+  /* Every other ID is not a main method here. */
+  return False;
+}
+
+static Bool IS_SUPPORTED_CODER(const CSzCoderInfo *c)
+{
+  /* One stream and a main method; MethodID is narrowed to UInt32 without a range check, as before. */
+  if (c->NumStreams != 1)
+    return False;
+  return IS_MAIN_METHOD((UInt32)c->MethodID);
+}
+
+#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4)
+
+static SRes CheckSupportedFolder(const CSzFolder *f)
+{
+  /* Accepts three folder layouts and answers SZ_ERROR_UNSUPPORTED for anything else:
+       1 coder : one main-method coder, pack stream 0, no bonds;
+       2 coders: main coder plus one filter (compiled out by _7Z_NO_METHODS_FILTERS);
+       4 coders: three main-method coders plus a BCJ2 coder with a fixed pack-stream and bond table.
+     Coders[0] has to be a supported main coder in every layout. */
+  if (f->NumCoders < 1 || f->NumCoders > 4)
+    return SZ_ERROR_UNSUPPORTED;
+  if (!IS_SUPPORTED_CODER(&f->Coders[0]))
+    return SZ_ERROR_UNSUPPORTED;
+
+  switch (f->NumCoders)
+  {
+    case 1:
+      if (f->NumPackStreams == 1 && f->PackStreams[0] == 0 && f->NumBonds == 0)
+        return SZ_OK;
+      break;
+
+    #ifndef _7Z_NO_METHODS_FILTERS
+    case 2:
+    {
+      const CSzCoderInfo *filter = &f->Coders[1];
+      /* MethodID is narrowed to UInt32 in the switch below without a range check, as before. */
+      if (filter->NumStreams != 1
+          || f->NumPackStreams != 1
+          || f->PackStreams[0] != 0
+          || f->NumBonds != 1
+          || f->Bonds[0].InIndex != 1
+          || f->Bonds[0].OutIndex != 0)
+        break;
+      switch ((UInt32)filter->MethodID)
+      {
+        case k_Delta:
+        case k_BCJ:
+        case k_PPC:
+        case k_IA64:
+        case k_SPARC:
+        case k_ARM:
+        case k_ARMT:
+          return SZ_OK;
+        default:
+          break;
+      }
+      break;
+    }
+    #endif
+
+    case 4:
+      if (!IS_SUPPORTED_CODER(&f->Coders[1])
+          || !IS_SUPPORTED_CODER(&f->Coders[2])
+          || !IS_BCJ2(&f->Coders[3]))
+        break;
+      if (f->NumPackStreams == 4
+          && f->PackStreams[0] == 2
+          && f->PackStreams[1] == 6
+          && f->PackStreams[2] == 1
+          && f->PackStreams[3] == 0
+          && f->NumBonds == 3
+          && f->Bonds[0].InIndex == 5 && f->Bonds[0].OutIndex == 0
+          && f->Bonds[1].InIndex == 4 && f->Bonds[1].OutIndex == 1
+          && f->Bonds[2].InIndex == 3 && f->Bonds[2].OutIndex == 2)
+        return SZ_OK;
+      break;
+  }
+
+  return SZ_ERROR_UNSUPPORTED;
+}
+
+#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
+
+static SRes SzFolder_Decode2(const CSzFolder *folder, /* Runs every coder of one folder, in order, producing the result in outBuffer. */
+    const Byte *propsData,
+    const UInt64 *unpackSizes,
+    const UInt64 *packPositions,
+    ILookInStream *inStream, UInt64 startPos,
+    Byte *outBuffer, SizeT outSize, ISzAlloc *allocMain,
+    Byte *tempBuf[]) /* receives the temporary buffers allocated here; this function does not free them */
+{
+  UInt32 ci;
+  SizeT tempSizes[3] = { 0, 0, 0};
+  SizeT tempSize3 = 0;
+  Byte *tempBuf3 = 0;
+
+  RINOK(CheckSupportedFolder(folder)); /* bail out early on coder layouts that are not supported */
+
+  for (ci = 0; ci < folder->NumCoders; ci++)
+  {
+    const CSzCoderInfo *coder = &folder->Coders[ci];
+
+    if (IS_MAIN_METHOD((UInt32)coder->MethodID))
+    {
+      UInt32 si = 0; /* index into packPositions of the pack stream this coder reads */
+      UInt64 offset;
+      UInt64 inSize;
+      Byte *outBufCur = outBuffer; /* default: decode straight into the caller's buffer */
+      SizeT outSizeCur = outSize;
+      if (folder->NumCoders == 4) /* 4-coder layout: coders 0..2 are main coders, coder 3 is BCJ2 */
+      {
+        UInt32 indices[] = { 3, 2, 0 }; /* pack stream used by coder 0, 1 and 2 respectively */
+        UInt64 unpackSize = unpackSizes[ci];
+        si = indices[ci];
+        if (ci < 2)
+        {
+          Byte *temp;
+          outSizeCur = (SizeT)unpackSize;
+          if (outSizeCur != unpackSize) /* unpack size does not fit in SizeT */
+            return SZ_ERROR_MEM;
+          temp = (Byte *)IAlloc_Alloc(allocMain, outSizeCur);
+          if (!temp && outSizeCur != 0)
+            return SZ_ERROR_MEM;
+          outBufCur = tempBuf[1 - ci] = temp; /* coder 0 -> tempBuf[1], coder 1 -> tempBuf[0] */
+          tempSizes[1 - ci] = outSizeCur;
+        }
+        else if (ci == 2)
+        {
+          if (unpackSize > outSize) /* check it */
+            return SZ_ERROR_PARAM;
+          tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize); /* coder 2 decodes into the tail of outBuffer */
+          tempSize3 = outSizeCur = (SizeT)unpackSize;
+        }
+        else
+          return SZ_ERROR_UNSUPPORTED;
+      }
+      offset = packPositions[si];
+      inSize = packPositions[si + 1] - offset; /* packed size = distance to the next pack position */
+      RINOK(LookInStream_SeekTo(inStream, startPos + offset));
+
+      if (coder->MethodID == k_Copy)
+      {
+        if (inSize != outSizeCur) /* check it */
+          return SZ_ERROR_DATA;
+        RINOK(SzDecodeCopy(inSize, inStream, outBufCur));
+      }
+      else if (coder->MethodID == k_LZMA)
+      {
+        RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
+      }
+      #ifndef _7Z_NO_METHOD_LZMA2
+      else if (coder->MethodID == k_LZMA2)
+      {
+        RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
+      }
+      #endif
+      #ifdef _7ZIP_PPMD_SUPPPORT
+      else if (coder->MethodID == k_PPMD)
+      {
+        RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
+      }
+      #endif
+      else
+        return SZ_ERROR_UNSUPPORTED;
+    }
+    else if (coder->MethodID == k_BCJ2)
+    {
+      UInt64 offset = packPositions[1]; /* pack stream 1 is read here with SzDecodeCopy, without a main coder */
+      UInt64 s3Size = packPositions[2] - offset;
+      
+      if (ci != 3) /* BCJ2 is only accepted as the fourth coder */
+        return SZ_ERROR_UNSUPPORTED;
+      
+      tempSizes[2] = (SizeT)s3Size;
+      if (tempSizes[2] != s3Size) /* size does not fit in SizeT */
+        return SZ_ERROR_MEM;
+      tempBuf[2] = (Byte *)IAlloc_Alloc(allocMain, tempSizes[2]);
+      if (!tempBuf[2] && tempSizes[2] != 0)
+        return SZ_ERROR_MEM;
+      
+      RINOK(LookInStream_SeekTo(inStream, startPos + offset));
+      RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]));
+
+      if ((tempSizes[0] & 3) != 0 || /* the two 32-bit streams must be a multiple of 4 bytes long */
+          (tempSizes[1] & 3) != 0 ||
+          tempSize3 + tempSizes[0] + tempSizes[1] != outSize) /* the three decoded parts must add up to outSize */
+        return SZ_ERROR_DATA;
+
+      {
+        CBcj2Dec p;
+        
+        p.bufs[0] = tempBuf3;   p.lims[0] = tempBuf3 + tempSize3; /* index 0: BCJ2_STREAM_MAIN (tail of outBuffer) */
+        p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0]; /* index 1: BCJ2_STREAM_CALL */
+        p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1]; /* index 2: BCJ2_STREAM_JUMP */
+        p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2]; /* index 3: BCJ2_STREAM_RC */
+        
+        p.dest = outBuffer;
+        p.destLim = outBuffer + outSize;
+        
+        Bcj2Dec_Init(&p);
+        RINOK(Bcj2Dec_Decode(&p));
+
+        {
+          unsigned i;
+          for (i = 0; i < 4; i++)
+            if (p.bufs[i] != p.lims[i]) /* every input stream must have been consumed completely */
+              return SZ_ERROR_DATA;
+          
+          if (!Bcj2Dec_IsFinished(&p))
+            return SZ_ERROR_DATA;
+
+          if (p.dest != p.destLim /* the output must be filled exactly */
+             || p.state != BCJ2_STREAM_MAIN)
+            return SZ_ERROR_DATA;
+        }
+      }
+    }
+    #ifndef _7Z_NO_METHODS_FILTERS
+    else if (ci == 1) /* filter coder at index 1: applied in place to the whole outBuffer */
+    {
+      if (coder->MethodID == k_Delta)
+      {
+        if (coder->PropsSize != 1)
+          return SZ_ERROR_UNSUPPORTED;
+        {
+          Byte state[DELTA_STATE_SIZE];
+          Delta_Init(state);
+          Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize); /* second argument is the single property byte plus 1 */
+        }
+      }
+      else
+      {
+        if (coder->PropsSize != 0) /* the branch converters below take no properties */
+          return SZ_ERROR_UNSUPPORTED;
+        switch (coder->MethodID)
+        {
+          case k_BCJ:
+          {
+            UInt32 state;
+            x86_Convert_Init(state);
+            x86_Convert(outBuffer, outSize, 0, &state, 0);
+            break;
+          }
+          CASE_BRA_CONV(PPC)
+          CASE_BRA_CONV(IA64)
+          CASE_BRA_CONV(SPARC)
+          CASE_BRA_CONV(ARM)
+          CASE_BRA_CONV(ARMT)
+          default:
+            return SZ_ERROR_UNSUPPORTED;
+        }
+      }
+    }
+    #endif
+    else
+      return SZ_ERROR_UNSUPPORTED;
+  }
+
+  return SZ_OK;
+}
+
+
+SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
+    ILookInStream *inStream, UInt64 startPos,
+    Byte *outBuffer, size_t outSize,
+    ISzAlloc *allocMain)
+{ /* Decodes folder folderIndex of archive p into outBuffer and checks its CRC when one is stored. */
+  const Byte *codersStart = p->CodersData + p->FoCodersOffsets[folderIndex];
+  Byte *scratch[3] = { 0, 0, 0 }; /* temporary buffers filled in by SzFolder_Decode2 */
+  CSzFolder folderInfo;
+  CSzData cursor;
+  unsigned k;
+  SRes status;
+
+  /* Parse the coder description of this folder. */
+  cursor.Data = codersStart;
+  cursor.Size = p->FoCodersOffsets[folderIndex + 1] - p->FoCodersOffsets[folderIndex];
+  status = SzGetNextFolderItem(&folderInfo, &cursor);
+  if (status != SZ_OK)
+    return status;
+
+  /* The description must be fully consumed and agree with the archive tables. */
+  if (cursor.Size != 0)
+    return SZ_ERROR_FAIL;
+  if (folderInfo.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex])
+    return SZ_ERROR_FAIL;
+  if (outSize != SzAr_GetFolderUnpackSize(p, folderIndex))
+    return SZ_ERROR_FAIL;
+
+  status = SzFolder_Decode2(&folderInfo, codersStart,
+      &p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]],
+      p->PackPositions + p->FoStartPackStreamIndex[folderIndex],
+      inStream, startPos,
+      outBuffer, (SizeT)outSize, allocMain, scratch);
+
+  /* Release the temporary buffers whether or not decoding succeeded. */
+  for (k = 0; k < 3; k++)
+    IAlloc_Free(allocMain, scratch[k]);
+
+  if (status == SZ_OK
+      && SzBitWithVals_Check(&p->FolderCRCs, folderIndex)
+      && CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex])
+    status = SZ_ERROR_CRC;
+  return status;
+}
diff --git a/SevenZip/7zFile.c b/SevenZip/7zFile.c
new file mode 100644
index 0000000..041e5b1
--- /dev/null
+++ b/SevenZip/7zFile.c
@@ -0,0 +1,286 @@
+/* 7zFile.c -- File IO
+2009-11-24 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "7zFile.h"
+
+#ifndef USE_WINDOWS_FILE
+
+#ifndef UNDER_CE
+#include <errno.h>
+#endif
+
+#else
+
+/*
+   ReadFile and WriteFile functions in Windows have BUG:
+   If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1)
+   from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES
+   (Insufficient system resources exist to complete the requested service).
+   Probably in some version of Windows there are problems with other sizes:
+   for 32 MB (maybe also for 16 MB).
+   And message can be "Network connection was lost"
+*/
+
+#define kChunkSizeMax (1 << 22)
+
+#endif
+
+void File_Construct(CSzFile *p)
+{ /* Puts the file object into the "not open" state that File_Close tests for. */
+  #ifdef USE_WINDOWS_FILE
+  p->handle = INVALID_HANDLE_VALUE;
+  #else
+  p->file = NULL;
+  #endif
+}
+
+#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
+static WRes File_Open(CSzFile *p, const char *name, int writeMode)
+{ /* Opens `name` for reading (writeMode == 0) or creates/truncates it for writing; returns 0 or an error code. */
+  #ifdef USE_WINDOWS_FILE
+  p->handle = CreateFileA(name,
+      writeMode ? GENERIC_WRITE : GENERIC_READ,
+      FILE_SHARE_READ, NULL,
+      writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
+      FILE_ATTRIBUTE_NORMAL, NULL);
+  return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
+  #else
+  p->file = fopen(name, writeMode ? "wb+" : "rb");
+  return (p->file != 0) ? 0 : /* the failure value is chosen by the #ifdef below */
+    #ifdef UNDER_CE
+    2; /* ENOENT */
+    #else
+    errno;
+    #endif
+  #endif
+}
+
+WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); } /* open an existing file for reading */
+WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); } /* create or truncate a file for writing */
+#endif
+
+#ifdef USE_WINDOWS_FILE
+static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
+{ /* Wide-character open: reads an existing file, or creates/truncates one when writeMode is non-zero. */
+  DWORD access = writeMode ? GENERIC_WRITE : GENERIC_READ;
+  DWORD disposition = writeMode ? CREATE_ALWAYS : OPEN_EXISTING;
+  p->handle = CreateFileW(name, access, FILE_SHARE_READ, NULL, disposition, FILE_ATTRIBUTE_NORMAL, NULL);
+  if (p->handle == INVALID_HANDLE_VALUE)
+    return GetLastError(); /* Win32 error code of the failed CreateFileW */
+  return 0;
+}
+WRes InFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 0); } /* open an existing file for reading */
+WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1); } /* create or truncate a file for writing */
+#endif
+
+WRes File_Close(CSzFile *p)
+{ /* Closes the file if it is open; returns 0 on success or when nothing was open, else an error code. */
+  #ifdef USE_WINDOWS_FILE
+  if (p->handle != INVALID_HANDLE_VALUE)
+  {
+    if (!CloseHandle(p->handle))
+      return GetLastError();
+    p->handle = INVALID_HANDLE_VALUE;
+  }
+  #else
+  if (p->file != NULL)
+  {
+    int res = fclose(p->file);
+    p->file = NULL; /* fclose releases the stream even when it fails; never hand it to fclose twice */
+    if (res != 0)
+      return res;
+  }
+  #endif
+  return 0;
+}
+
+WRes File_Read(CSzFile *p, void *data, size_t *size)
+{ /* Reads up to *size bytes into data; on return *size is the number of bytes actually read. */
+  size_t originalSize = *size;
+  if (originalSize == 0)
+    return 0;
+
+  #ifdef USE_WINDOWS_FILE
+
+  *size = 0;
+  do
+  {
+    DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; /* never ask ReadFile for more than kChunkSizeMax at once */
+    DWORD processed = 0;
+    BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
+    data = (void *)((Byte *)data + processed);
+    originalSize -= processed;
+    *size += processed; /* counted before the error check, so *size is valid on failure too */
+    if (!res)
+      return GetLastError();
+    if (processed == 0) /* nothing more could be read: stop with a short count */
+      break;
+  }
+  while (originalSize > 0);
+  return 0;
+
+  #else
+  
+  *size = fread(data, 1, originalSize, p->file);
+  if (*size == originalSize)
+    return 0;
+  return ferror(p->file); /* short read: the result is whatever ferror reports for the stream */
+  
+  #endif
+}
+
+WRes File_Write(CSzFile *p, const void *data, size_t *size)
+{ /* Writes up to *size bytes from data; on return *size is the number of bytes actually written. */
+  size_t originalSize = *size;
+  if (originalSize == 0)
+    return 0;
+  
+  #ifdef USE_WINDOWS_FILE
+
+  *size = 0;
+  do
+  {
+    DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; /* never pass WriteFile more than kChunkSizeMax at once */
+    DWORD processed = 0;
+    BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
+    data = (void *)((Byte *)data + processed);
+    originalSize -= processed;
+    *size += processed; /* counted before the error check, so *size is valid on failure too */
+    if (!res)
+      return GetLastError();
+    if (processed == 0) /* no progress: stop with a short count */
+      break;
+  }
+  while (originalSize > 0);
+  return 0;
+
+  #else
+
+  *size = fwrite(data, 1, originalSize, p->file);
+  if (*size == originalSize)
+    return 0;
+  return ferror(p->file); /* short write: the result is whatever ferror reports for the stream */
+  
+  #endif
+}
+
+WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
+{ /* Moves the file position by *pos relative to origin and stores the resulting position back in *pos. */
+  #ifdef USE_WINDOWS_FILE
+
+  LARGE_INTEGER value;
+  DWORD moveMethod;
+  value.LowPart = (DWORD)*pos;
+  value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
+  switch (origin)
+  {
+    case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
+    case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break;
+    case SZ_SEEK_END: moveMethod = FILE_END; break;
+    default: return ERROR_INVALID_PARAMETER;
+  }
+  value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
+  if (value.LowPart == 0xFFFFFFFF) /* ambiguous value: treated as failure only when GetLastError reports an error */
+  {
+    WRes res = GetLastError();
+    if (res != NO_ERROR)
+      return res;
+  }
+  *pos = ((Int64)value.HighPart << 32) | value.LowPart;
+  return 0;
+
+  #else
+  
+  int moveMethod;
+  int res;
+  switch (origin)
+  {
+    case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
+    case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
+    case SZ_SEEK_END: moveMethod = SEEK_END; break;
+    default: return 1;
+  }
+  res = fseek(p->file, (long)*pos, moveMethod); /* NOTE(review): the offset is narrowed to long here */
+  *pos = ftell(p->file); /* stored even when fseek failed; the fseek result is what gets returned */
+  return res;
+  
+  #endif
+}
+
+WRes File_GetLength(CSzFile *p, UInt64 *length)
+{ /* Stores the size of the file in *length; returns 0 or an error code. */
+  #ifdef USE_WINDOWS_FILE
+  
+  DWORD sizeHigh;
+  DWORD sizeLow = GetFileSize(p->handle, &sizeHigh);
+  if (sizeLow == 0xFFFFFFFF) /* ambiguous value: treated as failure only when GetLastError reports an error */
+  {
+    DWORD res = GetLastError();
+    if (res != NO_ERROR)
+      return res;
+  }
+  *length = (((UInt64)sizeHigh) << 32) + sizeLow;
+  return 0;
+  
+  #else
+  
+  long pos = ftell(p->file); /* remember the current position so it can be restored below */
+  int res = fseek(p->file, 0, SEEK_END);
+  *length = ftell(p->file); /* NOTE(review): the ftell result is stored without an error check */
+  fseek(p->file, pos, SEEK_SET); /* result ignored; only the SEEK_END result is returned */
+  return res;
+  
+  #endif
+}
+
+
+/* ---------- FileSeqInStream ---------- */
+
+static SRes FileSeqInStream_Read(void *pp, void *buf, size_t *size)
+{ /* ISeqInStream::Read adapter: any non-zero File_Read result is reported as SZ_ERROR_READ. */
+  WRes wres = File_Read(&((CFileSeqInStream *)pp)->file, buf, size);
+  return (wres != 0) ? SZ_ERROR_READ : SZ_OK;
+}
+
+void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
+{ /* Installs the Read callback; p->file is left untouched. */
+  p->s.Read = FileSeqInStream_Read;
+}
+
+
+/* ---------- FileInStream ---------- */
+
+static SRes FileInStream_Read(void *pp, void *buf, size_t *size)
+{ /* ISeekInStream::Read adapter: any non-zero File_Read result is reported as SZ_ERROR_READ. */
+  WRes wres = File_Read(&((CFileInStream *)pp)->file, buf, size);
+  return (wres != 0) ? SZ_ERROR_READ : SZ_OK;
+}
+
+static SRes FileInStream_Seek(void *pp, Int64 *pos, ESzSeek origin)
+{ /* ISeekInStream::Seek adapter: the File_Seek result is returned unchanged. */
+  CSzFile *target = &((CFileInStream *)pp)->file;
+  return File_Seek(target, pos, origin);
+}
+
+void FileInStream_CreateVTable(CFileInStream *p)
+{ /* Installs the Read and Seek callbacks; p->file is left untouched. */
+  p->s.Read = FileInStream_Read;
+  p->s.Seek = FileInStream_Seek;
+}
+
+
+/* ---------- FileOutStream ---------- */
+
+static size_t FileOutStream_Write(void *pp, const void *data, size_t size)
+{
+  size_t written = size; /* in: bytes requested, out: bytes File_Write reports as written */
+  File_Write(&((CFileOutStream *)pp)->file, data, &written); /* error code dropped; only the count is returned */
+  return written;
+}
+
+void FileOutStream_CreateVTable(CFileOutStream *p)
+{ /* Installs the Write callback; p->file is left untouched. */
+  p->s.Write = FileOutStream_Write;
+}
diff --git a/SevenZip/7zFile.h b/SevenZip/7zFile.h
new file mode 100644
index 0000000..658987e
--- /dev/null
+++ b/SevenZip/7zFile.h
@@ -0,0 +1,83 @@
+/* 7zFile.h -- File IO
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_FILE_H
+#define __7Z_FILE_H
+
+#ifdef _WIN32
+#define USE_WINDOWS_FILE
+#endif
+
+#ifdef USE_WINDOWS_FILE
+#include <windows.h>
+#else
+#include <stdio.h>
+#endif
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* ---------- File ---------- */
+
+typedef struct
+{ /* Platform file wrapper: a Win32 HANDLE when USE_WINDOWS_FILE is defined, a stdio FILE* otherwise. */
+  #ifdef USE_WINDOWS_FILE
+  HANDLE handle;
+  #else
+  FILE *file;
+  #endif
+} CSzFile;
+
+void File_Construct(CSzFile *p);
+#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
+WRes InFile_Open(CSzFile *p, const char *name);
+WRes OutFile_Open(CSzFile *p, const char *name);
+#endif
+#ifdef USE_WINDOWS_FILE
+WRes InFile_OpenW(CSzFile *p, const WCHAR *name);
+WRes OutFile_OpenW(CSzFile *p, const WCHAR *name);
+#endif
+WRes File_Close(CSzFile *p);
+
+/* reads min(*size, remaining file size) bytes; *size is updated to the number of bytes read */
+WRes File_Read(CSzFile *p, void *data, size_t *size);
+
+/* writes *size bytes */
+WRes File_Write(CSzFile *p, const void *data, size_t *size);
+
+WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin);
+WRes File_GetLength(CSzFile *p, UInt64 *length);
+
+
+/* ---------- FileInStream ---------- */
+
+typedef struct
+{ /* Sequential input stream backed by a CSzFile. */
+  ISeqInStream s; /* must stay the first member: the Read callback casts its pointer back to this struct */
+  CSzFile file;
+} CFileSeqInStream;
+
+void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
+
+
+typedef struct
+{ /* Seekable input stream backed by a CSzFile. */
+  ISeekInStream s; /* must stay the first member: the callbacks cast their pointer back to this struct */
+  CSzFile file;
+} CFileInStream;
+
+void FileInStream_CreateVTable(CFileInStream *p);
+
+
+typedef struct
+{ /* Sequential output stream backed by a CSzFile. */
+  ISeqOutStream s; /* must stay the first member: the Write callback casts its pointer back to this struct */
+  CSzFile file;
+} CFileOutStream;
+
+void FileOutStream_CreateVTable(CFileOutStream *p);
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/7zMemBuffer.c b/SevenZip/7zMemBuffer.c
new file mode 100644
index 0000000..e2dbd0f
--- /dev/null
+++ b/SevenZip/7zMemBuffer.c
@@ -0,0 +1,53 @@
+#include "Precomp.h"
+#include "7zMemBuffer.h"
+#include <memory.h>
+
+WRes MemBuffer_Read(CSzMemBuffer *p, void *data, size_t *size)
+{	/* Copies up to *size bytes from the current position into data; on return *size is the count actually copied (0 = end of buffer). Always returns 0. */
+	/* A position outside [0, size) leaves nothing to read, so a bad seek can no longer turn into a negative (huge) memcpy length. */
+	Int64 remaining = (p->pos >= 0 && p->pos < p->size) ? p->size - p->pos : 0;
+	size_t length = ((UInt64)*size > (UInt64)remaining) ? (size_t)remaining : *size;
+	if(length != 0)
+		memcpy(data, (char*)(p->buffer) + p->pos, length);
+	p->pos += (Int64)length;
+	*size = length;	/* report the real count: the stream contract uses a zero count to signal end of stream */
+	return 0;
+}
+
+WRes MemBuffer_Seek(CSzMemBuffer *p, Int64 *pos, ESzSeek origin)
+{	/* Moves the read position by *pos relative to origin; on success stores the new absolute position in *pos and returns 0, otherwise returns 1 and changes nothing. */
+	Int64 target = *pos;
+	if(origin == SZ_SEEK_CUR) target += p->pos;
+	else if(origin == SZ_SEEK_END) target += p->size;	/* offset is added to the size, the same convention File_Seek gets from SEEK_END */
+	else if(origin != SZ_SEEK_SET) return 1;	/* unknown origin */
+	if(target < 0)
+		return 1;	/* a position before the start of the buffer is invalid */
+	*pos = p->pos = target;
+	return 0;
+}
+
+static SRes MemBufferInStream_Read(void *pp, void *buf, size_t *size)
+{	/* ISeekInStream::Read adapter: a non-zero MemBuffer_Read result is reported as SZ_ERROR_READ. */
+	WRes wres = MemBuffer_Read(&((CMemBufferInStream *)pp)->buffer, buf, size);
+	return (wres != 0) ? SZ_ERROR_READ : SZ_OK;
+}
+
+static SRes MemBufferInStream_Seek(void *pp, Int64 *pos, ESzSeek origin)
+{	/* ISeekInStream::Seek adapter: the MemBuffer_Seek result is returned unchanged. */
+	CSzMemBuffer *target = &((CMemBufferInStream *)pp)->buffer;
+	return MemBuffer_Seek(target, pos, origin);
+}
+
+void MemBufferInit(CMemBufferInStream *memBuferStream, CLookToRead *lookStream, void* buffer, size_t size)
+{	/* Wraps an in-memory buffer as a seekable stream and attaches lookStream to it in exact (non-lookahead) mode. */
+	CSzMemBuffer *mem = &memBuferStream->buffer;
+	mem->buffer = buffer;
+	mem->size = size;
+	mem->pos = 0;
+	memBuferStream->s.Seek = MemBufferInStream_Seek;
+	memBuferStream->s.Read = MemBufferInStream_Read;
+
+	LookToRead_CreateVTable(lookStream, False);
+	LookToRead_Init(lookStream);
+	lookStream->realStream = &memBuferStream->s;
+}
\ No newline at end of file
diff --git a/SevenZip/7zMemBuffer.h b/SevenZip/7zMemBuffer.h
new file mode 100644
index 0000000..6226ade
--- /dev/null
+++ b/SevenZip/7zMemBuffer.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* ---------- MemBuffer ---------- */
+
+typedef struct
+{ /* Read cursor over a caller-supplied memory block. */
+	void* buffer; /* start of the data; set by MemBufferInit from its buffer argument */
+	Int64 size; /* total number of bytes in buffer */
+	Int64 pos; /* current read offset from the start of buffer */
+} CSzMemBuffer;
+
+/* reads min(*size, remaining buffer size) bytes */
+WRes MemBuffer_Read(CSzMemBuffer *p, void *data, size_t *size);
+WRes MemBuffer_Seek(CSzMemBuffer *p, Int64 *pos, ESzSeek origin);
+
+/* ---------- MemBufferInStream ---------- */
+typedef struct
+{ /* Seekable input stream backed by a CSzMemBuffer. */
+  ISeekInStream s; /* must stay the first member: the callbacks cast their pointer back to this struct */
+  CSzMemBuffer buffer;
+} CMemBufferInStream;
+
+void MemBufferInit(CMemBufferInStream *memBuferStream, CLookToRead *lookStream, void* buffer, size_t size);
+
+EXTERN_C_END
diff --git a/SevenZip/7zStream.c b/SevenZip/7zStream.c
new file mode 100644
index 0000000..88f9c42
--- /dev/null
+++ b/SevenZip/7zStream.c
@@ -0,0 +1,171 @@
+/* 7zStream.c -- 7z Stream functions
+2013-11-12 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "7zTypes.h"
+
+SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType)
+{ /* Reads exactly `size` bytes, looping over short reads; returns errorType if the stream ends first. */
+  Byte *cursor = (Byte *)buf;
+  for (; size != 0; )
+  {
+    size_t got = size;
+    RINOK(stream->Read(stream, cursor, &got));
+    if (got == 0) return errorType;
+    cursor += got;
+    size -= got;
+  }
+  return SZ_OK;
+}
+
+SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size)
+{ /* Reads exactly `size` bytes; an early end of stream yields SZ_ERROR_INPUT_EOF. */
+  return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+}
+
+SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf)
+{ /* Reads a single byte into *buf; SZ_ERROR_INPUT_EOF when the stream does not deliver exactly one. */
+  size_t count = 1;
+  RINOK(stream->Read(stream, buf, &count));
+  return (count != 1) ? SZ_ERROR_INPUT_EOF : SZ_OK;
+}
+
+SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset)
+{ /* Seeks to absolute position `offset`; the position reported back by Seek is discarded. */
+  Int64 absolutePos = (Int64)offset;
+  return stream->Seek(stream, &absolutePos, SZ_SEEK_SET);
+}
+
+SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size)
+{ /* Copies up to *size bytes using Look followed by Skip; *size is updated to the number of bytes delivered. */
+  const void *window;
+  SRes lookRes;
+  if (*size == 0) return SZ_OK;
+  if ((lookRes = stream->Look(stream, &window, size)) != SZ_OK) return lookRes;
+  memcpy(buf, window, *size);
+  return stream->Skip(stream, *size);
+}
+
+SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType)
+{ /* Reads exactly `size` bytes through ILookInStream::Read; returns errorType if the stream ends first. */
+  Byte *cursor = (Byte *)buf;
+  for (; size != 0; )
+  {
+    size_t got = size;
+    RINOK(stream->Read(stream, cursor, &got));
+    if (got == 0) return errorType;
+    cursor += got;
+    size -= got;
+  }
+  return SZ_OK;
+}
+
+SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size)
+{ /* Reads exactly `size` bytes; an early end of stream yields SZ_ERROR_INPUT_EOF. */
+  return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+}
+
+static SRes LookToRead_Look_Lookahead(void *pp, const void **buf, size_t *size)
+{ /* Look() in lookahead mode: an empty buffer is refilled with up to LookToRead_BUF_SIZE bytes, whatever *size asks for. */
+  CLookToRead *self = (CLookToRead *)pp;
+  size_t avail = self->size - self->pos;
+  SRes status = SZ_OK;
+  if (avail == 0 && *size > 0)
+  {
+    /* nothing buffered: restart at the beginning of buf and read as much as fits */
+    avail = LookToRead_BUF_SIZE;
+    self->pos = 0;
+    status = self->realStream->Read(self->realStream, self->buf, &avail);
+    self->size = avail;
+  }
+  *size = (*size > avail) ? avail : *size;
+  *buf = self->buf + self->pos;
+  return status;
+}
+
+static SRes LookToRead_Look_Exact(void *pp, const void **buf, size_t *size)
+{ /* Look() in exact mode: an empty buffer is refilled with at most the requested *size bytes, capped at LookToRead_BUF_SIZE. */
+  CLookToRead *self = (CLookToRead *)pp;
+  size_t avail = self->size - self->pos;
+  SRes status = SZ_OK;
+  if (avail == 0 && *size > 0)
+  {
+    if (*size > LookToRead_BUF_SIZE)
+      *size = LookToRead_BUF_SIZE;
+    self->pos = 0;
+    status = self->realStream->Read(self->realStream, self->buf, size);
+    self->size = avail = *size;
+  }
+  if (*size > avail)
+    *size = avail;
+  *buf = self->buf + self->pos;
+  return status;
+}
+
+static SRes LookToRead_Skip(void *pp, size_t offset)
+{ /* Consumes `offset` buffered bytes by advancing the read position; no bounds check is made here. */
+  CLookToRead *p = (CLookToRead *)pp;
+  p->pos += offset;
+  return SZ_OK;
+}
+
+static SRes LookToRead_Read(void *pp, void *buf, size_t *size)
+{ /* Read(): hands out bytes still sitting in the look buffer; only an empty buffer goes to realStream. */
+  CLookToRead *self = (CLookToRead *)pp;
+  size_t buffered = self->size - self->pos;
+
+  /* Empty buffer: read straight from the underlying stream, bypassing buf. */
+  if (buffered == 0)
+    return self->realStream->Read(self->realStream, buf, size);
+  *size = (*size < buffered) ? *size : buffered;
+  memcpy(buf, self->buf + self->pos, *size);
+  self->pos += *size;
+  return SZ_OK;
+}
+
+static SRes LookToRead_Seek(void *pp, Int64 *pos, ESzSeek origin)
+{ /* Seeks the underlying stream and drops whatever is buffered. */
+  CLookToRead *p = (CLookToRead *)pp;
+  p->pos = p->size = 0; /* buffered bytes belong to the old position, so the buffer is emptied first */
+  return p->realStream->Seek(p->realStream, pos, origin);
+}
+
+void LookToRead_CreateVTable(CLookToRead *p, int lookahead)
+{ /* Installs the ILookInStream callbacks; `lookahead` selects which Look implementation is used. */
+  p->s.Read = LookToRead_Read;
+  p->s.Seek = LookToRead_Seek;
+  p->s.Skip = LookToRead_Skip;
+  p->s.Look = LookToRead_Look_Exact; /* used when lookahead is zero */
+  if (lookahead)
+    p->s.Look = LookToRead_Look_Lookahead;
+}
+
+void LookToRead_Init(CLookToRead *p)
+{ /* Empties the look buffer; realStream and the callbacks are not touched. */
+  p->pos = p->size = 0;
+}
+
+static SRes SecToLook_Read(void *pp, void *buf, size_t *size)
+{ /* ISeqInStream::Read adapter that reads through the wrapped stream's Look/Skip pair. */
+  CSecToLook *p = (CSecToLook *)pp;
+  return LookInStream_LookRead(p->realStream, buf, size);
+}
+
+void SecToLook_CreateVTable(CSecToLook *p)
+{ /* Installs the Read callback; realStream must be set by the caller. */
+  p->s.Read = SecToLook_Read;
+}
+
+static SRes SecToRead_Read(void *pp, void *buf, size_t *size)
+{ /* ISeqInStream::Read adapter that forwards to the wrapped stream's Read. */
+  CSecToRead *p = (CSecToRead *)pp;
+  return p->realStream->Read(p->realStream, buf, size);
+}
+
+void SecToRead_CreateVTable(CSecToRead *p)
+{ /* Installs the Read callback; realStream must be set by the caller. */
+  p->s.Read = SecToRead_Read;
+}
diff --git a/SevenZip/7zTypes.h b/SevenZip/7zTypes.h
new file mode 100644
index 0000000..778413e
--- /dev/null
+++ b/SevenZip/7zTypes.h
@@ -0,0 +1,256 @@
+/* 7zTypes.h -- Basic types
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+#ifdef _WIN32
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#else
+typedef int WRes;
+#endif
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+   NOTES: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+typedef int Bool;
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+#define MY_NO_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct
+{
+  Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */
+} IByteIn;
+
+typedef struct
+{
+  void (*Write)(void *p, Byte b);
+} IByteOut;
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) < input(*size)) is allowed */
+} ISeqInStream;
+
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf);
+
+typedef struct
+{
+  size_t (*Write)(void *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+       (result < size) means error */
+} ISeqOutStream;
+
+typedef enum
+{
+  SZ_SEEK_SET = 0,
+  SZ_SEEK_CUR = 1,
+  SZ_SEEK_END = 2
+} ESzSeek;
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
+} ISeekInStream;
+
+typedef struct
+{
+  SRes (*Look)(void *p, const void **buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) > input(*size)) is not allowed
+       (output(*size) < input(*size)) is allowed */
+  SRes (*Skip)(void *p, size_t offset);
+    /* offset must be <= output(*size) of Look */
+
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* reads directly (without buffer). It's same as ISeqInStream::Read */
+  SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin);
+} ILookInStream;
+
+SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size);
+
+#define LookToRead_BUF_SIZE (1 << 14)
+
+typedef struct
+{ /* ILookInStream implementation that buffers data read from an ISeekInStream. */
+  ILookInStream s;
+  ISeekInStream *realStream; /* underlying stream; must be set by the caller */
+  size_t pos; /* offset in buf of the next unread byte */
+  size_t size; /* number of valid bytes in buf */
+  Byte buf[LookToRead_BUF_SIZE];
+} CLookToRead;
+
+void LookToRead_CreateVTable(CLookToRead *p, int lookahead);
+void LookToRead_Init(CLookToRead *p);
+
+typedef struct
+{ /* ISeqInStream view of an ILookInStream. */
+  ISeqInStream s;
+  ILookInStream *realStream; /* wrapped stream; must be set by the caller */
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+typedef struct
+{ /* ISeqInStream view of an ILookInStream. */
+  ISeqInStream s;
+  ILookInStream *realStream; /* wrapped stream; must be set by the caller */
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+typedef struct
+{
+  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Value (UInt64)(Int64)-1 for size means unknown value. */
+} ICompressProgress;
+
+typedef struct
+{
+  void *(*Alloc)(void *p, size_t size);
+  void (*Free)(void *p, void *address); /* address can be 0 */
+} ISzAlloc;
+
+#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
+#define IAlloc_Free(p, a) (p)->Free((p), a)
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/Bcj2.c b/SevenZip/Bcj2.c
new file mode 100644
index 0000000..3c88e44
--- /dev/null
+++ b/SevenZip/Bcj2.c
@@ -0,0 +1,256 @@
+/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
+2015-08-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bcj2.h"
+#include "CpuArch.h"
+
+#define CProb UInt16
+
+#define kTopValue ((UInt32)1 << 24)
+#define kNumModelBits 11
+#define kBitModelTotal (1 << kNumModelBits)
+#define kNumMoveBits 5
+
+#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)
+#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
+
+void Bcj2Dec_Init(CBcj2Dec *p)
+{ /* Puts the decoder in its initial state: zeroed range coder and every probability at kBitModelTotal / 2. */
+  CProb *slot = p->probs;
+  CProb *slotsEnd = slot + sizeof(p->probs) / sizeof(p->probs[0]);
+  p->code = 0;
+  p->range = 0;
+  p->temp[3] = 0;
+  p->ip = 0;
+  p->state = BCJ2_DEC_STATE_OK;
+  while (slot != slotsEnd)
+    *slot++ = kBitModelTotal >> 1;
+}
+
+SRes Bcj2Dec_Decode(CBcj2Dec *p)
+{ /* Decodes until an input stream runs dry or dest is full; p->state then names the stream or state that stopped it. */
+  if (p->range <= 5) /* range coder not started yet: range counts the initial RC bytes read so far */
+  {
+    p->state = BCJ2_DEC_STATE_OK;
+    for (; p->range != 5; p->range++)
+    {
+      if (p->range == 1 && p->code != 0) /* the first byte of the RC stream must be 0 */
+        return SZ_ERROR_DATA;
+      
+      if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
+      {
+        p->state = BCJ2_STREAM_RC; /* need more RC input; the byte count stays in p->range for the next call */
+        return SZ_OK;
+      }
+
+      p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+    }
+    
+    if (p->code == 0xFFFFFFFF)
+      return SZ_ERROR_DATA;
+    
+    p->range = 0xFFFFFFFF;
+  }
+  else if (p->state >= BCJ2_DEC_STATE_ORIG_0)
+  {
+    while (p->state <= BCJ2_DEC_STATE_ORIG_3) /* flush bytes of a 32-bit value that did not fit in dest on an earlier call */
+    {
+      Byte *dest = p->dest;
+      if (dest == p->destLim)
+        return SZ_OK;
+      *dest = p->temp[p->state++ - BCJ2_DEC_STATE_ORIG_0];
+      p->dest = dest + 1;
+    }
+  }
+
+  /*
+  if (BCJ2_IS_32BIT_STREAM(p->state))
+  {
+    const Byte *cur = p->bufs[p->state];
+    if (cur == p->lims[p->state])
+      return SZ_OK;
+    p->bufs[p->state] = cur + 4;
+    
+    {
+      UInt32 val;
+      Byte *dest;
+      SizeT rem;
+      
+      p->ip += 4;
+      val = GetBe32(cur) - p->ip;
+      dest = p->dest;
+      rem = p->destLim - dest;
+      if (rem < 4)
+      {
+        SizeT i;
+        SetUi32(p->temp, val);
+        for (i = 0; i < rem; i++)
+          dest[i] = p->temp[i];
+        p->dest = dest + rem;
+        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
+        return SZ_OK;
+      }
+      SetUi32(dest, val);
+      p->temp[3] = (Byte)(val >> 24);
+      p->dest = dest + 4;
+      p->state = BCJ2_DEC_STATE_OK;
+    }
+  }
+  */
+
+  for (;;)
+  {
+    if (BCJ2_IS_32BIT_STREAM(p->state)) /* resuming after a CALL/JUMP stream ran dry: go straight to reading the 32-bit value */
+      p->state = BCJ2_DEC_STATE_OK;
+    else
+    {
+      if (p->range < kTopValue) /* normalize the range coder by shifting in one more RC byte */
+      {
+        if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
+        {
+          p->state = BCJ2_STREAM_RC;
+          return SZ_OK;
+        }
+        p->range <<= 8;
+        p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+      }
+
+      {
+        const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
+        const Byte *srcLim;
+        Byte *dest;
+        SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
+        
+        if (num == 0)
+        {
+          p->state = BCJ2_STREAM_MAIN;
+          return SZ_OK;
+        }
+        
+        dest = p->dest;
+        if (num > (SizeT)(p->destLim - dest)) /* copy no more than dest can hold */
+        {
+          num = p->destLim - dest;
+          if (num == 0)
+          {
+            p->state = BCJ2_DEC_STATE_ORIG;
+            return SZ_OK;
+          }
+        }
+       
+        srcLim = src + num;
+
+        if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80) /* previous byte was 0x0F and this one is 0x8x: stop on it at once */
+          *dest = src[0];
+        else for (;;) /* copy MAIN bytes until 0xE8/0xE9 or a 0x0F 0x8x pair is found, or the chunk ends */
+        {
+          Byte b = *src;
+          *dest = b;
+          if (b != 0x0F)
+          {
+            if ((b & 0xFE) == 0xE8) /* b is 0xE8 or 0xE9 */
+              break;
+            dest++;
+            if (++src != srcLim)
+              continue;
+            break;
+          }
+          dest++;
+          if (++src == srcLim)
+            break;
+          if ((*src & 0xF0) != 0x80)
+            continue;
+          *dest = *src;
+          break;
+        }
+        
+        num = src - p->bufs[BCJ2_STREAM_MAIN]; /* bytes copied before the stop position */
+        
+        if (src == srcLim) /* chunk ended without a marker: record progress and return */
+        {
+          p->temp[3] = src[-1];
+          p->bufs[BCJ2_STREAM_MAIN] = src;
+          p->ip += (UInt32)num;
+          p->dest += num;
+          p->state =
+            p->bufs[BCJ2_STREAM_MAIN] ==
+            p->lims[BCJ2_STREAM_MAIN] ?
+              (unsigned)BCJ2_STREAM_MAIN :
+              (unsigned)BCJ2_DEC_STATE_ORIG;
+          return SZ_OK;
+        }
+        
+        {
+          UInt32 bound, ttt;
+          CProb *prob;
+          Byte b = src[0];
+          Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);
+          
+          p->temp[3] = b;
+          p->bufs[BCJ2_STREAM_MAIN] = src + 1;
+          num++;
+          p->ip += (UInt32)num;
+          p->dest += num;
+          
+          prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0)); /* slot 2 + prev for 0xE8, slot 1 for 0xE9, slot 0 otherwise */
+          
+          _IF_BIT_0 /* decoded bit 0: no 32-bit value follows, keep copying */
+          {
+            _UPDATE_0
+            continue;
+          }
+          _UPDATE_1
+            
+        }
+      }
+    }
+
+    {
+      UInt32 val;
+      unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; /* 0xE8 reads from the CALL stream, anything else from JUMP */
+      const Byte *cur = p->bufs[cj];
+      Byte *dest;
+      SizeT rem;
+      
+      if (cur == p->lims[cj])
+      {
+        p->state = cj; /* need more input on that stream; resumed at the top of the loop */
+        break;
+      }
+      
+      val = GetBe32(cur);
+      p->bufs[cj] = cur + 4;
+
+      p->ip += 4;
+      val -= p->ip; /* ip has already been advanced past the 4 value bytes */
+      dest = p->dest;
+      rem = p->destLim - dest;
+      
+      if (rem < 4) /* dest too small: write what fits and keep the value in temp for the next call */
+      {
+        SizeT i;
+        SetUi32(p->temp, val);
+        for (i = 0; i < rem; i++)
+          dest[i] = p->temp[i];
+        p->dest = dest + rem;
+        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
+        break;
+      }
+      
+      SetUi32(dest, val);
+      p->temp[3] = (Byte)(val >> 24);
+      p->dest = dest + 4;
+    }
+  }
+
+  if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC]) /* normalize once more if RC input is available */
+  {
+    p->range <<= 8;
+    p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+  }
+
+  return SZ_OK;
+}
diff --git a/SevenZip/Bcj2.h b/SevenZip/Bcj2.h
new file mode 100644
index 0000000..8824080
--- /dev/null
+++ b/SevenZip/Bcj2.h
@@ -0,0 +1,146 @@
+/* Bcj2.h -- BCJ2 Converter for x86 code
+2014-11-10 : Igor Pavlov : Public domain */
+
+#ifndef __BCJ2_H
+#define __BCJ2_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define BCJ2_NUM_STREAMS 4  /* number of entries in the stream enum below */
+
+enum  /* indices into the bufs[] / lims[] arrays of CBcj2Dec and CBcj2Enc */
+{
+  BCJ2_STREAM_MAIN,  /* main byte stream */
+  BCJ2_STREAM_CALL,  /* 32-bit values; the decoder reads this stream when the preceding opcode byte is 0xE8 */
+  BCJ2_STREAM_JUMP,  /* 32-bit values; the decoder reads this stream for every other converted opcode */
+  BCJ2_STREAM_RC  /* bytes consumed by the range coder */
+};
+
+enum  /* decoder states; values below BCJ2_NUM_STREAMS are stream indices (stored when that stream runs out of input) */
+{
+  BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,  /* ORIG_0 + k: dest filled up after k bytes of a 4-byte value were written */
+  BCJ2_DEC_STATE_ORIG_1,
+  BCJ2_DEC_STATE_ORIG_2,
+  BCJ2_DEC_STATE_ORIG_3,
+  
+  BCJ2_DEC_STATE_ORIG,  /* presumably: dest is full while main-stream input remains -- confirm in Bcj2Dec_Decode */
+  BCJ2_DEC_STATE_OK
+};
+
+enum  /* encoder states; like the decoder states they start right after the stream indices */
+{
+  BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
+  BCJ2_ENC_STATE_OK
+};
+
+
+#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
+
+/*
+CBcj2Dec / CBcj2Enc
+bufs sizes:
+  BUF_SIZE(n) = lims[n] - bufs[n]
+bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
+    (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
+    (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
+*/
+
+/*
+CBcj2Dec:
+dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
+  bufs[BCJ2_STREAM_MAIN] >= dest &&
+  bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +
+        BUF_SIZE(BCJ2_STREAM_CALL) +
+        BUF_SIZE(BCJ2_STREAM_JUMP)
+     tempReserv = 0 : for first call of Bcj2Dec_Decode
+     tempReserv = 4 : for any other calls of Bcj2Dec_Decode
+  overlap with offset = 1 is not allowed
+*/
+
+typedef struct
+{
+  const Byte *bufs[BCJ2_NUM_STREAMS];  /* current read position of each input stream */
+  const Byte *lims[BCJ2_NUM_STREAMS];  /* end of the available data of each input stream */
+  Byte *dest;  /* next output byte to write */
+  const Byte *destLim;  /* end of the output buffer */
+
+  unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_DEC_STATE_ORIG */
+
+  UInt32 ip;  /* running position counter; the decoder subtracts it from each value read from the CALL/JUMP streams */
+  Byte temp[4];  /* temp[3] keeps the last byte produced; all 4 bytes hold a converted value that did not fit into dest */
+  UInt32 range;  /* range decoder: current range */
+  UInt32 code;  /* range decoder: current code value; Bcj2Dec_IsFinished() tests it against 0 */
+  UInt16 probs[2 + 256];  /* bit probabilities: [0] other bytes, [1] after 0xE9, [2 + prev] after 0xE8 preceded by byte prev */
+} CBcj2Dec;
+
+void Bcj2Dec_Init(CBcj2Dec *p);
+
+/* Returns: SZ_OK or SZ_ERROR_DATA */
+SRes Bcj2Dec_Decode(CBcj2Dec *p);
+
+#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)
+
+
+
+typedef enum
+{
+  BCJ2_ENC_FINISH_MODE_CONTINUE,
+  BCJ2_ENC_FINISH_MODE_END_BLOCK,
+  BCJ2_ENC_FINISH_MODE_END_STREAM
+} EBcj2Enc_FinishMode;
+
+typedef struct
+{
+  Byte *bufs[BCJ2_NUM_STREAMS];  /* presumably the current write position of each output stream -- encoder body not shown here */
+  const Byte *lims[BCJ2_NUM_STREAMS];  /* end of each stream buffer */
+  const Byte *src;  /* current input position */
+  const Byte *srcLim;  /* end of input; Bcj2Enc_Get_InputData_Size() reports (srcLim - src) + tempPos */
+
+  unsigned state;  /* a stream index or one of BCJ2_ENC_STATE_* */
+  EBcj2Enc_FinishMode finishMode;
+
+  Byte prevByte;
+
+  Byte cache;
+  UInt32 range;
+  UInt64 low;
+  UInt64 cacheSize;
+
+  UInt32 ip;
+
+  /* 32-bit relative offset in JUMP/CALL commands is
+       - (mod 4 GB)   in 32-bit mode
+       - signed Int32 in 64-bit mode
+     We use (mod 4 GB) check for fileSize.
+     Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */
+  UInt32 fileIp;
+  UInt32 fileSize;    /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
+  UInt32 relatLimit;  /* (relatLimit <= ((UInt32)1 << 31)), 0 means disable_conversion */
+
+  UInt32 tempTarget;
+  unsigned tempPos;  /* number of bytes currently held in temp[] (counted as pending input by Bcj2Enc_Get_InputData_Size) */
+  Byte temp[4 * 2];
+
+  unsigned flushPos;  /* Bcj2Enc_IsFinished() is true once this equals 5 */
+  
+  UInt16 probs[2 + 256];  /* same size as the probs[] table in CBcj2Dec */
+} CBcj2Enc;
+
+void Bcj2Enc_Init(CBcj2Enc *p);
+void Bcj2Enc_Encode(CBcj2Enc *p);
+
+#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)
+#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)
+
+
+#define BCJ2_RELAT_LIMIT_NUM_BITS 26
+#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)
+
+/* limit for CBcj2Enc::fileSize variable */
+#define BCJ2_FileSize_MAX ((UInt32)1 << 31)
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/Bra.c b/SevenZip/Bra.c
new file mode 100644
index 0000000..cdb9456
--- /dev/null
+++ b/SevenZip/Bra.c
@@ -0,0 +1,135 @@
+/* Bra.c -- Converters for RISC code
+2010-04-16 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  /* Rewrites the 24-bit word offset of each 4-byte word whose top byte is 0xEB (presumably ARM BL); returns bytes processed. */
+  SizeT pos = 0;
+  if (size < 4)
+    return 0;
+  size -= 4;  /* last offset at which a whole 4-byte word still fits */
+  ip += 8;    /* bias applied before mixing in the position (presumably the ARM "PC is 8 ahead" rule) */
+  while (pos <= size)
+  {
+    Byte *ins = data + pos;
+    if (ins[3] == 0xEB)
+    {
+      const UInt32 pc = ip + (UInt32)pos;
+      UInt32 field = (UInt32)ins[0] | ((UInt32)ins[1] << 8) | ((UInt32)ins[2] << 16);
+      UInt32 addr = field << 2;  /* word offset -> byte offset */
+      addr = encoding ? (pc + addr) : (addr - pc);
+      addr >>= 2;
+      ins[0] = (Byte)addr;
+      ins[1] = (Byte)(addr >> 8);
+      ins[2] = (Byte)(addr >> 16);
+    }
+    pos += 4;
+  }
+  return pos;
+}
+
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  /* Scans in 2-byte steps for a halfword pair tagged 11110xxx / 11111xxx (presumably Thumb BL) and
+     rewrites its 22-bit halfword offset (11 bits in each halfword); returns bytes processed. */
+  SizeT pos = 0;
+  if (size < 4)
+    return 0;
+  size -= 4;
+  ip += 4;
+  while (pos <= size)
+  {
+    Byte *ins = data + pos;
+    UInt32 pc, addr;
+    if ((ins[1] & 0xF8) != 0xF0 || (ins[3] & 0xF8) != 0xF8)
+    {
+      pos += 2;  /* no match: move on by one halfword */
+      continue;
+    }
+    pc = ip + (UInt32)pos;
+    addr = (UInt32)ins[2]
+        | (((UInt32)ins[3] & 0x7) << 8)
+        | ((UInt32)ins[0] << 11)
+        | (((UInt32)ins[1] & 0x7) << 19);
+    addr <<= 1;  /* halfword offset -> byte offset */
+    addr = encoding ? (pc + addr) : (addr - pc);
+    addr >>= 1;
+
+    ins[0] = (Byte)(addr >> 11);
+    ins[1] = (Byte)(0xF0 | ((addr >> 19) & 0x7));
+    ins[2] = (Byte)addr;
+    ins[3] = (Byte)(0xF8 | ((addr >> 8) & 0x7));
+    pos += 4;  /* a converted pair occupies both halfwords */
+  }
+  return pos;
+}
+
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  /* Rewrites the 26-bit target of big-endian words whose top six bits are 0x12 and low two bits are 01; returns bytes processed. */
+  SizeT pos = 0;
+  if (size < 4)
+    return 0;
+  size -= 4;  /* last offset at which a whole 4-byte word still fits */
+  for (; pos <= size; pos += 4)
+  {
+    Byte *ins = data + pos;
+    UInt32 pc, addr;
+    if ((ins[0] >> 2) != 0x12 || (ins[3] & 3) != 1)
+      continue;
+
+    pc = ip + (UInt32)pos;
+    addr = ((UInt32)(ins[0] & 3) << 24)
+        | ((UInt32)ins[1] << 16)
+        | ((UInt32)ins[2] << 8)
+        | ((UInt32)ins[3] & (~3));
+    addr = encoding ? (pc + addr) : (addr - pc);
+
+    ins[0] = (Byte)(0x48 | ((addr >> 24) & 0x3));
+    ins[1] = (Byte)(addr >> 16);
+    ins[2] = (Byte)(addr >> 8);
+    /* keep the two low flag bits already in byte 3 and OR in the low byte of the new address */
+    ins[3] = (Byte)((ins[3] & 0x3) | addr);
+  }
+  return pos;
+}
+
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  /* Rewrites the word displacement of big-endian words that start with 0x40 00xxxxxx or 0x7F 11xxxxxx
+     (presumably SPARC CALL with a small signed displacement); returns the number of bytes processed. */
+  SizeT i;  /* SizeT like the sibling converters: a UInt32 index would wrap and loop forever once size reaches 4 GiB */
+  if (size < 4)
+    return 0;
+  size -= 4;  /* last offset at which a whole 4-byte word still fits */
+  for (i = 0; i <= size; i += 4)
+  {
+    if ((data[i] == 0x40 && (data[i + 1] & 0xC0) == 0x00) ||
+        (data[i] == 0x7F && (data[i + 1] & 0xC0) == 0xC0))
+    {
+      const UInt32 pc = ip + (UInt32)i;  /* position mixed into the address, truncated to 32 bits as before */
+      UInt32 dest;
+      UInt32 src =
+        ((UInt32)data[i + 0] << 24) |
+        ((UInt32)data[i + 1] << 16) |
+        ((UInt32)data[i + 2] << 8) |
+        ((UInt32)data[i + 3]);
+      src <<= 2;  /* drops the two top bits and turns the word displacement into a byte offset */
+      dest = encoding ? (pc + src) : (src - pc);
+      dest >>= 2;
+
+      /* keep the low 22 bits, replicate bit 22 into bits 22..29, and set the top two bits to 01 */
+      dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000;
+
+      data[i + 0] = (Byte)(dest >> 24);
+      data[i + 1] = (Byte)(dest >> 16);
+      data[i + 2] = (Byte)(dest >> 8);
+      data[i + 3] = (Byte)dest;
+    }
+  }
+  return i;
+}
diff --git a/SevenZip/Bra.h b/SevenZip/Bra.h
new file mode 100644
index 0000000..855e37a
--- /dev/null
+++ b/SevenZip/Bra.h
@@ -0,0 +1,64 @@
+/* Bra.h -- Branch converters for executables
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __BRA_H
+#define __BRA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+These functions convert relative addresses to absolute addresses
+in CALL instructions to increase the compression ratio.
+  
+  In:
+    data     - data buffer
+    size     - size of data
+    ip       - current virtual Instruction Pointer (IP) value
+    state    - state variable for x86 converter
+    encoding - 0 (for decoding), 1 (for encoding)
+  
+  Out:
+    state    - state variable for x86 converter
+
+  Returns:
+    The number of processed bytes. If you call these functions with multiple calls,
+    you must start next call with first byte after block of processed bytes.
+  
+  Type   Endian  Alignment  LookAhead
+  
+  x86    little      1          4
+  ARMT   little      2          2
+  ARM    little      4          0
+  PPC     big        4          0
+  SPARC   big        4          0
+  IA64   little     16          0
+
+  size must be >= Alignment + LookAhead, if it's not last block.
+  If (size < Alignment + LookAhead), converter returns 0.
+
+  Example:
+
+    UInt32 ip = 0;
+    for ()
+    {
+      ; size must be >= Alignment + LookAhead, if it's not last block
+      SizeT processed = Convert(data, size, ip, 1);
+      data += processed;
+      size -= processed;
+      ip += processed;
+    }
+*/
+
+#define x86_Convert_Init(state) { state = 0; }
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/Bra86.c b/SevenZip/Bra86.c
new file mode 100644
index 0000000..6db15e7
--- /dev/null
+++ b/SevenZip/Bra86.c
@@ -0,0 +1,82 @@
+/* Bra86.c -- Converter for x86 code (BCJ)
+2013-11-12 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+
+#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
+
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
+{  /* Converts the 32-bit operand that follows E8/E9 opcode bytes (encoding != 0: add position, else subtract); returns bytes processed. */
+  SizeT pos = 0;
+  UInt32 mask = *state & 7;  /* only the low 3 bits of the saved state are used */
+  if (size < 5)
+    return 0;  /* not enough data for an opcode byte plus 4 operand bytes */
+  size -= 4;  /* scan limit: an opcode found below data + size still has p[1..4] inside the buffer */
+  ip += 5;  /* ip + pos then addresses the byte just past a 5-byte instruction that starts at pos */
+  /* Each pass of the outer loop finds the next candidate opcode and either converts its operand or skips it. */
+  for (;;)
+  {
+    Byte *p = data + pos;
+    const Byte *limit = data + size;
+    for (; p < limit; p++)
+      if ((*p & 0xFE) == 0xE8)  /* matches 0xE8 and 0xE9 */
+        break;
+
+    {
+      SizeT d = (SizeT)(p - data - pos);  /* bytes skipped since the previous position */
+      pos = (SizeT)(p - data);
+      if (p >= limit)
+      {
+        *state = (d > 2 ? 0 : mask >> (unsigned)d);  /* hand the aged mask to the next call */
+        return pos;
+      }
+      if (d > 2)
+        mask = 0;  /* history is dropped after more than two skipped bytes */
+      else
+      {
+        mask >>= (unsigned)d;  /* age the history by one bit per skipped byte */
+        if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(mask >> 1) + 1])))
+        {
+          mask = (mask >> 1) | 4;  /* reject this candidate and record it in bit 2 */
+          pos++;
+          continue;
+        }
+      }
+    }
+
+    if (Test86MSByte(p[4]))  /* Test86MSByte is true for 0x00 and 0xFF: convert only when the operand's top byte is one of them */
+    {
+      UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);  /* little-endian operand */
+      UInt32 cur = ip + (UInt32)pos;
+      pos += 5;
+      if (encoding)
+        v += cur;
+      else
+        v -= cur;
+      if (mask != 0)
+      {
+        unsigned sh = (mask & 6) << 2;  /* bit offset (a multiple of 8) of the byte to re-check */
+        if (Test86MSByte((Byte)(v >> sh)))
+        {
+          v ^= (((UInt32)0x100 << sh) - 1);  /* flip the low (sh + 8) bits, then apply the conversion a second time */
+          if (encoding)
+            v += cur;
+          else
+            v -= cur;
+        }
+        mask = 0;
+      }
+      p[1] = (Byte)v;
+      p[2] = (Byte)(v >> 8);
+      p[3] = (Byte)(v >> 16);
+      p[4] = (Byte)(0 - ((v >> 24) & 1));  /* top byte becomes 0x00 or 0xFF according to bit 24 */
+    }
+    else
+    {
+      mask = (mask >> 1) | 4;  /* operand not convertible: record the rejected opcode and move one byte on */
+      pos++;
+    }
+  }
+}
diff --git a/SevenZip/BraIA64.c b/SevenZip/BraIA64.c
new file mode 100644
index 0000000..fa60356
--- /dev/null
+++ b/SevenZip/BraIA64.c
@@ -0,0 +1,69 @@
+/* BraIA64.c -- Converter for IA-64 code
+2013-11-12 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+
+static const Byte kBranchTable[32] =  /* indexed by the low 5 bits of a bundle's first byte; bit k set means slot k is examined */
+{
+  0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0,
+  4, 4, 6, 6, 0, 0, 7, 7,
+  4, 4, 0, 0, 4, 4, 0, 0
+};
+
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{  /* Walks 16-byte bundles and rewrites the 21-bit field of matching slots (encoding != 0: add position, else subtract); returns bytes processed. */
+  SizeT i;
+  if (size < 16)
+    return 0;
+  size -= 16;  /* last offset at which a whole 16-byte bundle still fits */
+  for (i = 0; i <= size; i += 16)
+  {
+    UInt32 instrTemplate = data[i] & 0x1F;
+    UInt32 mask = kBranchTable[instrTemplate];
+    UInt32 bitPos = 5;  /* slot 0 starts right after the 5 template bits; each slot is 41 bits wide */
+    int slot;
+    for (slot = 0; slot < 3; slot++, bitPos += 41)
+    {
+      UInt32 bytePos, bitRes;
+      UInt64 instruction, instNorm;
+      int j;
+      if (((mask >> slot) & 1) == 0)
+        continue;
+      bytePos = (bitPos >> 3);
+      bitRes = bitPos & 0x7;
+      instruction = 0;
+      for (j = 0; j < 6; j++)  /* load the 6 bytes that contain the slot, little-endian */
+        instruction += (UInt64)data[i + j + bytePos] << (8 * j);
+
+      instNorm = instruction >> bitRes;  /* slot aligned to bit 0 */
+      if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0)
+      {
+        UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF);  /* low 20 bits of the field: slot bits 13..32 */
+        UInt32 dest;
+        src |= ((UInt32)(instNorm >> 36) & 1) << 20;  /* bit 20 of the field: slot bit 36 */
+        
+        src <<= 4;  /* field is in units of 16 bytes */
+        
+        if (encoding)
+          dest = ip + (UInt32)i + src;
+        else
+          dest = src - (ip + (UInt32)i);
+        
+        dest >>= 4;
+        /* clear slot bits 13..32 and 36, then store the 21 bits of dest back into them */
+        instNorm &= ~((UInt64)(0x8FFFFF) << 13);
+        instNorm |= ((UInt64)(dest & 0xFFFFF) << 13);
+        instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20));
+        /* keep the bits below the slot from the loaded value and merge the updated slot above them */
+        instruction &= (1 << bitRes) - 1;
+        instruction |= (instNorm << bitRes);
+        for (j = 0; j < 6; j++)
+          data[i + j + bytePos] = (Byte)(instruction >> (8 * j));
+      }
+    }
+  }
+  return i;
+}
diff --git a/SevenZip/Compiler.h b/SevenZip/Compiler.h
new file mode 100644
index 0000000..5bba7ee
--- /dev/null
+++ b/SevenZip/Compiler.h
@@ -0,0 +1,32 @@
+/* Compiler.h
+2015-08-02 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+#ifdef _MSC_VER
+
+  #ifdef UNDER_CE
+    #define RPC_NO_WINDOWS_H
+    /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+    #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+    #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+  #endif
+
+  #if _MSC_VER >= 1300
+    #pragma warning(disable : 4996) // This function or variable may be unsafe
+  #else
+    #pragma warning(disable : 4511) // copy constructor could not be generated
+    #pragma warning(disable : 4512) // assignment operator could not be generated
+    #pragma warning(disable : 4514) // unreferenced inline function has been removed
+    #pragma warning(disable : 4702) // unreachable code
+    #pragma warning(disable : 4710) // not inlined
+    #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+  #endif
+
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/SevenZip/CpuArch.c b/SevenZip/CpuArch.c
new file mode 100644
index 0000000..554ffa4
--- /dev/null
+++ b/SevenZip/CpuArch.c
@@ -0,0 +1,200 @@
+/* CpuArch.c -- CPU specific code
+2016-02-25: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
+#define USE_ASM
+#endif
+
+#if !defined(USE_ASM) && _MSC_VER >= 1500
+#include <intrin.h>
+#endif
+
+#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
+static UInt32 CheckFlag(UInt32 flag)
+{  /* Tries to toggle the EFLAGS bit(s) in flag; returns flag masked by the bits that really changed (0 if none could be toggled). */
+  #ifdef _MSC_VER
+  __asm pushfd;
+  __asm pop EAX;
+  __asm mov EDX, EAX;
+  __asm xor EAX, flag;
+  __asm push EAX;
+  __asm popfd;
+  __asm pushfd;
+  __asm pop EAX;
+  __asm xor EAX, EDX;
+  __asm push EDX;
+  __asm popfd;
+  __asm and flag, EAX;
+  #else
+  __asm__ __volatile__ (
+    "pushf\n\t"  /* EAX = current EFLAGS (this line and the next) */
+    "pop  %%EAX\n\t"
+    "movl %%EAX,%%EDX\n\t"  /* EDX keeps the original value */
+    "xorl %0,%%EAX\n\t"  /* flip the requested bit(s) */
+    "push %%EAX\n\t"  /* write the modified value back to EFLAGS */
+    "popf\n\t"
+    "pushf\n\t"  /* read EFLAGS again */
+    "pop  %%EAX\n\t"
+    "xorl %%EDX,%%EAX\n\t"  /* EAX = bits that differ from the original */
+    "push %%EDX\n\t"  /* restore the original EFLAGS */
+    "popf\n\t"
+    "andl %%EAX, %0\n\t":  /* flag &= changed bits */
+    "=c" (flag) : "c" (flag) :
+    "%eax", "%edx");
+  #endif
+  return flag;
+}
+#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
+#else
+#define CHECK_CPUID_IS_SUPPORTED
+#endif
+
+void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
+{  /* Executes CPUID with EAX = function and stores the resulting EAX, EBX, ECX, EDX in *a, *b, *c, *d. */
+  #ifdef USE_ASM
+  /* inline-assembly variants */
+  #ifdef _MSC_VER
+  /* MSVC inline asm: EBX, ECX and EDX are zeroed before CPUID is issued */
+  UInt32 a2, b2, c2, d2;
+  __asm xor EBX, EBX;
+  __asm xor ECX, ECX;
+  __asm xor EDX, EDX;
+  __asm mov EAX, function;
+  __asm cpuid;
+  __asm mov a2, EAX;
+  __asm mov b2, EBX;
+  __asm mov c2, ECX;
+  __asm mov d2, EDX;
+
+  *a = a2;
+  *b = b2;
+  *c = c2;
+  *d = d2;
+
+  #else
+  /* GNU asm: the __PIC__ variants park rbx/ebx in rdi/edi around CPUID (presumably because EBX is reserved for PIC) and return EBX via "=D". NOTE(review): ECX is not set on this path. */
+  __asm__ __volatile__ (
+  #if defined(MY_CPU_AMD64) && defined(__PIC__)
+    "mov %%rbx, %%rdi;"
+    "cpuid;"
+    "xchg %%rbx, %%rdi;"
+    : "=a" (*a) ,
+      "=D" (*b) ,
+  #elif defined(MY_CPU_X86) && defined(__PIC__)
+    "mov %%ebx, %%edi;"
+    "cpuid;"
+    "xchgl %%ebx, %%edi;"
+    : "=a" (*a) ,
+      "=D" (*b) ,
+  #else
+    "cpuid"
+    : "=a" (*a) ,
+      "=b" (*b) ,
+  #endif
+      "=c" (*c) ,
+      "=d" (*d)
+    : "0" (function)) ;
+
+  #endif
+  
+  #else
+  /* no inline asm available: use the compiler's __cpuid intrinsic */
+  int CPUInfo[4];
+  __cpuid(CPUInfo, function);
+  *a = CPUInfo[0];
+  *b = CPUInfo[1];
+  *c = CPUInfo[2];
+  *d = CPUInfo[3];
+
+  #endif
+}
+
+Bool x86cpuid_CheckAndRead(Cx86cpuid *p)
+{  /* Fills *p from CPUID functions 0 and 1; returns True on success. */
+  CHECK_CPUID_IS_SUPPORTED  /* on 32-bit asm builds this macro returns False when EFLAGS bit 18 or bit 21 cannot be toggled; otherwise it is empty */
+  MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);  /* vendor[] is stored in EBX, EDX, ECX order */
+  MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
+  return True;
+}
+
+static const UInt32 kVendors[][3] =  /* vendor signatures laid out like Cx86cpuid::vendor; the row index is the value returned by x86cpuid_GetFirm */
+{
+  { 0x756E6547, 0x49656E69, 0x6C65746E},  /* "Genu" "ineI" "ntel" */
+  { 0x68747541, 0x69746E65, 0x444D4163},  /* "Auth" "enti" "cAMD" */
+  { 0x746E6543, 0x48727561, 0x736C7561}   /* "Cent" "aurH" "auls" */
+};
+
+int x86cpuid_GetFirm(const Cx86cpuid *p)
+{
+  /* Returns the index of the kVendors row equal to p->vendor, or -1 when no row matches. */
+  const unsigned count = (unsigned)(sizeof(kVendors) / sizeof(kVendors[0]));
+  unsigned idx;
+  for (idx = 0; idx != count; idx++)
+  {
+    const UInt32 *sig = kVendors[idx];
+    if (p->vendor[0] == sig[0] && p->vendor[1] == sig[1] && p->vendor[2] == sig[2])
+      return (int)idx;
+  }
+  return -1;
+}
+
+Bool CPU_Is_InOrder()
+{
+  /* Classifies the CPU by vendor/family/model; returns True when CPUID is unusable or the vendor is unknown. */
+  Cx86cpuid info;
+  UInt32 family, model;
+  int firm;
+  if (!x86cpuid_CheckAndRead(&info))
+    return True;
+
+  firm = x86cpuid_GetFirm(&info);
+  family = x86cpuid_GetFamily(info.ver);
+  model = x86cpuid_GetModel(info.ver);
+
+  if (firm == CPU_FIRM_AMD)
+    return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
+  if (firm == CPU_FIRM_VIA)
+    return (family < 6 || (family == 6 && model < 0xF));
+  if (firm != CPU_FIRM_INTEL)
+    return True;
+  if (family != 6)
+    return (family < 6);
+  /* Intel family 6: only the in-order Atom models below */
+  return (model == 0x1C   /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
+       || model == 0x26   /* 45 nm, Z6xx */
+       || model == 0x27   /* 32 nm, Z2460 */
+       || model == 0x35   /* 32 nm, Z2760 */
+       || model == 0x36); /* 32 nm, N2xxx, D2xxx */
+}
+
+#if !defined(MY_CPU_AMD64) && defined(_WIN32)
+#include <windows.h>
+static Bool CPU_Sys_Is_SSE_Supported()
+{  /* Returns non-zero when GetVersionEx succeeds and reports a Windows major version of at least 5. */
+  OSVERSIONINFO vi;
+  vi.dwOSVersionInfoSize = sizeof(vi);  /* the structure size is filled in before the call */
+  if (!GetVersionEx(&vi))
+    return False;  /* version query failed: report "not supported" */
+  return (vi.dwMajorVersion >= 5);
+}
+#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
+#else
+#define CHECK_SYS_SSE_SUPPORT
+#endif
+
+Bool CPU_Is_Aes_Supported()
+{  /* Returns bit 25 of the ECX value reported by CPUID function 1, or False when the checks below fail. */
+  Cx86cpuid p;
+  CHECK_SYS_SSE_SUPPORT  /* on 32-bit Windows builds this macro returns False if CPU_Sys_Is_SSE_Supported() fails; otherwise it is empty */
+  if (!x86cpuid_CheckAndRead(&p))
+    return False;
+  return (p.c >> 25) & 1;  /* p.c is the ECX output of CPUID function 1 */
+}
+
+#endif
diff --git a/SevenZip/CpuArch.h b/SevenZip/CpuArch.h
new file mode 100644
index 0000000..f6a28ba
--- /dev/null
+++ b/SevenZip/CpuArch.h
@@ -0,0 +1,222 @@
+/* CpuArch.h -- CPU specific code
+2015-12-01: Igor Pavlov : Public domain */
+
+#ifndef __CPU_ARCH_H
+#define __CPU_ARCH_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+MY_CPU_LE means that CPU is LITTLE ENDIAN.
+MY_CPU_BE means that CPU is BIG ENDIAN.
+If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
+
+MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+*/
+
+#if defined(_M_X64) \
+   || defined(_M_AMD64) \
+   || defined(__x86_64__) \
+   || defined(__AMD64__) \
+   || defined(__amd64__)
+  #define MY_CPU_AMD64
+#endif
+
+#if defined(MY_CPU_AMD64) \
+    || defined(_M_IA64) \
+    || defined(__AARCH64EL__) \
+    || defined(__AARCH64EB__)
+  #define MY_CPU_64BIT
+#endif
+
+#if defined(_M_IX86) || defined(__i386__)
+#define MY_CPU_X86
+#endif
+
+#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
+#define MY_CPU_X86_OR_AMD64
+#endif
+
+#if defined(MY_CPU_X86) \
+    || defined(_M_ARM) \
+    || defined(__ARMEL__) \
+    || defined(__THUMBEL__) \
+    || defined(__ARMEB__) \
+    || defined(__THUMBEB__)
+  #define MY_CPU_32BIT
+#endif
+
+#if defined(_WIN32) && defined(_M_ARM)
+#define MY_CPU_ARM_LE
+#endif
+
+#if defined(_WIN32) && defined(_M_IA64)
+#define MY_CPU_IA64_LE
+#endif
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+    || defined(MY_CPU_ARM_LE) \
+    || defined(MY_CPU_IA64_LE) \
+    || defined(__LITTLE_ENDIAN__) \
+    || defined(__ARMEL__) \
+    || defined(__THUMBEL__) \
+    || defined(__AARCH64EL__) \
+    || defined(__MIPSEL__) \
+    || defined(__MIPSEL) \
+    || defined(_MIPSEL) \
+    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+  #define MY_CPU_LE
+#endif
+
+#if defined(__BIG_ENDIAN__) \
+    || defined(__ARMEB__) \
+    || defined(__THUMBEB__) \
+    || defined(__AARCH64EB__) \
+    || defined(__MIPSEB__) \
+    || defined(__MIPSEB) \
+    || defined(_MIPSEB) \
+    || defined(__m68k__) \
+    || defined(__s390__) \
+    || defined(__s390x__) \
+    || defined(__zarch__) \
+    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+  #define MY_CPU_BE
+#endif
+
+#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
+Stop_Compiling_Bad_Endian
+#endif
+
+
+#ifdef MY_CPU_LE
+  #if defined(MY_CPU_X86_OR_AMD64) \
+      /* || defined(__AARCH64EL__) */
+    #define MY_CPU_LE_UNALIGN
+  #endif
+#endif
+
+
+#ifdef MY_CPU_LE_UNALIGN
+
+#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
+#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+
+#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
+#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
+
+#else
+
+#define GetUi16(p) ( (UInt16) ( \
+             ((const Byte *)(p))[0] | \
+    ((UInt16)((const Byte *)(p))[1] << 8) ))
+
+#define GetUi32(p) ( \
+             ((const Byte *)(p))[0]        | \
+    ((UInt32)((const Byte *)(p))[1] <<  8) | \
+    ((UInt32)((const Byte *)(p))[2] << 16) | \
+    ((UInt32)((const Byte *)(p))[3] << 24))
+
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+
+#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)_vvv_; \
+    _ppp_[1] = (Byte)(_vvv_ >> 8); }
+
+#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)_vvv_; \
+    _ppp_[1] = (Byte)(_vvv_ >> 8); \
+    _ppp_[2] = (Byte)(_vvv_ >> 16); \
+    _ppp_[3] = (Byte)(_vvv_ >> 24); }
+
+#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
+    SetUi32(_ppp2_    , (UInt32)_vvv2_); \
+    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
+
+#endif
+
+
+#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
+
+/* Note: we use bswap instruction, that is unsupported in 386 cpu */
+
+#include <stdlib.h>
+
+#pragma intrinsic(_byteswap_ulong)
+#pragma intrinsic(_byteswap_uint64)
+#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
+#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
+
+#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
+
+#elif defined(MY_CPU_LE_UNALIGN) && defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+
+#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
+#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
+
+#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
+
+#else
+
+#define GetBe32(p) ( \
+    ((UInt32)((const Byte *)(p))[0] << 24) | \
+    ((UInt32)((const Byte *)(p))[1] << 16) | \
+    ((UInt32)((const Byte *)(p))[2] <<  8) | \
+             ((const Byte *)(p))[3] )
+
+#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+
+#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)(_vvv_ >> 24); \
+    _ppp_[1] = (Byte)(_vvv_ >> 16); \
+    _ppp_[2] = (Byte)(_vvv_ >> 8); \
+    _ppp_[3] = (Byte)_vvv_; }
+
+#endif
+
+
+#define GetBe16(p) ( (UInt16) ( \
+    ((UInt16)((const Byte *)(p))[0] << 8) | \
+             ((const Byte *)(p))[1] ))
+
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+typedef struct  /* raw CPUID results as filled in by x86cpuid_CheckAndRead */
+{
+  UInt32 maxFunc;  /* EAX of CPUID function 0 */
+  UInt32 vendor[3];  /* EBX, EDX, ECX of CPUID function 0, in that order */
+  UInt32 ver;  /* EAX of CPUID function 1; decoded by the x86cpuid_Get* macros */
+  UInt32 b;  /* EBX of CPUID function 1 */
+  UInt32 c;  /* ECX of CPUID function 1 */
+  UInt32 d;  /* EDX of CPUID function 1 */
+} Cx86cpuid;
+
+enum
+{
+  CPU_FIRM_INTEL,
+  CPU_FIRM_AMD,
+  CPU_FIRM_VIA
+};
+
+void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
+
+Bool x86cpuid_CheckAndRead(Cx86cpuid *p);
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+
+#define x86cpuid_GetFamily(ver) ((((ver) >> 16) & 0xFF0) | (((ver) >> 8) & 0xF))  /* bits 20..27 -> result bits 4..11, bits 8..11 -> result bits 0..3; argument parenthesized so expressions expand correctly */
+#define x86cpuid_GetModel(ver)  ((((ver) >> 12) &  0xF0) | (((ver) >> 4) & 0xF))  /* bits 16..19 -> result bits 4..7, bits 4..7 -> result bits 0..3 */
+#define x86cpuid_GetStepping(ver) ((ver) & 0xF)  /* bits 0..3 */
+
+Bool CPU_Is_InOrder();
+Bool CPU_Is_Aes_Supported();
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/Delta.c b/SevenZip/Delta.c
new file mode 100644
index 0000000..e3edd21
--- /dev/null
+++ b/SevenZip/Delta.c
@@ -0,0 +1,64 @@
+/* Delta.c -- Delta converter
+2009-05-26 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Delta.h"
+
+void Delta_Init(Byte *state)
+{
+  unsigned left = DELTA_STATE_SIZE;  /* zero every byte of the DELTA_STATE_SIZE-byte history buffer */
+  while (left != 0)
+    state[--left] = 0;
+}
+
+static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
+{
+  const Byte *end = src + size;  /* plain forward byte copy of size bytes from src to dest */
+  while (src != end)
+    *dest++ = *src++;
+}
+
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  /* Replaces each byte by its difference from the byte `delta` positions earlier; state carries the last `delta` bytes across calls. */
+  Byte hist[DELTA_STATE_SIZE];
+  unsigned col = 0;
+  SizeT pos = 0;
+  MyMemCpy(hist, state, delta);
+  while (pos < size)
+  {
+    /* one sweep over the history ring; stops early when the data runs out */
+    col = 0;
+    while (col < delta && pos < size)
+    {
+      const Byte cur = data[pos];
+      data[pos++] = (Byte)(cur - hist[col]);
+      hist[col++] = cur;
+    }
+  }
+  col = (col == delta) ? 0 : col;  /* col is now the slot holding the oldest byte */
+  MyMemCpy(state, hist + col, delta - col);  /* write the ring back rotated so that the oldest byte comes first */
+  MyMemCpy(state + delta - col, hist, col);
+}
+
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  /* Inverse of Delta_Encode: adds the byte `delta` positions earlier back onto each byte; state carries history across calls. */
+  Byte hist[DELTA_STATE_SIZE];
+  unsigned col = 0;
+  SizeT pos = 0;
+  MyMemCpy(hist, state, delta);
+  while (pos < size)
+  {
+    for (col = 0; col < delta && pos < size; col++, pos++)
+    {
+      const Byte sum = (Byte)(hist[col] + data[pos]);
+      data[pos] = sum;
+      hist[col] = sum;
+    }
+  }
+  col = (col == delta) ? 0 : col;  /* col is now the slot holding the oldest byte */
+  MyMemCpy(state, hist + col, delta - col);  /* write the ring back rotated so that the oldest byte comes first */
+  MyMemCpy(state + delta - col, hist, col);
+}
diff --git a/SevenZip/Delta.h b/SevenZip/Delta.h
new file mode 100644
index 0000000..2fa54ad
--- /dev/null
+++ b/SevenZip/Delta.h
@@ -0,0 +1,19 @@
+/* Delta.h -- Delta converter
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __DELTA_H
+#define __DELTA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define DELTA_STATE_SIZE 256  /* size of the state buffer; Delta_Encode/Delta_Decode copy `delta` bytes into a local buffer of this size, so delta must not exceed it */
+
+void Delta_Init(Byte *state);  /* zeroes DELTA_STATE_SIZE bytes of state */
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size);  /* in place: data[i] -= byte seen delta positions earlier */
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size);  /* in place: data[i] += byte produced delta positions earlier */
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/Lzma2Dec.c b/SevenZip/Lzma2Dec.c
new file mode 100644
index 0000000..b84f88a
--- /dev/null
+++ b/SevenZip/Lzma2Dec.c
@@ -0,0 +1,378 @@
+/* Lzma2Dec.c -- LZMA2 Decoder
+2015-11-09 : Igor Pavlov : Public domain */
+
+/* #define SHOW_DEBUG_INFO */
+
+#include "Precomp.h"
+
+#ifdef SHOW_DEBUG_INFO
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+#include "Lzma2Dec.h"
+
+/*
+00000000  -  EOS
+00000001 U U  -  Uncompressed Reset Dic
+00000010 U U  -  Uncompressed No Reset
+100uuuuu U U P P  -  LZMA no reset
+101uuuuu U U P P  -  LZMA reset state
+110uuuuu U U P P S  -  LZMA reset state + new prop
+111uuuuu U U P P S  -  LZMA reset state + new prop + reset dic
+
+  u, U - Unpack Size
+  P - Pack Size
+  S - Props
+*/
+
+#define LZMA2_CONTROL_LZMA (1 << 7)
+#define LZMA2_CONTROL_COPY_NO_RESET 2
+#define LZMA2_CONTROL_COPY_RESET_DIC 1
+#define LZMA2_CONTROL_EOF 0
+
+#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0)
+
+#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3)
+#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2)
+
+#define LZMA2_LCLP_MAX 4
+#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
+
+#ifdef SHOW_DEBUG_INFO
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+typedef enum  /* parser states of the LZMA2 chunk decoder; header states consume one input byte each in Lzma2Dec_UpdateState */
+{
+  LZMA2_STATE_CONTROL,  /* expecting the control byte */
+  LZMA2_STATE_UNPACK0,  /* expecting bits 8..15 of the unpack size */
+  LZMA2_STATE_UNPACK1,  /* expecting bits 0..7 of the unpack size */
+  LZMA2_STATE_PACK0,  /* expecting bits 8..15 of the pack size */
+  LZMA2_STATE_PACK1,  /* expecting bits 0..7 of the pack size */
+  LZMA2_STATE_PROP,  /* expecting the lc/lp/pb properties byte */
+  LZMA2_STATE_DATA,  /* header complete; chunk data follows */
+  LZMA2_STATE_DATA_CONT,  /* set after a partial copy of an uncompressed chunk */
+  LZMA2_STATE_FINISHED,  /* control byte 0 was read */
+  LZMA2_STATE_ERROR  /* invalid header value */
+} ELzma2State;
+
+static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
+{
+  /* Expands the one-byte LZMA2 dictionary property into 5 bytes: LZMA2_LCLP_MAX, then the dictionary size little-endian. */
+  UInt32 dict;
+  unsigned k;
+  if (prop > 40)
+    return SZ_ERROR_UNSUPPORTED;
+  dict = (prop < 40) ? LZMA2_DIC_SIZE_FROM_PROP(prop) : 0xFFFFFFFF;
+  props[0] = (Byte)LZMA2_LCLP_MAX;
+  for (k = 0; k < 4; k++)
+    props[1 + k] = (Byte)(dict >> (8 * k));
+  return SZ_OK;
+}
+
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
+{  /* Converts the LZMA2 property byte to an LZMA props block and forwards it to LzmaDec_AllocateProbs. */
+  Byte props[LZMA_PROPS_SIZE];
+  RINOK(Lzma2Dec_GetOldProps(prop, props));  /* RINOK is defined elsewhere; presumably returns early on a non-zero result (here: prop > 40) */
+  return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+}
+
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc)
+{  /* Converts the LZMA2 property byte to an LZMA props block and forwards it to LzmaDec_Allocate. */
+  Byte props[LZMA_PROPS_SIZE];
+  RINOK(Lzma2Dec_GetOldProps(prop, props));  /* RINOK is defined elsewhere; presumably returns early on a non-zero result (here: prop > 40) */
+  return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+}
+
+void Lzma2Dec_Init(CLzma2Dec *p)
+{
+  /* Resets the chunk parser: the next byte is read as a control byte, and dictionary, state and
+     properties are all flagged as "must be initialised" before the embedded LZMA decoder is reset. */
+  p->needInitDic = p->needInitState = p->needInitProp = True;
+  p->state = LZMA2_STATE_CONTROL;
+  LzmaDec_Init(&p->decoder);
+}
+
+static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+{  /* Consumes one chunk-header byte b in the current state and returns the next state (LZMA2_STATE_ERROR for invalid values). */
+  switch (p->state)
+  {
+    case LZMA2_STATE_CONTROL:
+      p->control = b;
+      PRF(printf("\n %4X ", (unsigned)p->decoder.dicPos));
+      PRF(printf(" %2X", (unsigned)b));
+      if (p->control == 0)
+        return LZMA2_STATE_FINISHED;  /* control byte 0 ends the stream */
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))  /* bit 7 clear: uncompressed chunk */
+      {
+        if ((p->control & 0x7F) > 2)
+          return LZMA2_STATE_ERROR;  /* only 1 and 2 are valid uncompressed control values */
+        p->unpackSize = 0;
+      }
+      else
+        p->unpackSize = (UInt32)(p->control & 0x1F) << 16;  /* low 5 control bits are bits 16..20 of the unpack size */
+      return LZMA2_STATE_UNPACK0;
+    
+    case LZMA2_STATE_UNPACK0:
+      p->unpackSize |= (UInt32)b << 8;
+      return LZMA2_STATE_UNPACK1;
+    
+    case LZMA2_STATE_UNPACK1:
+      p->unpackSize |= (UInt32)b;
+      p->unpackSize++;  /* the header stores (size - 1) */
+      PRF(printf(" %8u", (unsigned)p->unpackSize));
+      return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;  /* uncompressed chunks have no pack size */
+    
+    case LZMA2_STATE_PACK0:
+      p->packSize = (UInt32)b << 8;
+      return LZMA2_STATE_PACK1;
+
+    case LZMA2_STATE_PACK1:
+      p->packSize |= (UInt32)b;
+      p->packSize++;  /* the header stores (size - 1) */
+      PRF(printf(" %8u", (unsigned)p->packSize));
+      return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP:  /* modes 2 and 3 carry a properties byte */
+        (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA);  /* without one, properties must already have been set */
+
+    case LZMA2_STATE_PROP:
+    {
+      unsigned lc, lp;
+      if (b >= (9 * 5 * 5))
+        return LZMA2_STATE_ERROR;
+      lc = b % 9;  /* b encodes (pb * 5 + lp) * 9 + lc */
+      b /= 9;
+      p->decoder.prop.pb = b / 5;
+      lp = b % 5;
+      if (lc + lp > LZMA2_LCLP_MAX)
+        return LZMA2_STATE_ERROR;  /* lc + lp may not exceed LZMA2_LCLP_MAX */
+      p->decoder.prop.lc = lc;
+      p->decoder.prop.lp = lp;
+      p->needInitProp = False;
+      return LZMA2_STATE_DATA;
+    }
+  }
+  return LZMA2_STATE_ERROR;  /* reached for any state not handled by the switch */
+}
+
+static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
+{  /* Copies size raw bytes to dic at dicPos and advances the decoder's position counters. */
+  memcpy(p->dic + p->dicPos, src, size);
+  if (p->prop.dicSize - p->processedPos <= size && p->checkDicSize == 0)  /* evaluated with the pre-update processedPos */
+    p->checkDicSize = p->prop.dicSize;
+  p->dicPos += size;
+  p->processedPos += (UInt32)size;
+}
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState);
+
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{  /* Decodes LZMA2 chunks from src into the decoder's dictionary up to dicLimit. In: *srcLen = bytes available in src; out: *srcLen = bytes consumed. */
+  SizeT inSize = *srcLen;
+  *srcLen = 0;  /* from here on *srcLen counts the bytes consumed so far */
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->state != LZMA2_STATE_FINISHED)
+  {
+    SizeT dicPos = p->decoder.dicPos;
+    
+    if (p->state == LZMA2_STATE_ERROR)  /* once the error state is set, each call returns SZ_ERROR_DATA here */
+      return SZ_ERROR_DATA;
+    
+    if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)  /* output limit reached and the caller does not require the end of stream */
+    {
+      *status = LZMA_STATUS_NOT_FINISHED;
+      return SZ_OK;
+    }
+
+    if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)  /* still inside a chunk header: feed it one byte at a time */
+    {
+      if (*srcLen == inSize)
+      {
+        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+        return SZ_OK;
+      }
+      (*srcLen)++;
+      p->state = Lzma2Dec_UpdateState(p, *src++);
+
+      if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)  /* output is full (finishMode != LZMA_FINISH_ANY here), so only the finishing byte is acceptable */
+      {
+        p->state = LZMA2_STATE_ERROR;
+        return SZ_ERROR_DATA;
+      }
+      continue;
+    }
+    
+    {
+      SizeT destSizeCur = dicLimit - dicPos;  /* free output space */
+      SizeT srcSizeCur = inSize - *srcLen;  /* unread input */
+      ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
+      
+      if (p->unpackSize <= destSizeCur)  /* the rest of this chunk fits: decode exactly to the chunk end */
+      {
+        destSizeCur = (SizeT)p->unpackSize;
+        curFinishMode = LZMA_FINISH_END;
+      }
+
+      if (LZMA2_IS_UNCOMPRESSED_STATE(p))  /* stored chunk: payload is copied verbatim */
+      {
+        if (*srcLen == inSize)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+
+        if (p->state == LZMA2_STATE_DATA)  /* first payload bytes of this chunk: apply the dictionary reset rules once */
+        {
+          Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
+          if (initDic)
+            p->needInitProp = p->needInitState = True;  /* after a dictionary reset the next LZMA chunk must carry props and reset state */
+          else if (p->needInitDic)  /* a dictionary reset is still pending but this chunk does not request one */
+          {
+            p->state = LZMA2_STATE_ERROR;
+            return SZ_ERROR_DATA;
+          }
+          p->needInitDic = False;
+          LzmaDec_InitDicAndState(&p->decoder, initDic, False);
+        }
+
+        if (srcSizeCur > destSizeCur)
+          srcSizeCur = destSizeCur;
+
+        if (srcSizeCur == 0)  /* input is available (checked above), so this means destSizeCur == 0: no output space left */
+        {
+          p->state = LZMA2_STATE_ERROR;
+          return SZ_ERROR_DATA;
+        }
+
+        LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur);
+
+        src += srcSizeCur;
+        *srcLen += srcSizeCur;
+        p->unpackSize -= (UInt32)srcSizeCur;
+        p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;  /* chunk done -> expect next control byte */
+      }
+      else  /* LZMA-compressed chunk */
+      {
+        SizeT outSizeProcessed;
+        SRes res;
+
+        if (p->state == LZMA2_STATE_DATA)  /* first payload bytes of this chunk: validate and apply the requested resets */
+        {
+          unsigned mode = LZMA2_GET_LZMA_MODE(p);
+          Bool initDic = (mode == 3);
+          Bool initState = (mode != 0);
+          if ((!initDic && p->needInitDic) || (!initState && p->needInitState))  /* a required reset is missing from this chunk */
+          {
+            p->state = LZMA2_STATE_ERROR;
+            return SZ_ERROR_DATA;
+          }
+          
+          LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
+          p->needInitDic = False;
+          p->needInitState = False;
+          p->state = LZMA2_STATE_DATA_CONT;
+        }
+  
+        if (srcSizeCur > p->packSize)  /* never hand the LZMA decoder bytes beyond this chunk */
+          srcSizeCur = (SizeT)p->packSize;
+          
+        res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status);
+        
+        src += srcSizeCur;
+        *srcLen += srcSizeCur;
+        p->packSize -= (UInt32)srcSizeCur;
+
+        outSizeProcessed = p->decoder.dicPos - dicPos;
+        p->unpackSize -= (UInt32)outSizeProcessed;
+
+        RINOK(res);  /* counters above are updated before the error check */
+        if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
+          return res;
+
+        if (srcSizeCur == 0 && outSizeProcessed == 0)  /* no progress at all: legal only exactly at the end of the chunk */
+        {
+          if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+              || p->unpackSize != 0
+              || p->packSize != 0)
+          {
+            p->state = LZMA2_STATE_ERROR;
+            return SZ_ERROR_DATA;
+          }
+          p->state = LZMA2_STATE_CONTROL;
+        }
+        
+        if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)  /* end of an LZMA chunk is not the end of the LZMA2 stream */
+          *status = LZMA_STATUS_NOT_FINISHED;
+      }
+    }
+  }
+  
+  *status = LZMA_STATUS_FINISHED_WITH_MARK;  /* reached only when the state is LZMA2_STATE_FINISHED */
+  return SZ_OK;
+}
+
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{  /* Decodes through the decoder's own dictionary buffer and copies the produced bytes to dest. In: *destLen / *srcLen = capacities; out: bytes written / consumed. */
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT srcSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->decoder.dicPos == p->decoder.dicBufSize)  /* dictionary buffer is full: wrap the write position to the start */
+      p->decoder.dicPos = 0;
+    dicPos = p->decoder.dicPos;
+    if (outSize > p->decoder.dicBufSize - dicPos)  /* request does not fit before the buffer end: decode to the end, then loop */
+    {
+      outSizeCur = p->decoder.dicBufSize;
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;  /* here outSizeCur is a dictionary limit, not a length */
+      curFinishMode = finishMode;
+    }
+
+    res = Lzma2Dec_DecodeToDic(p, outSizeCur, src, &srcSizeCur, curFinishMode, status);
+    src += srcSizeCur;
+    inSize -= srcSizeCur;
+    *srcLen += srcSizeCur;
+    outSizeCur = p->decoder.dicPos - dicPos;  /* from here outSizeCur is the number of bytes just produced */
+    memcpy(dest, p->decoder.dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0)  /* nothing was produced this round, or dest is full */
+      return SZ_OK;
+  }
+}
+
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc)
+{  /* One-call decode of a whole LZMA2 stream into dest. In: *destLen / *srcLen = buffer sizes; out: bytes written / consumed. */
+  CLzma2Dec p;
+  SRes res;
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  Lzma2Dec_Construct(&p);
+  RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc));  /* returns early on failure, with *destLen and *srcLen left at 0 */
+  p.decoder.dic = dest;  /* the caller's output buffer is used directly as the dictionary */
+  p.decoder.dicBufSize = outSize;
+  Lzma2Dec_Init(&p);
+  *srcLen = inSize;  /* Lzma2Dec_DecodeToDic reads *srcLen as the available input size */
+  res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+  *destLen = p.decoder.dicPos;
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)  /* there is no second call in this API, so running out of input is an error */
+    res = SZ_ERROR_INPUT_EOF;
+  Lzma2Dec_FreeProbs(&p, alloc);
+  return res;
+}
diff --git a/SevenZip/Lzma2Dec.h b/SevenZip/Lzma2Dec.h
new file mode 100644
index 0000000..e6a0f6e
--- /dev/null
+++ b/SevenZip/Lzma2Dec.h
@@ -0,0 +1,80 @@
+/* Lzma2Dec.h -- LZMA2 Decoder
+2015-05-13 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA2_DEC_H
+#define __LZMA2_DEC_H
+
+#include "LzmaDec.h"
+
+EXTERN_C_BEGIN
+
+/* ---------- State Interface ---------- */
+
+typedef struct
+{  /* State of one LZMA2 stream decoder */
+  CLzmaDec decoder;  /* embedded LZMA decoder (dictionary, probabilities, range-coder state) */
+  UInt32 packSize;  /* compressed bytes of the current chunk not yet consumed */
+  UInt32 unpackSize;  /* uncompressed bytes of the current chunk not yet produced */
+  unsigned state;  /* current LZMA2_STATE_* value of the chunk parser */
+  Byte control;  /* compared with LZMA2_CONTROL_COPY_RESET_DIC by the decoder; presumably the current chunk's control byte -- confirm in Lzma2Dec.c */
+  Bool needInitDic;  /* while set, a chunk that does not reset the dictionary is rejected */
+  Bool needInitState;  /* while set, an LZMA chunk that does not reset the state is rejected */
+  Bool needInitProp;  /* while set, an LZMA chunk without a properties byte is rejected */
+} CLzma2Dec;
+
+#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)  /* these three forward to the embedded CLzmaDec and expand to a single expression */
+#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)  /* no trailing ';' so the call is safe in an unbraced if/else */
+#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)  /* no trailing ';' for the same reason */
+
+SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);  /* used by Lzma2Decode before Lzma2Dec_Init; paired there with Lzma2Dec_FreeProbs */
+SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc);  /* NOTE(review): presumably the counterpart of Lzma2Dec_Free -- confirm in Lzma2Dec.c */
+void Lzma2Dec_Init(CLzma2Dec *p);  /* called by Lzma2Decode after allocation and before the first decode call */
+
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/*
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - use smallest number of input bytes
+  LZMA_FINISH_END - read EndOfStream marker after decoding
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/LzmaDec.c b/SevenZip/LzmaDec.c
new file mode 100644
index 0000000..12dce11
--- /dev/null
+++ b/SevenZip/LzmaDec.c
@@ -0,0 +1,1100 @@
+/* LzmaDec.c -- LZMA Decoder
+2016-05-16 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "LzmaDec.h"
+
+#include <string.h>
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)  /* NORMALIZE refills whenever range drops below this */
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)  /* probabilities are scaled to this total (2048) */
+#define kNumMoveBits 5  /* adaptation shift used by UPDATE_0 / UPDATE_1 */
+
+#define RC_INIT_SIZE 5  /* number of input bytes LzmaDec_DecodeToDic collects to initialise the range coder */
+
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }  /* shifts in one input byte; no bounds check on buf */
+
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)  /* opens an if; needs locals ttt, bound, range, code, buf */
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));  /* bit was 0: raise the probability */
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));  /* bit was 1: lower the probability */
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+  { UPDATE_0(p); i = (i + i); A0; } else \
+  { UPDATE_1(p); i = (i + i) + 1; A1; }  /* decodes one bit into i (i = 2*i + bit), then runs A0 or A1 */
+#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)
+
+#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
+#define TREE_DECODE(probs, limit, i) \
+  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }  /* bit-tree decode; limit is the symbol count of the tree */
+
+/* #define _LZMA_SIZE_OPT */
+
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+  { i = 1; \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  i -= 0x40; }
+#endif  /* the #else form is the 6-bit TREE_DECODE written out without a loop */
+
+#define NORMAL_LITER_DEC GET_BIT(prob + symbol, symbol)  /* one bit of a plain literal */
+#define MATCHED_LITER_DEC \
+  matchByte <<= 1; \
+  bit = (matchByte & offs); \
+  probLit = prob + offs + bit + symbol; \
+  GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)  /* one bit of a literal decoded against matchByte; needs locals bit, probLit, offs */
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }  /* bounds-checked NORMALIZE: returns DUMMY_ERROR from the enclosing function */
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;  /* the _CHECK updates leave the probability untouched */
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+  { UPDATE_0_CHECK; i = (i + i); A0; } else \
+  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenChoice 0  /* LenChoice .. LenHigh are offsets inside one length-coder table */
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)  /* size of one length-coder table */
+
+
+#define kNumStates 12
+#define kNumLitStates 7  /* states 0..6 are the ones the decoder enters after a literal */
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)  /* 2 + 8 + 8 + 256 = 274 */
+
+#define IsMatch 0  /* IsMatch .. Literal are offsets of the sub-tables inside the single probs[] array */
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)  /* literal tables come last; their count depends on lc + lp */
+
+#define LZMA_BASE_SIZE 1846  /* must equal Literal; verified by the #if below */
+#define LZMA_LIT_SIZE 0x300  /* probabilities per literal context */
+
+#if Literal != LZMA_BASE_SIZE
+StopCompilingDueBUG
+#endif  /* the stray identifier above forces a compile error if the layout sum and LZMA_BASE_SIZE disagree */
+
+#define LzmaProps_GetNumProbs(p) (Literal + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))  /* total number of entries in probs[] */
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/* The first LZMA symbol is always decoded.
+Further symbols are decoded while (dicPos < limit && buf < bufLimit); "buf" is compared before the final normalization.
+Out:
+  Result:
+    SZ_OK - OK
+    SZ_ERROR_DATA - Error
+  p->remainLen:
+    < kMatchSpecLenStart : number of match bytes still to be copied
+    = kMatchSpecLenStart : finished
+    = kMatchSpecLenStart + 1 : Flush marker (unused now)
+    = kMatchSpecLenStart + 2 : State Init Marker (unused now)
+*/
+
+static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{  /* Core decode loop: decodes at least one symbol, then continues while dicPos < limit and buf < bufLimit. Returns SZ_OK or SZ_ERROR_DATA. */
+  CLzmaProb *probs = p->probs;
+
+  unsigned state = p->state;  /* decoder fields are cached in locals and written back at the end */
+  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+  unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
+  unsigned lc = p->prop.lc;
+
+  Byte *dic = p->dic;
+  SizeT dicBufSize = p->dicBufSize;
+  SizeT dicPos = p->dicPos;
+  
+  UInt32 processedPos = p->processedPos;
+  UInt32 checkDicSize = p->checkDicSize;
+  unsigned len = 0;
+
+  const Byte *buf = p->buf;
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+
+  do
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = processedPos & pbMask;
+
+    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+    IF_BIT_0(prob)  /* IsMatch bit 0: the symbol is a literal */
+    {
+      unsigned symbol;
+      UPDATE_0(prob);
+      prob = probs + Literal;
+      if (processedPos != 0 || checkDicSize != 0)  /* a previous byte exists: select the literal table from position bits and previous-byte bits */
+        prob += ((UInt32)LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
+            (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
+      processedPos++;
+
+      if (state < kNumLitStates)  /* previous symbol was a literal: plain 8-bit tree decode */
+      {
+        state -= (state < 4) ? state : 3;
+        symbol = 1;
+        #ifdef _LZMA_SIZE_OPT
+        do { NORMAL_LITER_DEC } while (symbol < 0x100);
+        #else
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        #endif
+      }
+      else  /* previous symbol was a match: decode against the byte found at distance rep0 */
+      {
+        unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+        unsigned offs = 0x100;
+        state -= (state < 10) ? 3 : 6;
+        symbol = 1;
+        #ifdef _LZMA_SIZE_OPT
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          MATCHED_LITER_DEC
+        }
+        while (symbol < 0x100);
+        #else
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+        }
+        #endif
+      }
+
+      dic[dicPos++] = (Byte)symbol;  /* the cast drops the leading marker bit (symbol is 0x100..0x1FF here) */
+      continue;
+    }
+    
+    {
+      UPDATE_1(prob);  /* IsMatch bit 1: a match or a rep match */
+      prob = probs + IsRep + state;
+      IF_BIT_0(prob)  /* IsRep bit 0: new match whose distance is decoded below */
+      {
+        UPDATE_0(prob);
+        state += kNumStates;  /* temporary marker: state >= kNumStates means "distance must be decoded" */
+        prob = probs + LenCoder;
+      }
+      else  /* rep match: reuse one of the four remembered distances */
+      {
+        UPDATE_1(prob);
+        if (checkDicSize == 0 && processedPos == 0)  /* nothing has been output yet, so there is nothing to repeat */
+          return SZ_ERROR_DATA;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0(prob)
+        {
+          UPDATE_0(prob);
+          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IF_BIT_0(prob)  /* short rep: copy exactly one byte from distance rep0 */
+          {
+            UPDATE_0(prob);
+            dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+            dicPos++;
+            processedPos++;
+            state = state < kNumLitStates ? 9 : 11;
+            continue;
+          }
+          UPDATE_1(prob);
+        }
+        else  /* pick rep1, rep2 or rep3 and move it to the front of the history */
+        {
+          UInt32 distance;
+          UPDATE_1(prob);
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            distance = rep1;
+          }
+          else
+          {
+            UPDATE_1(prob);
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0(prob)
+            {
+              UPDATE_0(prob);
+              distance = rep2;
+            }
+            else
+            {
+              UPDATE_1(prob);
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        state = state < kNumLitStates ? 8 : 11;
+        prob = probs + RepLenCoder;
+      }
+      
+      #ifdef _LZMA_SIZE_OPT
+      {
+        unsigned lim, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          lim = (1 << kLenNumLowBits);
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            lim = (1 << kLenNumMidBits);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            lim = (1 << kLenNumHighBits);
+          }
+        }
+        TREE_DECODE(probLen, lim, len);
+        len += offset;
+      }
+      #else
+      {
+        CLzmaProb *probLen = prob + LenChoice;  /* length decode, unrolled: low (0..7), mid (8..15) or high (16..271) */
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          len = 1;
+          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len);
+          len -= 8;
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            len = 1;
+            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len);  /* len is now 8..15: the marker bit is kept, which equals (value + kLenNumLowSymbols) */
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+            len += kLenNumLowSymbols + kLenNumMidSymbols;
+          }
+        }
+      }
+      #endif
+
+      if (state >= kNumStates)  /* new match: decode the position slot, then the distance */
+      {
+        UInt32 distance;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+        TREE_6_DECODE(prob, distance);
+        if (distance >= kStartPosModelIndex)  /* slots 0..3 are the distance itself; larger slots carry extra bits */
+        {
+          unsigned posSlot = (unsigned)distance;
+          unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+          distance = (2 | (distance & 1));
+          if (posSlot < kEndPosModelIndex)  /* extra bits come from the SpecPos probability tables, lowest bit first */
+          {
+            distance <<= numDirectBits;
+            prob = probs + SpecPos + distance - posSlot - 1;
+            {
+              UInt32 mask = 1;
+              unsigned i = 1;
+              do
+              {
+                GET_BIT2(prob + i, i, ; , distance |= mask);
+                mask <<= 1;
+              }
+              while (--numDirectBits != 0);
+            }
+          }
+          else  /* high bits are read without a probability model, the low kNumAlignBits from the Align table */
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE
+              range >>= 1;
+              
+              {
+                UInt32 t;
+                code -= range;
+                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+                distance = (distance << 1) + (t + 1);
+                code += range & t;
+              }
+              /*
+              distance <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                distance |= 1;
+              }
+              */
+            }
+            while (--numDirectBits != 0);
+            prob = probs + Align;
+            distance <<= kNumAlignBits;
+            {
+              unsigned i = 1;
+              GET_BIT2(prob + i, i, ; , distance |= 1);
+              GET_BIT2(prob + i, i, ; , distance |= 2);
+              GET_BIT2(prob + i, i, ; , distance |= 4);
+              GET_BIT2(prob + i, i, ; , distance |= 8);
+            }
+            if (distance == (UInt32)0xFFFFFFFF)  /* all-ones distance: stop decoding; len ends up >= kMatchSpecLenStart */
+            {
+              len += kMatchSpecLenStart;
+              state -= kNumStates;
+              break;
+            }
+          }
+        }
+        
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        rep0 = distance + 1;  /* reps hold distance + 1 */
+        if (checkDicSize == 0)  /* reject distances that reach before the start of the data */
+        {
+          if (distance >= processedPos)
+          {
+            p->dicPos = dicPos;
+            return SZ_ERROR_DATA;
+          }
+        }
+        else if (distance >= checkDicSize)  /* reject distances beyond the dictionary size */
+        {
+          p->dicPos = dicPos;
+          return SZ_ERROR_DATA;
+        }
+        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+      }
+
+      len += kMatchMinLen;
+
+      {
+        SizeT rem;
+        unsigned curLen;
+        SizeT pos;
+        
+        if ((rem = limit - dicPos) == 0)  /* no room for even one byte of the match */
+        {
+          p->dicPos = dicPos;
+          return SZ_ERROR_DATA;
+        }
+        
+        curLen = ((rem < len) ? (unsigned)rem : len);  /* copy what fits now; the rest stays in len (stored as p->remainLen) */
+        pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
+        processedPos += curLen;
+
+        len -= curLen;
+        if (curLen <= dicBufSize - pos)  /* source run does not wrap: forward byte copy (ranges overlap when rep0 < curLen) */
+        {
+          Byte *dest = dic + dicPos;
+          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+          const Byte *lim = dest + curLen;
+          dicPos += curLen;
+          do
+            *(dest) = (Byte)*(dest + src);
+          while (++dest != lim);
+        }
+        else  /* source run crosses the end of the circular buffer: copy with wrap-around */
+        {
+          do
+          {
+            dic[dicPos++] = dic[pos];
+            if (++pos == dicBufSize)
+              pos = 0;
+          }
+          while (--curLen != 0);
+        }
+      }
+    }
+  }
+  while (dicPos < limit && buf < bufLimit);
+
+  NORMALIZE;  /* unchecked: may read one more byte from buf */
+  
+  p->buf = buf;  /* write the cached state back */
+  p->range = range;
+  p->code = code;
+  p->remainLen = len;
+  p->dicPos = dicPos;
+  p->processedPos = processedPos;
+  p->reps[0] = rep0;
+  p->reps[1] = rep1;
+  p->reps[2] = rep2;
+  p->reps[3] = rep3;
+  p->state = state;
+
+  return SZ_OK;
+}
+
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{  /* Copies the pending part of an interrupted match (p->remainLen bytes at distance reps[0]) into the dictionary, up to limit. */
+  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)  /* values >= kMatchSpecLenStart are markers, not lengths */
+  {
+    Byte *dic = p->dic;
+    SizeT dicPos = p->dicPos;
+    SizeT dicBufSize = p->dicBufSize;
+    unsigned len = p->remainLen;
+    SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+    SizeT rem = limit - dicPos;
+    if (rem < len)  /* only part of the remainder fits below limit */
+      len = (unsigned)(rem);
+
+    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)  /* this copy brings processedPos up to or past dicSize */
+      p->checkDicSize = p->prop.dicSize;
+
+    p->processedPos += len;
+    p->remainLen -= len;
+    while (len != 0)
+    {
+      len--;
+      dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];  /* source index wraps around the circular buffer */
+      dicPos++;
+    }
+    p->dicPos = dicPos;
+  }
+}
+
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{  /* Calls LzmaDec_DecodeReal in steps so that checkDicSize is set once processedPos reaches dicSize; returns 0 or the error from LzmaDec_DecodeReal. */
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;  /* bytes left before processedPos reaches dicSize */
+      if (limit - p->dicPos > rem)
+        limit2 = p->dicPos + rem;  /* stop this step at that point */
+    }
+    
+    RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
+    
+    if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+    
+    LzmaDec_WriteRem(p, limit);  /* finish a match that was cut off at limit2 */
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  if (p->remainLen > kMatchSpecLenStart)  /* clamp marker values so remainLen == kMatchSpecLenStart means "finished" */
+    p->remainLen = kMatchSpecLenStart;
+
+  return 0;
+}
+
+typedef enum
+{  /* Result of LzmaDec_TryDummy: the kind of the next symbol, or that the input ran out */
+  DUMMY_ERROR, /* unexpected end of input stream */
+  DUMMY_LIT,  /* next symbol is a literal */
+  DUMMY_MATCH,  /* next symbol is a new match (a distance follows) */
+  DUMMY_REP  /* next symbol is a rep match */
+} ELzmaDummy;
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{  /* Dry run of one symbol on local copies of range/code: reports the symbol kind, or DUMMY_ERROR if the inSize bytes at buf are not enough. p is not modified. */
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+  const Byte *bufLimit = buf + inSize;  /* the _CHECK macros return DUMMY_ERROR when buf reaches this */
+  const CLzmaProb *probs = p->probs;
+  unsigned state = p->state;
+  ELzmaDummy res;
+
+  {
+    const CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
+
+    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+    IF_BIT_0_CHECK(prob)  /* literal */
+    {
+      UPDATE_0_CHECK
+
+      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+      prob = probs + Literal;
+      if (p->checkDicSize != 0 || p->processedPos != 0)
+        prob += ((UInt32)LZMA_LIT_SIZE *
+            ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+            (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+      if (state < kNumLitStates)
+      {
+        unsigned symbol = 1;
+        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+            (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
+        unsigned offs = 0x100;
+        unsigned symbol = 1;
+        do
+        {
+          unsigned bit;
+          const CLzmaProb *probLit;
+          matchByte <<= 1;
+          bit = (matchByte & offs);
+          probLit = prob + offs + bit + symbol;
+          GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
+        }
+        while (symbol < 0x100);
+      }
+      res = DUMMY_LIT;
+    }
+    else  /* match or rep match */
+    {
+      unsigned len;
+      UPDATE_1_CHECK;
+
+      prob = probs + IsRep + state;
+      IF_BIT_0_CHECK(prob)
+      {
+        UPDATE_0_CHECK;
+        state = 0;  /* state is reused as a flag here: 0 makes the "state < 4" test below walk the distance bits */
+        prob = probs + LenCoder;
+        res = DUMMY_MATCH;
+      }
+      else
+      {
+        UPDATE_1_CHECK;
+        res = DUMMY_REP;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0_CHECK(prob)
+        {
+          UPDATE_0_CHECK;
+          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IF_BIT_0_CHECK(prob)  /* short rep: no length follows, so the symbol ends here */
+          {
+            UPDATE_0_CHECK;
+            NORMALIZE_CHECK;
+            return DUMMY_REP;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+          }
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0_CHECK(prob)
+            {
+              UPDATE_0_CHECK;
+            }
+            else
+            {
+              UPDATE_1_CHECK;
+            }
+          }
+        }
+        state = kNumStates;  /* flag value >= 4: a rep match has no distance bits to walk */
+        prob = probs + RepLenCoder;
+      }
+      {
+        unsigned limit, offset;
+        const CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0_CHECK(probLen)
+        {
+          UPDATE_0_CHECK;
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          limit = 1 << kLenNumLowBits;
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          probLen = prob + LenChoice2;
+          IF_BIT_0_CHECK(probLen)
+          {
+            UPDATE_0_CHECK;
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            limit = 1 << kLenNumMidBits;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            limit = 1 << kLenNumHighBits;
+          }
+        }
+        TREE_DECODE_CHECK(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state < 4)  /* true only for the new-match case (state was set to 0 above) */
+      {
+        unsigned posSlot;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
+            kNumPosSlotBits);
+        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+        if (posSlot >= kStartPosModelIndex)
+        {
+          unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+          if (posSlot < kEndPosModelIndex)
+          {
+            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE_CHECK
+              range >>= 1;
+              code -= range & (((code - range) >> 31) - 1);
+              /* if (code >= range) code -= range; */
+            }
+            while (--numDirectBits != 0);
+            prob = probs + Align;
+            numDirectBits = kNumAlignBits;
+          }
+          {
+            unsigned i = 1;
+            do  /* the distance value itself is not needed; the bits are only consumed */
+            {
+              GET_BIT_CHECK(prob + i, i);
+            }
+            while (--numDirectBits != 0);
+          }
+        }
+      }
+    }
+  }
+  NORMALIZE_CHECK;
+  return res;
+}
+
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
+{  /* Prepares the decoder for a new run of input; optionally resets the dictionary position counters and/or schedules a state reset. */
+  p->needFlush = 1;  /* makes LzmaDec_DecodeToDic re-read the RC_INIT_SIZE range-coder init bytes */
+  p->remainLen = 0;
+  p->tempBufSize = 0;
+
+  if (initDic)
+  {
+    p->processedPos = 0;
+    p->checkDicSize = 0;
+    p->needInitState = 1;  /* a dictionary reset also schedules a state reset */
+  }
+  if (initState)
+    p->needInitState = 1;  /* the reset itself is done later by LzmaDec_InitStateReal */
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{  /* Full reset: dictionary write position to 0, plus dictionary and state re-initialisation. */
+  p->dicPos = 0;
+  LzmaDec_InitDicAndState(p, True, True);
+}
+
+static void LzmaDec_InitStateReal(CLzmaDec *p)
+{  /* Performs the deferred state reset: all probabilities, the four rep distances and the state, then clears needInitState. */
+  SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+  SizeT i;
+  CLzmaProb *probs = p->probs;
+  for (i = 0; i < numProbs; i++)
+    probs[i] = kBitModelTotal >> 1;  /* 1024 of 2048: both bit values equally likely */
+  p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+  p->state = 0;
+  p->needInitState = 0;
+}
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+    ELzmaFinishMode finishMode, ELzmaStatus *status)
+{  /* Decodes src into p->dic until p->dicPos reaches dicLimit. *srcLen: bytes available on entry, bytes consumed on return. */
+  SizeT inSize = *srcLen;
+  (*srcLen) = 0;
+  LzmaDec_WriteRem(p, dicLimit);  /* helper defined outside this chunk; presumably emits what is left of a pending match - confirm */
+  
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->remainLen != kMatchSpecLenStart)  /* the loop ends once remainLen holds this sentinel; the code after the loop then reports the end mark */
+  {
+      int checkEndMarkNow;
+
+      if (p->needFlush)  /* stream start: the RC_INIT_SIZE-byte range coder header must be read first */
+      {
+        for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)  /* the header may arrive over several calls, so it is gathered in tempBuf */
+          p->tempBuf[p->tempBufSize++] = *src++;
+        if (p->tempBufSize < RC_INIT_SIZE)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+        if (p->tempBuf[0] != 0)  /* the first header byte must be zero */
+          return SZ_ERROR_DATA;
+        p->code =  /* the other four header bytes, most significant first */
+              ((UInt32)p->tempBuf[1] << 24)
+            | ((UInt32)p->tempBuf[2] << 16)
+            | ((UInt32)p->tempBuf[3] << 8)
+            | ((UInt32)p->tempBuf[4]);
+        p->range = 0xFFFFFFFF;
+        p->needFlush = 0;
+        p->tempBufSize = 0;
+      }
+
+      checkEndMarkNow = 0;
+      if (p->dicPos >= dicLimit)  /* output limit reached: stop here, or (LZMA_FINISH_END) verify what follows */
+      {
+        if (p->remainLen == 0 && p->code == 0)
+        {
+          *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+          return SZ_OK;
+        }
+        if (finishMode == LZMA_FINISH_ANY)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_OK;
+        }
+        if (p->remainLen != 0)  /* LZMA_FINISH_END, but a match still has bytes to write past the limit */
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_ERROR_DATA;
+        }
+        checkEndMarkNow = 1;  /* LZMA_FINISH_END with nothing pending: the next symbol is checked before it is decoded */
+      }
+
+      if (p->needInitState)  /* the state reset is deferred until decoding actually continues */
+        LzmaDec_InitStateReal(p);
+  
+      if (p->tempBufSize == 0)  /* no bytes carried over from an earlier call: decode straight from src */
+      {
+        SizeT processed;
+        const Byte *bufLimit;
+        if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)  /* too little input to decode unchecked, or the symbol must be inspected: dry-run it first */
+        {
+          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+          if (dummyRes == DUMMY_ERROR)  /* the dry run failed on the bytes at hand: park them in tempBuf and ask for more */
+          {
+            memcpy(p->tempBuf, src, inSize);
+            p->tempBufSize = (unsigned)inSize;
+            (*srcLen) += inSize;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)  /* at the limit only a match-type symbol is accepted */
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+          bufLimit = src;  /* NOTE(review): limit equal to the start - presumably makes DecodeReal2 decode a single symbol; confirm */
+        }
+        else
+          bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;  /* keep LZMA_REQUIRED_INPUT_MAX bytes in reserve */
+        p->buf = src;
+        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
+          return SZ_ERROR_DATA;
+        processed = (SizeT)(p->buf - src);  /* bytes the decoder consumed from src */
+        (*srcLen) += processed;
+        src += processed;
+        inSize -= processed;
+      }
+      else  /* bytes parked by an earlier call are waiting in tempBuf */
+      {
+        unsigned rem = p->tempBufSize, lookAhead = 0;
+        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)  /* top tempBuf up with bytes from src */
+          p->tempBuf[rem++] = src[lookAhead++];
+        p->tempBufSize = rem;
+        if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
+          if (dummyRes == DUMMY_ERROR)  /* still not decodable: the copied bytes stay in tempBuf and count as consumed */
+          {
+            (*srcLen) += lookAhead;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+        }
+        p->buf = p->tempBuf;
+        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
+          return SZ_ERROR_DATA;
+        
+        {
+          unsigned kkk = (unsigned)(p->buf - p->tempBuf);  /* bytes of tempBuf the decoder consumed */
+          if (rem < kkk)
+            return SZ_ERROR_FAIL; /* some internal error */
+          rem -= kkk;  /* bytes of tempBuf left unconsumed */
+          if (lookAhead < rem)
+            return SZ_ERROR_FAIL; /* some internal error */
+          lookAhead -= rem;  /* src bytes actually consumed; the unconsumed ones are read again from src */
+        }
+        (*srcLen) += lookAhead;
+        src += lookAhead;
+        inSize -= lookAhead;
+        p->tempBufSize = 0;
+      }
+  }
+  if (p->code == 0)  /* loop left via the sentinel: a zero code is required for a valid end */
+    *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
+}
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT outLeft = *destLen;
+  SizeT inLeft = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT consumed = inLeft, startPos, produced;
+    SRes res;
+    int fits;
+    if (p->dicPos == p->dicBufSize)
+      p->dicPos = 0;  /* dictionary buffer is full: continue writing from its start */
+    startPos = p->dicPos;
+
+    /* If the rest of the request fits before the end of the dictionary buffer, decode exactly that
+       much with the caller's finish mode; otherwise decode up to the buffer end with LZMA_FINISH_ANY. */
+    fits = (outLeft <= p->dicBufSize - startPos);
+    res = LzmaDec_DecodeToDic(p,
+        fits ? startPos + outLeft : p->dicBufSize,
+        src, &consumed,
+        fits ? finishMode : LZMA_FINISH_ANY, status);
+
+    /* Account for the input that this pass consumed. */
+    src += consumed;
+    inLeft -= consumed;
+    *srcLen += consumed;
+
+    /* Copy what this pass decoded out of the dictionary (also done when res reports an error). */
+    produced = p->dicPos - startPos;
+    memcpy(dest, p->dic + startPos, produced);
+    dest += produced;
+    outLeft -= produced;
+    *destLen += produced;
+    if (res != 0)
+      return res;
+    if (produced == 0 || outLeft == 0)
+      return SZ_OK;
+  }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->probs);  /* hand the probability table back through the caller's allocator */
+  p->probs = NULL;  /* a NULL probs makes LzmaDec_AllocateProbs2 allocate again */
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->dic);  /* hand the dictionary buffer back through the caller's allocator */
+  p->dic = NULL;  /* a NULL dic makes LzmaDec_Allocate allocate again */
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
+{  /* Releases both buffers obtained by LzmaDec_Allocate and clears their pointers. */
+  LzmaDec_FreeProbs(p, alloc);
+  LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  /* Layout: data[0] = (pb * 5 + lp) * 9 + lc, data[1..4] = dictionary size, least significant byte first. */
+  UInt32 dict = 0;
+  unsigned packed, shift;
+
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+
+  for (shift = 0; shift < 4; shift++)
+    dict |= (UInt32)data[1 + shift] << (8 * shift);
+  /* Sizes below LZMA_DIC_MIN are raised to it; dicSize is stored before data[0] is validated. */
+  p->dicSize = (dict < LZMA_DIC_MIN) ? LZMA_DIC_MIN : dict;
+
+  packed = data[0];
+  if (packed >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  p->lc = packed % 9;
+  packed /= 9;
+  p->lp = packed % 5;
+  p->pb = packed / 5;
+
+  return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
+{
+  UInt32 wanted = LzmaProps_GetNumProbs(propNew);
+
+  /* An existing table with exactly the wanted number of entries is kept as it is. */
+  if (p->probs && wanted == p->numProbs)
+    return SZ_OK;
+
+  LzmaDec_FreeProbs(p, alloc);
+  p->probs = (CLzmaProb *)alloc->Alloc(alloc, wanted * sizeof(CLzmaProb));
+  p->numProbs = wanted;  /* recorded even when the allocation failed */
+  return p->probs ? SZ_OK : SZ_ERROR_MEM;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
+{
+  CLzmaProps decoded;
+  SRes res = LzmaProps_Decode(&decoded, props, propsSize);
+  if (res == SZ_OK) res = LzmaDec_AllocateProbs2(p, &decoded, alloc);
+  if (res == SZ_OK) p->prop = decoded;  /* properties are stored only when both steps succeeded */
+  return res;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
+{
+  CLzmaProps propNew;
+  SizeT wanted, roundMask;
+  SRes res = LzmaProps_Decode(&propNew, props, propsSize);
+  if (res == SZ_OK) res = LzmaDec_AllocateProbs2(p, &propNew, alloc);
+  if (res != SZ_OK) return res;
+  /* Round the size up to a granule: 4 KiB below 4 MiB, 1 MiB below 1 GiB, 4 MiB from 1 GiB upwards. */
+  if (propNew.dicSize >= ((UInt32)1 << 30))
+    roundMask = ((UInt32)1 << 22) - 1;
+  else if (propNew.dicSize >= ((UInt32)1 << 22))
+    roundMask = ((UInt32)1 << 20) - 1;
+  else
+    roundMask = ((UInt32)1 << 12) - 1;
+  wanted = ((SizeT)propNew.dicSize + roundMask) & ~roundMask;
+  if (wanted < propNew.dicSize)
+    wanted = propNew.dicSize;  /* the rounding wrapped around SizeT: fall back to the exact size */
+  if (!p->dic || wanted != p->dicBufSize)
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)alloc->Alloc(alloc, wanted);
+    if (!p->dic)
+    {
+      LzmaDec_FreeProbs(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+  }
+  p->dicBufSize = wanted;
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAlloc *alloc)
+{
+  CLzmaDec dec;  /* temporary decoder that uses dest itself as its dictionary buffer */
+  SRes res;
+  const SizeT destCap = *destLen, srcAvail = *srcLen;
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  if (srcAvail < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF;
+  LzmaDec_Construct(&dec);
+  res = LzmaDec_AllocateProbs(&dec, propData, propSize, alloc);
+  if (res != SZ_OK) return res;
+  dec.dic = dest;
+  dec.dicBufSize = destCap;
+  LzmaDec_Init(&dec);
+  *srcLen = srcAvail;
+  res = LzmaDec_DecodeToDic(&dec, destCap, src, srcLen, finishMode, status);
+  *destLen = dec.dicPos;
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    res = SZ_ERROR_INPUT_EOF;  /* all input was supplied in this one call, so running out of it is an error */
+  LzmaDec_FreeProbs(&dec, alloc);
+  return res;
+}
diff --git a/SevenZip/LzmaDec.h b/SevenZip/LzmaDec.h
new file mode 100644
index 0000000..cc44dae
--- /dev/null
+++ b/SevenZip/LzmaDec.h
@@ -0,0 +1,227 @@
+/* LzmaDec.h -- LZMA Decoder
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+   but memory usage for CLzmaDec::probs will be doubled in that case */
+
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
+  unsigned lc, lp, pb;  /* unpacked from properties byte 0 by LzmaProps_Decode: byte = (pb * 5 + lp) * 9 + lc */
+  UInt32 dicSize;  /* from properties bytes 1..4 (least significant first); LzmaProps_Decode raises it to at least LZMA_DIC_MIN */
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+  SZ_OK
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+  CLzmaProps prop;  /* properties stored by LzmaDec_Allocate / LzmaDec_AllocateProbs */
+  CLzmaProb *probs;  /* probability table; NULL when not allocated */
+  Byte *dic;  /* dictionary buffer that decoded bytes are written to */
+  const Byte *buf;  /* input cursor during a decode call */
+  UInt32 range, code;  /* range coder state; set from the stream header in LzmaDec_DecodeToDic */
+  SizeT dicPos;  /* current position in dic */
+  SizeT dicBufSize;  /* size of dic in bytes */
+  UInt32 processedPos;
+  UInt32 checkDicSize;
+  unsigned state;
+  UInt32 reps[4];
+  unsigned remainLen;
+  int needFlush;  /* nonzero until the RC_INIT_SIZE-byte stream header has been read */
+  int needInitState;  /* nonzero while the reset of probs / reps / state is still pending */
+  UInt32 numProbs;  /* entry count that probs was last allocated for */
+  unsigned tempBufSize;  /* number of valid bytes in tempBuf */
+  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];  /* input bytes carried over between decode calls */
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }  /* marks both buffers as not allocated; expands to a braced block, not an expression */
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+     0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
+     1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+  LZMA_FINISH_ANY,   /* decoding may stop at any point once the output limit is reached */
+  LZMA_FINISH_END    /* the block must be finished when the output limit is reached */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+   You must use LZMA_FINISH_END, when you know that current output buffer
+   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+   and output value of destLen will be less than output buffer size limit.
+   You can check status result also.
+
+   You can use multiple checks to test data integrity after full decompression:
+     1) Check Result and "status" variable.
+     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+        You must use correct finish mode in that case. */
+
+typedef enum
+{
+  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
+  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
+  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
+  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* the stream may have finished without an end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+     1) Dictionary Interface
+     2) Buffer Interface
+     3) One Call Interface
+   You can select any of these interfaces, but don't mix functions from different
+   groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+     1) LzmaDec_Allocate / LzmaDec_Free
+     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+   You can use variant 2, if you set dictionary buffer manually.
+   For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+  SZ_OK
+  SZ_ERROR_MEM         - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+   
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+   dictionary to some other external buffer.
+   You must work with CLzmaDec variables directly in this interface.
+
+   STEPS:
+     LzmaDec_Construct()
+     LzmaDec_Allocate()
+     for (each new stream)
+     {
+       LzmaDec_Init()
+       while (it needs more decompression)
+       {
+         LzmaDec_DecodeToDic()
+         use data from CLzmaDec::dic and update CLzmaDec::dicPos
+       }
+     }
+     LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+   
+   The decoding to internal dictionary buffer (CLzmaDec::dic).
+   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (dicLimit).
+  LZMA_FINISH_ANY - Decode just dicLimit bytes.
+  LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+   See LzmaDec_DecodeToDic description for information about STEPS and return results,
+   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+   to work with CLzmaDec variables manually.
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAlloc *alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/SevenZip/Ppmd.h b/SevenZip/Ppmd.h
new file mode 100644
index 0000000..5655b26
--- /dev/null
+++ b/SevenZip/Ppmd.h
@@ -0,0 +1,85 @@
+/* Ppmd.h -- PPMD codec common code
+2016-05-16 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+#ifndef __PPMD_H
+#define __PPMD_H
+
+#include "CpuArch.h"
+
+EXTERN_C_BEGIN
+
+#ifdef MY_CPU_32BIT
+  #define PPMD_32BIT
+#endif
+
+#define PPMD_INT_BITS 7
+#define PPMD_PERIOD_BITS 7
+#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))
+
+#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift))
+#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2)
+#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob))
+#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob))
+
+#define PPMD_N1 4
+#define PPMD_N2 4
+#define PPMD_N3 4
+#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
+#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
+
+#pragma pack(push, 1)
+/* Most compilers work OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
+
+/* SEE-contexts for PPM-contexts with masked symbols */
+typedef struct
+{
+  UInt16 Summ; /* Freq */
+  Byte Shift;  /* Speed of Freq change; low Shift is for fast change */
+  Byte Count;  /* Count to next change of Shift */
+} CPpmd_See;
+
+#define Ppmd_See_Update(p)  if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
+    { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }  /* NOTE: expands to a bare if statement, so a following else would attach to it */
+
+typedef struct
+{
+  Byte Symbol;
+  Byte Freq;
+  UInt16 SuccessorLow;   /* low 16 bits of the successor reference */
+  UInt16 SuccessorHigh;  /* high 16 bits of the successor reference */
+} CPpmd_State;
+
+#pragma pack(pop)
+
+typedef
+  #ifdef PPMD_32BIT
+    CPpmd_State *
+  #else
+    UInt32
+  #endif
+  CPpmd_State_Ref;
+
+typedef
+  #ifdef PPMD_32BIT
+    void *
+  #else
+    UInt32
+  #endif
+  CPpmd_Void_Ref;
+
+typedef
+  #ifdef PPMD_32BIT
+    Byte *
+  #else
+    UInt32
+  #endif
+  CPpmd_Byte_Ref;
+
+#define PPMD_SetAllBitsIn256Bytes(p) \
+  { unsigned z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
+  p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }}  /* stores ~(size_t)0 into 256 / sizeof(p[0]) elements, eight per pass */
+
+EXTERN_C_END
+ 
+#endif
diff --git a/SevenZip/Ppmd7.c b/SevenZip/Ppmd7.c
new file mode 100644
index 0000000..eda8eb7
--- /dev/null
+++ b/SevenZip/Ppmd7.c
@@ -0,0 +1,710 @@
+/* Ppmd7.c -- PPMdH codec
+2016-05-21 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "Ppmd7.h"
+
+const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
+
+#define MAX_FREQ 124
+#define UNIT_SIZE 12
+
+#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
+#define U2I(nu) (p->Units2Indx[(nu) - 1])
+#define I2U(indx) (p->Indx2Units[indx])
+
+#ifdef PPMD_32BIT
+  #define REF(ptr) (ptr)
+#else
+  #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
+#endif
+
+#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+#define STATS(ctx) Ppmd7_GetStats(p, ctx)
+#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
+#define SUFFIX(ctx) CTX((ctx)->Suffix)
+
+typedef CPpmd7_Context * CTX_PTR;
+
+struct CPpmd7_Node_;
+
+typedef
+  #ifdef PPMD_32BIT
+    struct CPpmd7_Node_ *
+  #else
+    UInt32
+  #endif
+  CPpmd7_Node_Ref;
+
+typedef struct CPpmd7_Node_
+{
+  UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
+  UInt16 NU;
+  CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
+  CPpmd7_Node_Ref Prev;
+} CPpmd7_Node;
+
+#ifdef PPMD_32BIT
+  #define NODE(ptr) (ptr)
+#else
+  #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs)))
+#endif
+
+void Ppmd7_Construct(CPpmd7 *p)
+{  /* Marks the model as unallocated and fills the constant lookup tables. */
+  unsigned i, k, m;
+
+  p->Base = 0;
+
+  for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)  /* size classes: Indx2Units[i] = block size in units, Units2Indx[nu - 1] = class index */
+  {
+    unsigned step = (i >= 12 ? 4 : (i >> 2) + 1);  /* sizes grow by 1, 2, 3 units for the first three groups of four classes, then by 4 */
+    do { p->Units2Indx[k++] = (Byte)i; } while (--step);
+    p->Indx2Units[i] = (Byte)k;
+  }
+
+  p->NS2BSIndx[0] = (0 << 1);
+  p->NS2BSIndx[1] = (1 << 1);
+  memset(p->NS2BSIndx + 2, (2 << 1), 9);  /* entries 2..10 */
+  memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11);  /* entries 11..255 */
+
+  for (i = 0; i < 3; i++)
+    p->NS2Indx[i] = (Byte)i;
+  for (m = i, k = 1; i < 256; i++)  /* from entry 3 on, each new value fills a run one entry longer than the previous run */
+  {
+    p->NS2Indx[i] = (Byte)m;
+    if (--k == 0)
+      k = (++m) - 2;
+  }
+
+  memset(p->HB2Flag, 0, 0x40);  /* entries below 0x40 get flag 0 */
+  memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40);  /* entries 0x40..0xFF get flag 8 */
+}
+
+void Ppmd7_Free(CPpmd7 *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->Base);  /* hand the model memory back through the caller's allocator */
+  p->Size = 0;
+  p->Base = 0;  /* Base == 0 is what Ppmd7_Alloc and Ppmd7_WasAllocated test for */
+}
+
+Bool Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAlloc *alloc)
+{  /* Makes sure a model buffer for `size` bytes exists; returns False when the allocation fails. */
+  if (p->Base == 0 || p->Size != size)  /* an existing buffer of the same size is kept */
+  {
+    Ppmd7_Free(p, alloc);
+    p->AlignOffset =
+      #ifdef PPMD_32BIT
+        (4 - size) & 3;  /* 0..3 */
+      #else
+        4 - (size & 3);  /* 1..4 */
+      #endif
+    if ((p->Base = (Byte *)alloc->Alloc(alloc, p->AlignOffset + size  /* builds without PPMD_32BIT add UNIT_SIZE spare bytes after the model area */
+        #ifndef PPMD_32BIT
+        + UNIT_SIZE
+        #endif
+        )) == 0)
+      return False;  /* Size was left at 0 by Ppmd7_Free */
+    p->Size = size;
+  }
+  return True;
+}
+
+static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
+{  /* Pushes the block at node onto the front of free list indx. */
+  *((CPpmd_Void_Ref *)node) = p->FreeList[indx];  /* the block's first word stores the reference to the previous list head */
+  p->FreeList[indx] = REF(node);
+}
+
+static void *RemoveNode(CPpmd7 *p, unsigned indx)
+{
+  void *head = Ppmd7_GetPtr(p, p->FreeList[indx]);  /* pop the front block of free list indx */
+  p->FreeList[indx] = *(CPpmd_Void_Ref *)head;  /* its first word holds the reference to the next free block */
+  return head;
+}
+
+static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
+{  /* Keeps the first I2U(newIndx) units of the block at ptr and puts the rest on the free lists. */
+  unsigned i, nu = I2U(oldIndx) - I2U(newIndx);  /* units left over */
+  ptr = (Byte *)ptr + U2B(I2U(newIndx));  /* start of the leftover */
+  if (I2U(i = U2I(nu)) != nu)  /* the leftover is not an exact class size: free it as two pieces */
+  {
+    unsigned k = I2U(--i);
+    InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);  /* the piece after the first k units, put on list nu - k - 1 */
+  }
+  InsertNode(p, ptr, i);
+}
+
+static void GlueFreeBlocks(CPpmd7 *p)
+{  /* Empties all free lists, merges free blocks that are adjacent in memory, then refills the lists. */
+  #ifdef PPMD_32BIT
+  CPpmd7_Node headItem;
+  CPpmd7_Node_Ref head = &headItem;
+  #else
+  CPpmd7_Node_Ref head = p->AlignOffset + p->Size;  /* list head placed at the offset just past the model area */
+  #endif
+  
+  CPpmd7_Node_Ref n = head;
+  unsigned i;
+
+  p->GlueCount = 255;
+
+  /* create doubly-linked list of free blocks */
+  for (i = 0; i < PPMD_NUM_INDEXES; i++)
+  {
+    UInt16 nu = I2U(i);
+    CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
+    p->FreeList[i] = 0;
+    while (next != 0)
+    {
+      CPpmd7_Node *node = NODE(next);
+      node->Next = n;
+      n = NODE(n)->Prev = next;
+      next = *(const CPpmd7_Node_Ref *)node;  /* read the free-list link from the first word before Stamp / NU overwrite it */
+      node->Stamp = 0;
+      node->NU = (UInt16)nu;
+    }
+  }
+  NODE(head)->Stamp = 1;  /* nonzero stamp: the head is never merged into a neighbour */
+  NODE(head)->Next = n;
+  NODE(n)->Prev = head;
+  if (p->LoUnit != p->HiUnit)
+    ((CPpmd7_Node *)p->LoUnit)->Stamp = 1;  /* nonzero stamp at LoUnit stops a merge from running into the LoUnit..HiUnit gap */
+  
+  /* Glue free blocks */
+  while (n != head)
+  {
+    CPpmd7_Node *node = NODE(n);
+    UInt32 nu = (UInt32)node->NU;
+    for (;;)
+    {
+      CPpmd7_Node *node2 = NODE(n) + nu;  /* the block that immediately follows in memory */
+      nu += node2->NU;
+      if (node2->Stamp != 0 || nu >= 0x10000)  /* stop at a block that is not free, or when the merged size would not fit the 16-bit NU */
+        break;
+      NODE(node2->Prev)->Next = node2->Next;
+      NODE(node2->Next)->Prev = node2->Prev;
+      node->NU = (UInt16)nu;
+    }
+    n = node->Next;
+  }
+  
+  /* Fill lists of free blocks */
+  for (n = NODE(head)->Next; n != head;)
+  {
+    CPpmd7_Node *node = NODE(n);
+    unsigned nu;
+    CPpmd7_Node_Ref next = node->Next;  /* saved first: InsertNode overwrites the start of the node */
+    for (nu = node->NU; nu > 128; nu -= 128, node += 128)  /* blocks above 128 units are cut into 128-unit pieces for the last list */
+      InsertNode(p, node, PPMD_NUM_INDEXES - 1);
+    if (I2U(i = U2I(nu)) != nu)  /* not an exact class size: free it as two pieces, as SplitBlock does */
+    {
+      unsigned k = I2U(--i);
+      InsertNode(p, node + k, nu - k - 1);
+    }
+    InsertNode(p, node, i);
+    n = next;
+  }
+}
+
+static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
+{  /* Slow allocation path for size class indx; returns NULL when no memory can be found. */
+  unsigned i;
+  void *retVal;
+  if (p->GlueCount == 0)  /* merge adjacent free blocks, then retry the exact list */
+  {
+    GlueFreeBlocks(p);
+    if (p->FreeList[indx] != 0)
+      return RemoveNode(p, indx);
+  }
+  i = indx;
+  do  /* look for a free block in any larger size class */
+  {
+    if (++i == PPMD_NUM_INDEXES)  /* none found: carve the block from below UnitsStart, if the space above Text allows */
+    {
+      UInt32 numBytes = U2B(I2U(indx));
+      p->GlueCount--;
+      return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+    }
+  }
+  while (p->FreeList[i] == 0);
+  retVal = RemoveNode(p, i);
+  SplitBlock(p, retVal, i, indx);  /* keep the requested size, free the remainder */
+  return retVal;
+}
+
+static void *AllocUnits(CPpmd7 *p, unsigned indx)
+{
+  /* Preference order: the free list of this class, the LoUnit..HiUnit gap, then the slow path. */
+  if (p->FreeList[indx] == 0)
+  {
+    const UInt32 need = U2B(I2U(indx));
+    Byte *fresh = p->LoUnit;
+    if (need > (UInt32)(p->HiUnit - fresh))
+      return AllocUnitsRare(p, indx);
+    p->LoUnit = fresh + need;
+    return fresh;
+  }
+  return RemoveNode(p, indx);
+}
+
+#define MyMem12Cpy(dest, src, num) \
+  { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \
+    do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); }
+
+static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
+{
+  const unsigned fromIdx = U2I(oldNU), toIdx = U2I(newNU);
+  void *moved;
+  if (fromIdx == toIdx)
+    return oldPtr;  /* both sizes map to the same class: nothing to do */
+  if (p->FreeList[toIdx] == 0)
+  {
+    SplitBlock(p, oldPtr, fromIdx, toIdx);  /* shrink in place and free the tail */
+    return oldPtr;
+  }
+  moved = RemoveNode(p, toIdx);  /* a block of the target class is free: move the data into it */
+  MyMem12Cpy(moved, oldPtr, newNU);
+  InsertNode(p, oldPtr, fromIdx);
+  return moved;
+}
+
+#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
+
+static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
+{
+  p->SuccessorHigh = (UInt16)((UInt32)(v) >> 16);  /* upper 16 bits of the reference */
+  p->SuccessorLow = (UInt16)(UInt32)(v);  /* lower 16 bits; the cast discards the rest */
+}
+
+static void RestartModel(CPpmd7 *p)
+{  /* Discards everything learned so far and rebuilds the initial model inside the existing buffer. */
+  unsigned i, k, m;
+
+  memset(p->FreeList, 0, sizeof(p->FreeList));
+  p->Text = p->Base + p->AlignOffset;
+  p->HiUnit = p->Text + p->Size;
+  p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;  /* the units area starts a whole number of units below HiUnit */
+  p->GlueCount = 0;
+
+  p->OrderFall = p->MaxOrder;
+  p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;  /* -(min(MaxOrder, 12)) - 1 */
+  p->PrevSuccess = 0;
+
+  p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+  p->MinContext->Suffix = 0;  /* the root context has no suffix */
+  p->MinContext->NumStats = 256;
+  p->MinContext->SummFreq = 256 + 1;
+  p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+  p->LoUnit += U2B(256 / 2);  /* reserve 128 units for the root's 256 states */
+  p->MinContext->Stats = REF(p->FoundState);
+  for (i = 0; i < 256; i++)  /* one state per byte value, each with Freq 1 and no successor */
+  {
+    CPpmd_State *s = &p->FoundState[i];
+    s->Symbol = (Byte)i;
+    s->Freq = 1;
+    SetSuccessor(s, 0);
+  }
+
+  for (i = 0; i < 128; i++)  /* BinSumm[i][k + 8 * j] = PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2) for j = 0..7 */
+    for (k = 0; k < 8; k++)
+    {
+      UInt16 *dest = p->BinSumm[i] + k;
+      UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
+      for (m = 0; m < 64; m += 8)
+        dest[m] = val;
+    }
+  
+  for (i = 0; i < 25; i++)  /* every SEE entry of row i starts from the same Summ / Shift / Count */
+    for (k = 0; k < 16; k++)
+    {
+      CPpmd_See *s = &p->See[i][k];
+      s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4));
+      s->Count = 4;
+    }
+}
+
+void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
+{
+  p->DummySee.Count = 64;  /* unused */
+  p->DummySee.Summ = 0;    /* unused */
+  p->DummySee.Shift = PPMD_PERIOD_BITS;
+  p->MaxOrder = maxOrder;  /* must be in place before the rebuild: RestartModel reads MaxOrder */
+  RestartModel(p);
+}
+
+static CTX_PTR CreateSuccessors(CPpmd7 *p, Bool skip)
+{  /* Builds a chain of one-state child contexts along the suffix chain; returns the last one created (or an existing one), NULL when out of memory. */
+  CPpmd_State upState;
+  CTX_PTR c = p->MinContext;
+  CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
+  CPpmd_State *ps[PPMD7_MAX_ORDER];  /* states whose successor will be redirected to the new contexts */
+  unsigned numPs = 0;
+  
+  if (!skip)
+    ps[numPs++] = p->FoundState;
+  
+  while (c->Suffix)  /* walk the suffix chain, collecting the state for the found symbol in each context */
+  {
+    CPpmd_Void_Ref successor;
+    CPpmd_State *s;
+    c = SUFFIX(c);
+    if (c->NumStats != 1)
+    {
+      for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);  /* unbounded scan: relies on the symbol being present in this context */
+    }
+    else
+      s = ONE_STATE(c);
+    successor = SUCCESSOR(s);
+    if (successor != upBranch)  /* this state already has a different successor: stop walking here */
+    {
+      c = CTX(successor);
+      if (numPs == 0)
+        return c;
+      break;
+    }
+    ps[numPs++] = s;
+  }
+  
+  upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch);  /* the byte that upBranch refers to */
+  SetSuccessor(&upState, upBranch + 1);
+  
+  if (c->NumStats == 1)
+    upState.Freq = ONE_STATE(c)->Freq;
+  else
+  {
+    UInt32 cf, s0;
+    CPpmd_State *s;
+    for (s = STATS(c); s->Symbol != upState.Symbol; s++);
+    cf = s->Freq - 1;
+    s0 = c->SummFreq - c->NumStats - cf;
+    upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0))));  /* initial frequency derived from cf relative to s0 */
+  }
+
+  do
+  {
+    /* Create Child */
+    CTX_PTR c1; /* = AllocContext(p); */
+    if (p->HiUnit != p->LoUnit)  /* take one unit from the top of the gap if there is one */
+      c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+    else if (p->FreeList[0] != 0)
+      c1 = (CTX_PTR)RemoveNode(p, 0);
+    else
+    {
+      c1 = (CTX_PTR)AllocUnitsRare(p, 0);
+      if (!c1)
+        return NULL;
+    }
+    c1->NumStats = 1;
+    *ONE_STATE(c1) = upState;
+    c1->Suffix = REF(c);
+    SetSuccessor(ps[--numPs], REF(c1));  /* the collected states are processed last-collected first */
+    c = c1;
+  }
+  while (numPs != 0);
+  
+  return c;
+}
+
+static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
+{
+  CPpmd_State saved = *t2;  /* exchange the two states by value */
+  *t2 = *t1;
+  *t1 = saved;
+}
+
+static void UpdateModel(CPpmd7 *p)
+{  /* Updates the model after a symbol was coded; calls RestartModel whenever memory runs out. */
+  CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
+  CTX_PTR c;
+  unsigned s0, ns;
+  
+  if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)  /* also raise the symbol's frequency in the suffix context */
+  {
+    c = SUFFIX(p->MinContext);
+    
+    if (c->NumStats == 1)
+    {
+      CPpmd_State *s = ONE_STATE(c);
+      if (s->Freq < 32)
+        s->Freq++;
+    }
+    else
+    {
+      CPpmd_State *s = STATS(c);
+      if (s->Symbol != p->FoundState->Symbol)
+      {
+        do { s++; } while (s->Symbol != p->FoundState->Symbol);
+        if (s[0].Freq >= s[-1].Freq)  /* move the state one slot towards the front */
+        {
+          SwapStates(&s[0], &s[-1]);
+          s--;
+        }
+      }
+      if (s->Freq < MAX_FREQ - 9)
+      {
+        s->Freq += 2;
+        c->SummFreq += 2;
+      }
+    }
+  }
+
+  if (p->OrderFall == 0)
+  {
+    p->MinContext = p->MaxContext = CreateSuccessors(p, True);
+    if (p->MinContext == 0)  /* out of memory */
+    {
+      RestartModel(p);
+      return;
+    }
+    SetSuccessor(p->FoundState, REF(p->MinContext));
+    return;
+  }
+  
+  *p->Text++ = p->FoundState->Symbol;  /* append the symbol to the text area */
+  successor = REF(p->Text);
+  if (p->Text >= p->UnitsStart)  /* the text area has grown into the units area */
+  {
+    RestartModel(p);
+    return;
+  }
+  
+  if (fSuccessor)
+  {
+    if (fSuccessor <= successor)  /* NOTE(review): presumably means the successor still refers to text rather than to a context - confirm */
+    {
+      CTX_PTR cs = CreateSuccessors(p, False);
+      if (cs == NULL)
+      {
+        RestartModel(p);
+        return;
+      }
+      fSuccessor = REF(cs);
+    }
+    if (--p->OrderFall == 0)
+    {
+      successor = fSuccessor;
+      p->Text -= (p->MaxContext != p->MinContext);  /* take the appended byte back when the contexts differ */
+    }
+  }
+  else
+  {
+    SetSuccessor(p->FoundState, successor);
+    fSuccessor = REF(p->MinContext);
+  }
+  
+  s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1);
+  
+  for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c))  /* add the symbol as a new state to every context from MaxContext down to, but excluding, MinContext */
+  {
+    unsigned ns1;
+    UInt32 cf, sf;
+    if ((ns1 = c->NumStats) != 1)
+    {
+      if ((ns1 & 1) == 0)  /* an even state count fills its units exactly (two states per unit) */
+      {
+        /* Expand for one UNIT */
+        unsigned oldNU = ns1 >> 1;
+        unsigned i = U2I(oldNU);
+        if (i != U2I(oldNU + 1))  /* the larger size falls in another class: move the states to a bigger block */
+        {
+          void *ptr = AllocUnits(p, i + 1);
+          void *oldPtr;
+          if (!ptr)
+          {
+            RestartModel(p);
+            return;
+          }
+          oldPtr = STATS(c);
+          MyMem12Cpy(ptr, oldPtr, oldNU);
+          InsertNode(p, oldPtr, i);
+          c->Stats = STATS_REF(ptr);
+        }
+      }
+      c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1)));
+    }
+    else  /* one-state context: move its embedded state into a freshly allocated unit */
+    {
+      CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
+      if (!s)
+      {
+        RestartModel(p);
+        return;
+      }
+      *s = *ONE_STATE(c);
+      c->Stats = REF(s);
+      if (s->Freq < MAX_FREQ / 4 - 1)
+        s->Freq <<= 1;
+      else
+        s->Freq = MAX_FREQ - 4;
+      c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3));
+    }
+    cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6);
+    sf = (UInt32)s0 + c->SummFreq;
+    if (cf < 6 * sf)  /* pick the new state's frequency: 1..3 here, 4..7 in the else branch */
+    {
+      cf = 1 + (cf > sf) + (cf >= 4 * sf);
+      c->SummFreq += 3;
+    }
+    else
+    {
+      cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
+      c->SummFreq = (UInt16)(c->SummFreq + cf);
+    }
+    {
+      CPpmd_State *s = STATS(c) + ns1;  /* append the new state after the existing ns1 states */
+      SetSuccessor(s, successor);
+      s->Symbol = p->FoundState->Symbol;
+      s->Freq = (Byte)cf;
+      c->NumStats = (UInt16)(ns1 + 1);
+    }
+  }
+  p->MaxContext = p->MinContext = CTX(fSuccessor);
+}
+  
+static void Rescale(CPpmd7 *p)
+{  /* Moves the found state to the front of MinContext, halves all frequencies and drops states that reach zero. */
+  unsigned i, adder, sumFreq, escFreq;
+  CPpmd_State *stats = STATS(p->MinContext);
+  CPpmd_State *s = p->FoundState;
+  {
+    CPpmd_State tmp = *s;  /* move the found state to the front, shifting the states before it back by one */
+    for (; s != stats; s--)
+      s[0] = s[-1];
+    *s = tmp;
+  }
+  escFreq = p->MinContext->SummFreq - s->Freq;
+  s->Freq += 4;
+  adder = (p->OrderFall != 0);  /* with adder == 1 the halving below rounds up */
+  s->Freq = (Byte)((s->Freq + adder) >> 1);
+  sumFreq = s->Freq;
+  
+  i = p->MinContext->NumStats - 1;
+  do  /* halve every other state, keeping the array ordered by descending frequency */
+  {
+    escFreq -= (++s)->Freq;
+    s->Freq = (Byte)((s->Freq + adder) >> 1);
+    sumFreq += s->Freq;
+    if (s[0].Freq > s[-1].Freq)  /* insertion step: shift smaller predecessors back */
+    {
+      CPpmd_State *s1 = s;
+      CPpmd_State tmp = *s1;
+      do
+        s1[0] = s1[-1];
+      while (--s1 != stats && tmp.Freq > s1[-1].Freq);
+      *s1 = tmp;
+    }
+  }
+  while (--i);
+  
+  if (s->Freq == 0)  /* states at the end dropped to zero: remove them */
+  {
+    unsigned numStats = p->MinContext->NumStats;
+    unsigned n0, n1;
+    do { i++; } while ((--s)->Freq == 0);  /* i (0 after the loop above) counts the zero-frequency states */
+    escFreq += i;
+    p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i);
+    if (p->MinContext->NumStats == 1)  /* a single state remains: store it inside the context and free the array */
+    {
+      CPpmd_State tmp = *stats;
+      do
+      {
+        tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1));
+        escFreq >>= 1;
+      }
+      while (escFreq > 1);
+      InsertNode(p, stats, U2I(((numStats + 1) >> 1)));
+      *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+      return;
+    }
+    n0 = (numStats + 1) >> 1;  /* units needed before and after the removal */
+    n1 = (p->MinContext->NumStats + 1) >> 1;
+    if (n0 != n1)
+      p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+  }
+  p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+  p->FoundState = STATS(p->MinContext);
+}
+
+CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
+{  /* Selects the SEE entry for MinContext, stores the escape frequency in *escFreq and returns the entry. */
+  CPpmd_See *see;
+  unsigned nonMasked = p->MinContext->NumStats - numMasked;
+  if (p->MinContext->NumStats != 256)
+  {
+    see = p->See[(unsigned)p->NS2Indx[nonMasked - 1]] +  /* row from the non-masked count; the column is the sum of the terms below */
+        (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) +
+        2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) +
+        4 * (unsigned)(numMasked > nonMasked) +
+        p->HiBitsFlag;
+    {
+      unsigned r = (see->Summ >> see->Shift);
+      see->Summ = (UInt16)(see->Summ - r);
+      *escFreq = r + (r == 0);  /* never report an escape frequency of 0 */
+    }
+  }
+  else  /* a context holding all 256 symbols uses the dummy entry with escape frequency 1 */
+  {
+    see = &p->DummySee;
+    *escFreq = 1;
+  }
+  return see;
+}
+
+static void NextContext(CPpmd7 *p)
+{
+  CTX_PTR next = CTX(SUCCESSOR(p->FoundState));
+  if (p->OrderFall != 0 || (Byte *)next <= p->Text)
+    UpdateModel(p);  /* the successor cannot simply be followed: do the full model update */
+  else
+    p->MinContext = p->MaxContext = next;
+}
+
+void Ppmd7_Update1(CPpmd7 *p)
+{
+  CPpmd_State *cur = p->FoundState, *prev = cur - 1;
+  cur->Freq += 4;
+  p->MinContext->SummFreq += 4;
+  if (cur->Freq > prev->Freq)
+  {
+    SwapStates(cur, prev);  /* move the state one slot towards the front */
+    p->FoundState = prev;
+    if (prev->Freq > MAX_FREQ)
+      Rescale(p);
+  }
+  NextContext(p);
+}
+
+void Ppmd7_Update1_0(CPpmd7 *p)
+{
+  CPpmd_State *s = p->FoundState;
+  p->PrevSuccess = (2 * s->Freq > p->MinContext->SummFreq);  /* 1 when the state holds more than half of SummFreq */
+  p->RunLength += p->PrevSuccess;
+  p->MinContext->SummFreq += 4;
+  if ((s->Freq += 4) > MAX_FREQ) Rescale(p);
+  NextContext(p);
+}
+
+void Ppmd7_UpdateBin(CPpmd7 *p)
+{
+  p->PrevSuccess = 1;
+  p->RunLength++;
+  if (p->FoundState->Freq < 128) p->FoundState->Freq++;  /* count up, but never past 128 */
+  NextContext(p);
+}
+
+void Ppmd7_Update2(CPpmd7 *p)
+{
+  p->RunLength = p->InitRL;  /* the run length restarts from its initial value */
+  p->MinContext->SummFreq += 4;
+  p->FoundState->Freq += 4;
+  if (p->FoundState->Freq > MAX_FREQ) Rescale(p);
+  UpdateModel(p);
+}
diff --git a/SevenZip/Ppmd7.h b/SevenZip/Ppmd7.h
new file mode 100644
index 0000000..87eefde
--- /dev/null
+++ b/SevenZip/Ppmd7.h
@@ -0,0 +1,140 @@
+/* Ppmd7.h -- PPMdH compression codec
+2016-05-21 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+/* This code supports a virtual RangeDecoder and includes the implementation
+of the RangeCoder from 7z instead of the RangeCoder from the original PPMd var.H.
+If you need compatibility with the original PPMd var.H, you can supply an external RangeDecoder. */
+
+#ifndef __PPMD7_H
+#define __PPMD7_H
+
+#include "Ppmd.h"
+
+EXTERN_C_BEGIN
+
+#define PPMD7_MIN_ORDER 2 /* NOTE(review): presumably the smallest maxOrder accepted by Ppmd7_Init - confirm */
+#define PPMD7_MAX_ORDER 64 /* NOTE(review): presumably the largest maxOrder accepted by Ppmd7_Init - confirm */
+
+#define PPMD7_MIN_MEM_SIZE (1 << 11) /* 2 KiB; NOTE(review): presumably the lower bound for the size passed to Ppmd7_Alloc - confirm */
+#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3) /* NOTE(review): presumably the upper bound for Ppmd7_Alloc's size; the reason for the 12 * 3 margin is not visible here */
+
+struct CPpmd7_Context_; /* forward declaration needed by the pointer form of the reference type */
+
+typedef /* Reference to a context: a real pointer when PPMD_32BIT is defined, otherwise a UInt32 resolved through Ppmd7_GetContext(). */
+  #ifdef PPMD_32BIT
+    struct CPpmd7_Context_ *
+  #else
+    UInt32
+  #endif
+  CPpmd7_Context_Ref;
+
+typedef struct CPpmd7_Context_ /* One model context. Field order matters: Ppmd7Context_OneState() reinterprets the storage starting at SummFreq. */
+{
+  UInt16 NumStats; /* number of symbol states; 1 selects the single-state (binary) path in Ppmd7_DecodeSymbol */
+  UInt16 SummFreq; /* total passed to the range decoder for multi-symbol contexts */
+  CPpmd_State_Ref Stats; /* reference to the state array, resolved with Ppmd7_GetStats() */
+  CPpmd7_Context_Ref Suffix; /* reference to the suffix context; a zero value ends the escape chain in Ppmd7_DecodeSymbol */
+} CPpmd7_Context;
+
+#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq) /* views the context storage from SummFreq onward as a CPpmd_State; used for contexts with NumStats == 1 */
+
+typedef struct /* Complete PPMd7 model state; passed to both Ppmd7_DecodeSymbol and Ppmd7_EncodeSymbol. */
+{
+  CPpmd7_Context *MinContext, *MaxContext; /* MinContext is the context currently used for coding; NextContext() sets both together */
+  CPpmd_State *FoundState; /* state of the most recently coded symbol */
+  unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag; /* OrderFall is incremented on every fall-back to a suffix context; PrevSuccess and HiBitsFlag feed Ppmd7_GetBinSumm */
+  Int32 RunLength, InitRL; /* must be 32-bit at least */
+
+  UInt32 Size;
+  UInt32 GlueCount;
+  Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; /* Base is the origin that offset-type references are added to (see Ppmd7_GetPtr) */
+  UInt32 AlignOffset;
+
+  Byte Indx2Units[PPMD_NUM_INDEXES];
+  Byte Units2Indx[128];
+  CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
+  Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256]; /* lookup tables: NS2Indx and NS2BSIndx are indexed by symbol counts minus one, HB2Flag by symbol value */
+  CPpmd_See DummySee, See[25][16]; /* SEE entries chosen by Ppmd7_MakeEscFreq; DummySee is returned for contexts with 256 symbols */
+  UInt16 BinSumm[128][64]; /* probabilities for single-symbol contexts, addressed through Ppmd7_GetBinSumm */
+} CPpmd7;
+
+void Ppmd7_Construct(CPpmd7 *p); /* NOTE(review): presumably must be called once before any other Ppmd7_* function - confirm in Ppmd7.c */
+Bool Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAlloc *alloc); /* NOTE(review): presumably allocates the model memory of the given size through alloc - confirm */
+void Ppmd7_Free(CPpmd7 *p, ISzAlloc *alloc); /* NOTE(review): presumably releases what Ppmd7_Alloc obtained; pass the same allocator - confirm */
+void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); /* NOTE(review): presumably resets the model for a new stream with the given order - confirm */
+#define Ppmd7_WasAllocated(p) ((p)->Base != NULL) /* true when (p)->Base is non-NULL */
+
+
+/* ---------- Internal Functions ---------- */
+
+extern const Byte PPMD7_kExpEscape[16]; /* indexed by (*prob >> 10) in Ppmd7_DecodeSymbol to set p->InitEsc */
+
+#ifdef PPMD_32BIT /* references are already pointers: the accessors pass them through and ignore p */
+  #define Ppmd7_GetPtr(p, ptr) (ptr)
+  #define Ppmd7_GetContext(p, ptr) (ptr)
+  #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
+#else /* references are byte offsets that are added to (p)->Base */
+  #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
+  #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
+  #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
+#endif
+
+void Ppmd7_Update1(CPpmd7 *p); /* after coding a non-first state of a multi-symbol context */
+void Ppmd7_Update1_0(CPpmd7 *p); /* after coding the first state of a multi-symbol context */
+void Ppmd7_Update2(CPpmd7 *p); /* after coding a symbol found in a suffix context following an escape */
+void Ppmd7_UpdateBin(CPpmd7 *p); /* after coding the symbol of a single-symbol context */
+
+#define Ppmd7_GetBinSumm(p) /* Address of the BinSumm cell for the single-symbol context (p)->MinContext. Side effect: stores (p)->HiBitsFlag. p is evaluated several times, so pass a side-effect-free expression. */ \
+    (&(p)->BinSumm[(unsigned)Ppmd7Context_OneState((p)->MinContext)->Freq - 1][(p)->PrevSuccess + \
+    (p)->NS2BSIndx[Ppmd7_GetContext((p), (p)->MinContext->Suffix)->NumStats - 1] + \
+    ((p)->HiBitsFlag = (p)->HB2Flag[(p)->FoundState->Symbol]) + \
+    2 * (p)->HB2Flag[(unsigned)Ppmd7Context_OneState((p)->MinContext)->Symbol] + \
+    (((p)->RunLength >> 26) & 0x20)])
+
+CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq); /* returns the SEE entry chosen for p->MinContext and writes its escape frequency to *escFreq */
+
+
+/* ---------- Decode ---------- */
+
+typedef struct /* Range-decoder callbacks used by Ppmd7_DecodeSymbol; each call passes the interface pointer itself as the first argument. */
+{
+  UInt32 (*GetThreshold)(void *p, UInt32 total); /* returns the count to locate within a cumulative-frequency total */
+  void (*Decode)(void *p, UInt32 start, UInt32 size); /* consumes the interval that begins at start and spans size */
+  UInt32 (*DecodeBit)(void *p, UInt32 size0); /* decodes one binary decision; the 7z implementation returns 0 or 1 */
+} IPpmd7_RangeDec;
+
+typedef struct /* State of the 7z range decoder. p must remain the first member: the callbacks cast their void* argument back to this struct. */
+{
+  IPpmd7_RangeDec p; /* callback table filled by Ppmd7z_RangeDec_CreateVTable */
+  UInt32 Range; /* current interval width */
+  UInt32 Code; /* current code value, refilled one byte at a time from Stream */
+  IByteIn *Stream; /* byte source; its Read callback is invoked with the stream pointer itself */
+} CPpmd7z_RangeDec;
+
+void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p); /* fills p->p with the 7z range-decoder callbacks */
+Bool Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p); /* reads the 5 leading bytes from p->Stream; False if the first byte is nonzero or Code ends up as 0xFFFFFFFF */
+#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) /* true when the remaining Code is 0 */
+
+int Ppmd7_DecodeSymbol(CPpmd7 *p, IPpmd7_RangeDec *rc); /* returns the decoded symbol, or -1 / -2 when no symbol could be decoded */
+
+
+/* ---------- Encode ---------- */
+
+typedef struct /* State of the 7z range encoder passed to Ppmd7_EncodeSymbol. */
+{
+  UInt64 Low; /* NOTE(review): presumably the low end of the current interval, 64-bit to hold a carry - confirm in Ppmd7Enc.c */
+  UInt32 Range; /* current interval width */
+  Byte Cache; /* NOTE(review): presumably the pending output byte awaiting carry resolution - confirm */
+  UInt64 CacheSize; /* NOTE(review): presumably the count of pending bytes - confirm */
+  IByteOut *Stream; /* byte sink */
+} CPpmd7z_RangeEnc;
+
+void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p); /* NOTE(review): presumably resets the encoder state before the first symbol - confirm */
+void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p); /* NOTE(review): presumably writes out the bytes still buffered in the encoder - confirm */
+
+void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol); /* encoder counterpart of Ppmd7_DecodeSymbol; NOTE(review): a negative symbol presumably encodes an end mark - confirm */
+
+EXTERN_C_END
+ 
+#endif
diff --git a/SevenZip/Ppmd7Dec.c b/SevenZip/Ppmd7Dec.c
new file mode 100644
index 0000000..04b4b09
--- /dev/null
+++ b/SevenZip/Ppmd7Dec.c
@@ -0,0 +1,189 @@
+/* Ppmd7Dec.c -- PPMdH Decoder
+2010-03-12 : Igor Pavlov : Public domain
+This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
+#include "Precomp.h"
+
+#include "Ppmd7.h"
+
+#define kTopValue (1 << 24) /* Range_Normalize() pulls in more input while Range is below this value */
+
+Bool Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p) /* Primes the decoder from p->Stream: one byte that must be zero, then four bytes of Code. */
+{
+  unsigned remaining = 4;
+  p->Range = 0xFFFFFFFF;
+  p->Code = 0;
+  if (p->Stream->Read((void *)p->Stream) != 0)
+    return False; /* a nonzero first byte is rejected before anything else is read */
+  while (remaining-- != 0)
+    p->Code = (p->Code << 8) | p->Stream->Read((void *)p->Stream);
+  return (p->Code != 0xFFFFFFFF); /* an all-ones Code is reported as failure */
+}
+
+static UInt32 Range_GetThreshold(void *pp, UInt32 total) /* Divides Range by total (stored back into Range) and returns Code expressed in those units. */
+{
+  CPpmd7z_RangeDec *dec = (CPpmd7z_RangeDec *)pp;
+  return dec->Code / (dec->Range /= total); /* Range now holds the width of one frequency unit; Range_Decode() relies on that */
+}
+
+static void Range_Normalize(CPpmd7z_RangeDec *p)
+{
+  /* Shifts one input byte into Code and scales Range by 256 while Range
+     is below kTopValue. At most two bytes are consumed per call. */
+  unsigned pulled;
+  for (pulled = 0; pulled < 2; pulled++)
+  {
+    if (p->Range >= kTopValue)
+      break;
+    p->Code = (p->Code << 8) | p->Stream->Read((void *)p->Stream);
+    p->Range <<= 8;
+  }
+}
+
+static void Range_Decode(void *pp, UInt32 start, UInt32 size) /* Consumes the interval [start, start + size), measured in units of the current Range, then renormalizes. */
+{
+  CPpmd7z_RangeDec *dec = (CPpmd7z_RangeDec *)pp;
+  dec->Code = dec->Code - dec->Range * start; /* rebase Code to the start of the chosen interval */
+  dec->Range = dec->Range * size; /* the new width is size units */
+  Range_Normalize(dec);
+}
+
+static UInt32 Range_DecodeBit(void *pp, UInt32 size0)
+{
+  /* Decodes one binary decision. The current range is split at
+     (Range >> 14) * size0: a Code below the split yields 0,
+     anything else yields 1. Returns the decoded bit. */
+  CPpmd7z_RangeDec *dec = (CPpmd7z_RangeDec *)pp;
+  const UInt32 split = (dec->Range >> 14) * size0;
+  const UInt32 bit = (dec->Code >= split);
+  if (bit != 0)
+  {
+    /* Upper part: rebase Code and keep what lies above the split. */
+    dec->Code -= split;
+    dec->Range -= split;
+  }
+  else
+    dec->Range = split; /* lower part: the range shrinks to the split */
+  Range_Normalize(dec);
+  return bit;
+}
+
+void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p) /* Points the embedded IPpmd7_RangeDec callbacks at the static 7z range-decoder routines of this file. */
+{
+  p->p.DecodeBit = Range_DecodeBit;
+  p->p.Decode = Range_Decode;
+  p->p.GetThreshold = Range_GetThreshold;
+}
+
+
+#define MASK(sym) ((signed char *)charMask)[sym] /* per-symbol flag byte inside the caller's local charMask array: all bits set (reads as -1) = still eligible, 0 = excluded */
+
+int Ppmd7_DecodeSymbol(CPpmd7 *p, IPpmd7_RangeDec *rc) /* Decodes one symbol using model p and range decoder rc. Returns the symbol, -1 when an escape reaches a context without a suffix, -2 when the decoded count lies outside the expected total. */
+{
+  size_t charMask[256 / sizeof(size_t)]; /* 256 flag bytes accessed through MASK(); only initialised once an escape occurs */
+  if (p->MinContext->NumStats != 1)
+  {
+    CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
+    unsigned i;
+    UInt32 count, hiCnt;
+    if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq)) /* the first state is tested on its own */
+    {
+      Byte symbol;
+      rc->Decode(rc, 0, s->Freq);
+      p->FoundState = s;
+      symbol = s->Symbol;
+      Ppmd7_Update1_0(p);
+      return symbol;
+    }
+    p->PrevSuccess = 0;
+    i = p->MinContext->NumStats - 1; /* number of remaining states to scan */
+    do
+    {
+      if ((hiCnt += (++s)->Freq) > count) /* hiCnt is the cumulative frequency up to and including s */
+      {
+        Byte symbol;
+        rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+        p->FoundState = s;
+        symbol = s->Symbol;
+        Ppmd7_Update1(p);
+        return symbol;
+      }
+    }
+    while (--i);
+    if (count >= p->MinContext->SummFreq)
+      return -2; /* count is not below the context total */
+    p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; /* FoundState has not been reassigned on this path, so it still refers to the previously coded symbol */
+    rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt); /* consume the interval above all symbol frequencies (the escape) */
+    PPMD_SetAllBitsIn256Bytes(charMask);
+    MASK(s->Symbol) = 0; /* s points at the last state here; exclude it and, below, every earlier state */
+    i = p->MinContext->NumStats - 1;
+    do { MASK((--s)->Symbol) = 0; } while (--i);
+  }
+  else
+  {
+    UInt16 *prob = Ppmd7_GetBinSumm(p); /* single-symbol context: one adaptive probability decides symbol vs. escape */
+    if (rc->DecodeBit(rc, *prob) == 0)
+    {
+      Byte symbol;
+      *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
+      symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+      Ppmd7_UpdateBin(p);
+      return symbol;
+    }
+    *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
+    p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; /* table lookup on the top bits of the updated probability */
+    PPMD_SetAllBitsIn256Bytes(charMask);
+    MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; /* exclude the one symbol that was just rejected */
+    p->PrevSuccess = 0;
+  }
+  for (;;) /* escape loop: each pass moves to a suffix context and tries the symbols not yet excluded */
+  {
+    CPpmd_State *ps[256], *s;
+    UInt32 freqSum, count, hiCnt;
+    CPpmd_See *see;
+    unsigned i, num, numMasked = p->MinContext->NumStats; /* every symbol of the context being left is already masked */
+    do
+    {
+      p->OrderFall++;
+      if (!p->MinContext->Suffix)
+        return -1; /* no suffix context left to escape to */
+      p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+    }
+    while (p->MinContext->NumStats == numMasked); /* skip suffix contexts whose state count equals the masked count */
+    hiCnt = 0;
+    s = Ppmd7_GetStats(p, p->MinContext);
+    i = 0;
+    num = p->MinContext->NumStats - numMasked; /* number of eligible states expected in this context */
+    do
+    {
+      int k = (int)(MASK(s->Symbol)); /* -1 if the symbol is eligible, 0 if masked */
+      hiCnt += (s->Freq & k); /* adds Freq for eligible symbols only */
+      ps[i] = s++; /* the slot is kept only if i advances on the next line */
+      i -= k; /* i++ for eligible symbols */
+    }
+    while (i != num);
+    
+    see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); /* freqSum starts as the escape frequency */
+    freqSum += hiCnt;
+    count = rc->GetThreshold(rc, freqSum);
+    
+    if (count < hiCnt)
+    {
+      Byte symbol;
+      CPpmd_State **pps = ps;
+      for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++); /* empty body: advance until the cumulative frequency passes count */
+      s = *pps;
+      rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+      Ppmd_See_Update(see);
+      p->FoundState = s;
+      symbol = s->Symbol;
+      Ppmd7_Update2(p);
+      return symbol;
+    }
+    if (count >= freqSum)
+      return -2; /* count is not below the total */
+    rc->Decode(rc, hiCnt, freqSum - hiCnt); /* another escape: consume the interval above the symbol frequencies */
+    see->Summ = (UInt16)(see->Summ + freqSum);
+    do { MASK(ps[--i]->Symbol) = 0; } while (i != 0); /* exclude every state collected in ps before the next pass */
+  }
+}
diff --git a/SevenZip/Precomp.c b/SevenZip/Precomp.c
new file mode 100644
index 0000000..01605e3
--- /dev/null
+++ b/SevenZip/Precomp.c
@@ -0,0 +1,4 @@
+/* Precomp.c -- StdAfx
+2013-01-21 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
diff --git a/SevenZip/Precomp.h b/SevenZip/Precomp.h
new file mode 100644
index 0000000..e8ff8b4
--- /dev/null
+++ b/SevenZip/Precomp.h
@@ -0,0 +1,10 @@
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/SevenZip/SevenZip.vcxproj b/SevenZip/SevenZip.vcxproj
new file mode 100644
index 0000000..3fc2fb8
--- /dev/null
+++ b/SevenZip/SevenZip.vcxproj
@@ -0,0 +1,416 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|Win32">
+      <Configuration>Libretro</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|x64">
+      <Configuration>Libretro</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|Win32">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|x64">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|Win32">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|x64">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="7z.h" />
+    <ClInclude Include="7zAlloc.h" />
+    <ClInclude Include="7zBuf.h" />
+    <ClInclude Include="7zCrc.h" />
+    <ClInclude Include="7zFile.h" />
+    <ClInclude Include="7zMemBuffer.h" />
+    <ClInclude Include="7zTypes.h" />
+    <ClInclude Include="Bcj2.h" />
+    <ClInclude Include="Bra.h" />
+    <ClInclude Include="Compiler.h" />
+    <ClInclude Include="CpuArch.h" />
+    <ClInclude Include="Delta.h" />
+    <ClInclude Include="Lzma2Dec.h" />
+    <ClInclude Include="LzmaDec.h" />
+    <ClInclude Include="Ppmd.h" />
+    <ClInclude Include="Ppmd7.h" />
+    <ClInclude Include="Precomp.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="7zAlloc.c" />
+    <ClCompile Include="7zArcIn.c" />
+    <ClCompile Include="7zBuf.c" />
+    <ClCompile Include="7zCrc.c" />
+    <ClCompile Include="7zCrcOpt.c" />
+    <ClCompile Include="7zDec.c" />
+    <ClCompile Include="7zFile.c" />
+    <ClCompile Include="7zMemBuffer.c" />
+    <ClCompile Include="7zStream.c" />
+    <ClCompile Include="Bcj2.c" />
+    <ClCompile Include="Bra.c" />
+    <ClCompile Include="Bra86.c" />
+    <ClCompile Include="BraIA64.c" />
+    <ClCompile Include="CpuArch.c" />
+    <ClCompile Include="Delta.c" />
+    <ClCompile Include="Lzma2Dec.c" />
+    <ClCompile Include="LzmaDec.c" />
+    <ClCompile Include="Ppmd7.c" />
+    <ClCompile Include="Ppmd7Dec.c" />
+    <ClCompile Include="Precomp.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{52C4BA3A-E699-4305-B23F-C9083FD07AB6}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>SevenZip</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'"> <!-- NOTE(review): unlike the other configurations, this one writes to the "PGO Profile" folders; presumably so the optimize build reuses the profile build's artifacts. Confirm this is intended. -->
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'"> <!-- NOTE(review): unlike the other configurations, this one writes to the "PGO Profile" folders; presumably so the optimize build reuses the profile build's artifacts. Confirm this is intended. -->
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <CompileAs>CompileAsC</CompileAs>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <CompileAs>CompileAsC</CompileAs>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <CompileAs>CompileAsC</CompileAs>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <CompileAs>CompileAsC</CompileAs>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/SevenZip/SevenZip.vcxproj.filters b/SevenZip/SevenZip.vcxproj.filters
new file mode 100644
index 0000000..b4894d2
--- /dev/null
+++ b/SevenZip/SevenZip.vcxproj.filters
@@ -0,0 +1,128 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="7z.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="7zAlloc.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="7zBuf.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="7zCrc.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="7zFile.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="7zMemBuffer.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="7zTypes.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Bcj2.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Bra.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Compiler.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="CpuArch.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Delta.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Lzma2Dec.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="LzmaDec.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Ppmd.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Ppmd7.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Precomp.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="7zAlloc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zArcIn.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zBuf.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zCrc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zCrcOpt.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zDec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zFile.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zMemBuffer.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="7zStream.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Bcj2.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Bra.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Bra86.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="BraIA64.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="CpuArch.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Delta.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Lzma2Dec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="LzmaDec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Ppmd7.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Ppmd7Dec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Precomp.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/Utilities/ArchiveReader.cpp b/Utilities/ArchiveReader.cpp
new file mode 100644
index 0000000..7dcda01
--- /dev/null
+++ b/Utilities/ArchiveReader.cpp
@@ -0,0 +1,125 @@
+#include "stdafx.h"
+#include "ArchiveReader.h"
+#include <string.h>
+#include <sstream>
+#include <algorithm>
+#include "FolderUtilities.h"
+#include "ZipReader.h"
+#include "SZReader.h"
+
+//Releases the archive copy held in _buffer (allocated when an archive is loaded from a stream)
+ArchiveReader::~ArchiveReader()
+{
+	//delete[] on a null pointer is a no-op, so no explicit check is required
+	delete[] _buffer;
+	_buffer = nullptr;
+}
+
+//Extracts "filename" from the loaded archive and writes its bytes to "stream".
+//Returns false when no archive has been loaded or when the extraction fails.
+bool ArchiveReader::GetStream(string filename, std::stringstream &stream)
+{
+	if(!_initialized) {
+		return false;
+	}
+
+	vector<uint8_t> contents;
+	if(!ExtractFile(filename, contents)) {
+		return false;
+	}
+
+	stream.write((char*)contents.data(), contents.size());
+	return true;
+}
+
+//Returns the names of the entries contained in the archive.
+//With an empty "extensions" list every entry is returned. Otherwise only the entries
+//whose lower-cased name ends with one of the given extensions are kept (the extensions
+//are compared as-is, so they are expected to be given in lower case).
+//Each matching entry is returned once, with its original casing.
+vector<string> ArchiveReader::GetFileList(std::initializer_list<string> extensions)
+{
+	if(extensions.size() == 0) {
+		return InternalGetFileList();
+	}
+
+	vector<string> filenames;
+	for(const string &filename : InternalGetFileList()) {
+		string lcFilename = filename;
+		//Go through unsigned char: calling tolower with a negative char value is undefined behavior
+		std::transform(lcFilename.begin(), lcFilename.end(), lcFilename.begin(), [](unsigned char c) { return (char)::tolower(c); });
+
+		for(const string &ext : extensions) {
+			if(lcFilename.size() >= ext.size() && lcFilename.compare(lcFilename.size() - ext.size(), ext.size(), ext) == 0) {
+				filenames.push_back(filename);
+				//Stop at the first match so that a file is never listed more than once
+				break;
+			}
+		}
+	}
+
+	return filenames;
+}
+
+//Returns true when the archive contains an entry named exactly "filename" (case-sensitive)
+bool ArchiveReader::CheckFile(string filename)
+{
+	for(const string &entry : InternalGetFileList()) {
+		if(entry == filename) {
+			return true;
+		}
+	}
+	return false;
+}
+
+//Reads the whole content of "in" into a buffer owned by this reader and loads it as an archive.
+//The stream is rewound to its beginning before returning.
+//Returns false when the stream size can't be determined, when the stream can't be read
+//completely, or when the data is rejected by the underlying reader.
+bool ArchiveReader::LoadArchive(std::istream &in)
+{
+	in.seekg(0, std::ios::end);
+	std::streamoff filesize = in.tellg();
+	in.seekg(0, std::ios::beg);
+
+	//Drop any previously loaded copy (delete[] on nullptr is a no-op)
+	delete[] _buffer;
+	_buffer = nullptr;
+
+	if(filesize < 0) {
+		//tellg() failed (stream in a failed state or not seekable): there is nothing to load
+		return false;
+	}
+
+	//Use size_t so that the allocation always matches the number of bytes read below
+	_buffer = new uint8_t[(size_t)filesize];
+	in.read((char*)_buffer, filesize);
+	bool readOk = (std::streamoff)in.gcount() == filesize;
+	in.seekg(0, std::ios::beg);
+
+	return readOk && LoadArchive(_buffer, (size_t)filesize);
+}
+
+//Loads an archive from an in-memory byte vector by forwarding its storage
+//(pointer + size) to the raw-buffer overload.
+bool ArchiveReader::LoadArchive(vector<uint8_t> &data)
+{
+	void* archiveData = data.data();
+	size_t archiveSize = data.size();
+	return LoadArchive(archiveData, archiveSize);
+}
+
+//Hands the raw buffer to the concrete reader; the reader is flagged as initialized
+//only when InternalLoadArchive accepts the data.
+bool ArchiveReader::LoadArchive(void* buffer, size_t size)
+{
+	if(!InternalLoadArchive(buffer, size)) {
+		return false;
+	}
+
+	_initialized = true;
+	return true;
+}
+
+//Opens "filename" in binary mode and loads it as an archive.
+//Returns true when the file could be opened and the archive was loaded successfully.
+bool ArchiveReader::LoadArchive(string filename)
+{
+	ifstream in(filename, std::ios::binary | std::ios::in);
+	if(!in.good()) {
+		return false;
+	}
+
+	//Report the actual outcome of the load (the file is closed by the stream's destructor)
+	return LoadArchive(in);
+}
+
+//Creates the reader matching the stream's 2-byte signature ("PK" -> ZipReader, "7z" -> SZReader)
+//and asks it to load the stream. Returns an empty pointer for any other signature.
+//The result of the load itself is not checked here.
+shared_ptr<ArchiveReader> ArchiveReader::GetReader(std::istream &in)
+{
+	uint8_t signature[2] = { 0, 0 };
+	in.read((char*)signature, 2);
+
+	const bool isZip = memcmp(signature, "PK", 2) == 0;
+	const bool is7z = !isZip && memcmp(signature, "7z", 2) == 0;
+	if(!isZip && !is7z) {
+		return nullptr;
+	}
+
+	shared_ptr<ArchiveReader> reader;
+	if(isZip) {
+		reader.reset(new ZipReader());
+	} else {
+		reader.reset(new SZReader());
+	}
+	reader->LoadArchive(in);
+	return reader;
+}
+
+//Opens "filepath" in binary mode and builds a reader from its content.
+//Returns an empty pointer when the file can't be opened.
+shared_ptr<ArchiveReader> ArchiveReader::GetReader(string filepath)
+{
+	ifstream archiveFile(filepath, std::ios::in | std::ios::binary);
+	if(!archiveFile) {
+		return nullptr;
+	}
+	return GetReader(archiveFile);
+}
\ No newline at end of file
diff --git a/Utilities/ArchiveReader.h b/Utilities/ArchiveReader.h
new file mode 100644
index 0000000..a2076c8
--- /dev/null
+++ b/Utilities/ArchiveReader.h
@@ -0,0 +1,28 @@
+#pragma once
+#include "stdafx.h"
+
+//Base class for readers that expose the content of an archive.
+//Concrete readers implement the pure virtual hooks (InternalLoadArchive,
+//InternalGetFileList and ExtractFile); this class provides the loading helpers,
+//extension filtering and the GetReader factories.
+class ArchiveReader
+{
+protected:
+	//Set once LoadArchive has succeeded
+	bool _initialized = false;
+	//Copy of the archive data owned by the reader when it was loaded from a stream
+	uint8_t* _buffer = nullptr;
+
+	//Parses the archive stored in "buffer" ("size" bytes), returns true on success
+	virtual bool InternalLoadArchive(void* buffer, size_t size) = 0;
+	//Returns the name of every entry in the archive
+	virtual vector<string> InternalGetFileList() = 0;
+public:
+	//Virtual: readers are created and used through ArchiveReader pointers, so deleting
+	//one through the base class must run the derived destructor.
+	virtual ~ArchiveReader();
+
+	bool LoadArchive(void* buffer, size_t size);
+	bool LoadArchive(vector<uint8_t>& data);
+	bool LoadArchive(string filename);
+	bool LoadArchive(std::istream &in);
+
+	//Extracts "filename" and writes its content to "stream"
+	bool GetStream(string filename, std::stringstream &stream);
+
+	//Lists the archive's entries, optionally restricted to the given extensions
+	vector<string> GetFileList(std::initializer_list<string> extensions = {});
+	//Returns true when an entry named exactly "filename" exists
+	bool CheckFile(string filename);
+
+	//Extracts "filename" into "output", returns true on success
+	virtual bool ExtractFile(string filename, vector<uint8_t> &output) = 0;
+
+	//Factories: pick the reader matching the archive's signature
+	static shared_ptr<ArchiveReader> GetReader(std::istream &in);
+	static shared_ptr<ArchiveReader> GetReader(string filepath);
+};
\ No newline at end of file
diff --git a/Utilities/AutoResetEvent.cpp b/Utilities/AutoResetEvent.cpp
new file mode 100644
index 0000000..f04943c
--- /dev/null
+++ b/Utilities/AutoResetEvent.cpp
@@ -0,0 +1,40 @@
+#include "stdafx.h"
+#include "AutoResetEvent.h"
+
+//Creates the event in the non-signaled state
+AutoResetEvent::AutoResetEvent() : _signaled(false)
+{
+}
+
+//Intentionally empty: waiting threads are not woken up when the event is destroyed
+AutoResetEvent::~AutoResetEvent()
+{
+	//Can't signal here, seems to cause process crashes when this occurs while the
+	//application is exiting.
+}
+
+//Blocks until the event is signaled, then resets it.
+//timeoutDelay: maximum wait in milliseconds; 0 means "wait indefinitely".
+//Note that the event is reset on return even when the wait timed out.
+void AutoResetEvent::Wait(int timeoutDelay)
+{
+	std::unique_lock<std::mutex> lock(_mutex);
+	if(timeoutDelay == 0) {
+		//Wait until signaled
+		_signal.wait(lock, [this] { return _signaled; });
+	} else {
+		//Wait until signaled or timeout.
+		//wait_for measures the delay with a steady clock, so adjustments to the
+		//system (wall) clock can no longer lengthen or shorten the timeout.
+		_signal.wait_for(lock, std::chrono::milliseconds(timeoutDelay), [this] { return _signaled; });
+	}
+	_signaled = false;
+}
+
+//Clears the signaled flag (under the event's mutex) without waking any waiter
+void AutoResetEvent::Reset()
+{
+	std::lock_guard<std::mutex> guard(_mutex);
+	_signaled = false;
+}
+
+//Marks the event as signaled and notifies every thread blocked in Wait().
+//The notification is sent while the mutex is still held.
+void AutoResetEvent::Signal()
+{
+	std::lock_guard<std::mutex> guard(_mutex);
+	_signaled = true;
+	_signal.notify_all();
+}
diff --git a/Utilities/AutoResetEvent.h b/Utilities/AutoResetEvent.h
new file mode 100644
index 0000000..b272f1d
--- /dev/null
+++ b/Utilities/AutoResetEvent.h
@@ -0,0 +1,21 @@
+#pragma once 
+#include "stdafx.h"
+
+#include <condition_variable>
+#include <mutex>
+
+//Event built on a mutex + condition variable: Wait() blocks until Signal() is called
+//and clears the signaled flag before returning.
+class AutoResetEvent
+{
+private:
+	//Condition variable waiters block on
+	std::condition_variable _signal;
+	//Protects _signaled
+	std::mutex _mutex;
+	//True between a call to Signal() and the next Wait()/Reset()
+	bool _signaled;
+
+public:
+	AutoResetEvent();
+	~AutoResetEvent();
+
+	//Clears the signaled flag
+	void Reset();
+	//Waits for the signal; timeoutDelay is in milliseconds, 0 = no timeout
+	void Wait(int timeoutDelay = 0);
+	//Sets the signaled flag and wakes up the waiting threads
+	void Signal();
+};
diff --git a/Utilities/AviWriter.cpp b/Utilities/AviWriter.cpp
new file mode 100644
index 0000000..fdd63fc
--- /dev/null
+++ b/Utilities/AviWriter.cpp
@@ -0,0 +1,273 @@
+// This file is a part of Mesen
+// It is a heavily modified version of the hardware.h/cpp file found in DOSBox's code.
+
+/*
+*  Copyright (C) 2002-2011  The DOSBox Team
+*
+*  This program is free software; you can redistribute it and/or modify
+*  it under the terms of the GNU General Public License as published by
+*  the Free Software Foundation; either version 2 of the License, or
+*  (at your option) any later version.
+*
+*  This program is distributed in the hope that it will be useful,
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*  GNU General Public License for more details.
+*
+*  You should have received a copy of the GNU General Public License
+*  along with this program; if not, write to the Free Software
+*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include "stdafx.h"
+#include <fstream>
+#include <cstring>
+#include "AviWriter.h"
+#include "BaseCodec.h"
+#include "RawCodec.h"
+#include "ZmbvCodec.h"
+#include "CamstudioCodec.h"
+
+//Writes one chunk (4-char tag + 32-bit little-endian size + payload) to the file and
+//appends the matching 16-byte entry (tag, flags, offset, size) to the in-memory index.
+//tag:   four-character chunk id (e.g. "00dc", "01wb")
+//size:  number of payload bytes available at "data"
+//flags: index flags for the chunk (0x10 is used for key frames by AddFrame)
+void AviWriter::WriteAviChunk(const char *tag, uint32_t size, void *data, uint32_t flags)
+{
+	uint8_t chunk[8] = { (uint8_t)tag[0], (uint8_t)tag[1], (uint8_t)tag[2], (uint8_t)tag[3] };
+	host_writed(&chunk[4], size);
+	_file.write((char*)chunk, 8);
+
+	//Payloads are padded to an even number of bytes. Only "size" bytes belong to the
+	//caller, so the padding byte is written separately instead of being read from
+	//one byte past the end of "data".
+	uint32_t writesize = (size + 1)&~1;
+	_file.write((char*)data, size);
+	if(writesize != size) {
+		_file.put(0);
+	}
+
+	//Offset stored in the index: position of the chunk within the written data, +4
+	uint32_t pos = _written + 4;
+	_written += writesize + 8;
+
+	uint8_t indexEntry[16] = { (uint8_t)tag[0], (uint8_t)tag[1], (uint8_t)tag[2], (uint8_t)tag[3] };
+	host_writed(indexEntry + 4, flags);
+	host_writed(indexEntry + 8, pos);
+	host_writed(indexEntry + 12, size);
+	_aviIndex.insert(_aviIndex.end(), indexEntry, indexEntry + 16);
+}
+
+//Stores a 16-bit value at "buffer" in little-endian byte order
+void AviWriter::host_writew(uint8_t* buffer, uint16_t value)
+{
+	for(int i = 0; i < 2; i++) {
+		buffer[i] = (uint8_t)((value >> (8 * i)) & 0xFF);
+	}
+}
+
+//Stores a 32-bit value at "buffer" in little-endian byte order
+void AviWriter::host_writed(uint8_t* buffer, uint32_t value)
+{
+	for(int i = 0; i < 4; i++) {
+		buffer[i] = (uint8_t)((value >> (8 * i)) & 0xFF);
+	}
+}
+
+//Starts a new recording: opens the output file, creates and configures the codec and
+//reserves AviHeaderSize placeholder bytes at the start of the file (the real header is
+//written by EndWrite once the frame/sample counts are known).
+//Returns false when the file can't be opened or when the codec setup fails.
+bool AviWriter::StartWrite(string filename, VideoCodec codec, uint32_t width, uint32_t height, uint32_t bpp, uint32_t fps, uint32_t audioSampleRate, uint32_t compressionLevel)
+{
+	_codecType = codec;
+	_file.open(filename, std::ios::out | std::ios::binary);
+	if(!_file) {
+		return false;
+	}
+	
+	switch(_codecType) {
+		default:
+		case VideoCodec::None: _codec.reset(new RawCodec()); break;
+		case VideoCodec::ZMBV: _codec.reset(new ZmbvCodec()); break;
+		case VideoCodec::CSCD: _codec.reset(new CamstudioCodec()); break;
+	}
+
+	if(!_codec->SetupCompress(width, height, compressionLevel)) {
+		//Nothing will be recorded: release the file handle instead of keeping it open
+		_file.close();
+		return false;
+	}
+
+	//Release the buffer of a previous recording before allocating a new one
+	//(delete[] on nullptr is a no-op)
+	delete[] _frameBuffer;
+	_frameBuffer = new uint8_t[width*height*bpp];
+
+	//The first 8 bytes of the index are reserved for its "idx1" tag and size (filled in by EndWrite)
+	_aviIndex.clear();
+	_aviIndex.insert(_aviIndex.end(), 8, 0);
+
+	_width = width;
+	_height = height;
+	_bpp = bpp;
+	_fps = fps;
+
+	_audiorate = audioSampleRate;
+
+	//Placeholder for the header
+	for(int i = 0; i < AviWriter::AviHeaderSize; i++) {
+		_file.put(0);
+	}
+	_frames = 0;
+	_written = 0;
+	_audioPos = 0;
+	_audiowritten = 0;
+
+	return true;
+}
+
+//Finalizes the recording: builds the AVI header in memory, appends the "idx1" index
+//after the chunk data, overwrites the placeholder bytes reserved by StartWrite with the
+//header, then closes the file and releases the frame buffer.
+void AviWriter::EndWrite()
+{
+	/* Close the video */
+	uint8_t avi_header[AviWriter::AviHeaderSize];
+	uint32_t main_list;
+	uint32_t header_pos = 0;
+	//Helpers that append a 4-char tag / 16-bit value / 32-bit value to avi_header
+#define AVIOUT4(_S_) memcpy(&avi_header[header_pos],_S_,4);header_pos+=4;
+#define AVIOUTw(_S_) host_writew(&avi_header[header_pos], _S_);header_pos+=2;
+#define AVIOUTd(_S_) host_writed(&avi_header[header_pos], _S_);header_pos+=4;
+	/* Try and write an avi header */
+	AVIOUT4("RIFF");                    // Riff header 
+	AVIOUTd(AviWriter::AviHeaderSize + _written - 8 + (uint32_t)_aviIndex.size());
+	AVIOUT4("AVI ");
+	AVIOUT4("LIST");                    // List header
+	main_list = header_pos;
+	AVIOUTd(0);                         // Size of list, patched below once known
+	AVIOUT4("hdrl");
+
+	AVIOUT4("avih");
+	AVIOUTd(56);                        /* # of bytes to follow */
+	AVIOUTd(_fps ? (uint32_t)(1000000 / _fps) : 0); /* Microseconds per frame (guarded against a 0 fps division) */
+	AVIOUTd(0);
+	AVIOUTd(0);                         /* PaddingGranularity (whatever that might be) */
+	AVIOUTd(0x110);                     /* Flags,0x10 has index, 0x100 interleaved */
+	AVIOUTd(_frames);                   /* TotalFrames */
+	AVIOUTd(0);                         /* InitialFrames */
+	AVIOUTd(2);                         /* Stream count */
+	AVIOUTd(0);                         /* SuggestedBufferSize */
+	AVIOUTd(_width);                    /* Width */
+	AVIOUTd(_height);                   /* Height */
+	AVIOUTd(0);                         /* TimeScale:  Unit used to measure time */
+	AVIOUTd(0);                         /* DataRate:   Data rate of playback     */
+	AVIOUTd(0);                         /* StartTime:  Starting time of AVI data */
+	AVIOUTd(0);                         /* DataLength: Size of AVI data chunk    */
+
+	/* Video stream list */
+	AVIOUT4("LIST");
+	AVIOUTd(4 + 8 + 56 + 8 + 40);       /* Size of the list */
+	AVIOUT4("strl");
+	/* video stream header */
+	AVIOUT4("strh");
+	AVIOUTd(56);                        /* # of bytes to follow */
+	AVIOUT4("vids");                    /* Type */
+	AVIOUT4(_codec->GetFourCC());       /* Handler */
+	AVIOUTd(0);                         /* Flags */
+	AVIOUTd(0);                         /* Reserved, MS says: wPriority, wLanguage */
+	AVIOUTd(0);                         /* InitialFrames */
+	AVIOUTd(1000000);                   /* Scale */
+	AVIOUTd(_fps);                      /* Rate: Rate/Scale == samples/second */
+	AVIOUTd(0);                         /* Start */
+	AVIOUTd(_frames);                   /* Length */
+	AVIOUTd(0);                         /* SuggestedBufferSize */
+	AVIOUTd(~0);                        /* Quality */
+	AVIOUTd(0);                         /* SampleSize */
+	AVIOUTd(0);                         /* Frame */
+	AVIOUTd(0);                         /* Frame */
+	/* The video stream format */
+	AVIOUT4("strf");
+	AVIOUTd(40);                        /* # of bytes to follow */
+	AVIOUTd(40);                        /* Size */
+	AVIOUTd(_width);                    /* Width */
+	AVIOUTd(_height);                   /* Height */
+	AVIOUTw(1);                         //number of planes
+	AVIOUTw(24);                        //bits for colors
+	AVIOUT4(_codec->GetFourCC());       /* Compression */
+	AVIOUTd(_width * _height * 4);      /* SizeImage (in bytes?) */
+	AVIOUTd(0);                         /* XPelsPerMeter */
+	AVIOUTd(0);                         /* YPelsPerMeter */
+	AVIOUTd(0);                         /* ClrUsed: Number of colors used */
+	AVIOUTd(0);                         /* ClrImportant: Number of colors important */
+
+	/* Audio stream list */
+	AVIOUT4("LIST");
+	AVIOUTd(4 + 8 + 56 + 8 + 16);       /* Length of list in bytes */
+	AVIOUT4("strl");
+	/* The audio stream header */
+	AVIOUT4("strh");
+	AVIOUTd(56);                        /* # of bytes to follow */
+	AVIOUT4("auds");
+	AVIOUTd(0);                         /* Format (Optionally) */
+	AVIOUTd(0);                         /* Flags */
+	AVIOUTd(0);                         /* Reserved, MS says: wPriority, wLanguage */
+	AVIOUTd(0);                         /* InitialFrames */
+	AVIOUTd(4);                         /* Scale */
+	AVIOUTd(_audiorate * 4);            /* Rate, actual rate is scale/rate */
+	AVIOUTd(0);                         /* Start */
+	if(!_audiorate)
+		_audiorate = 1;
+	AVIOUTd(_audiowritten / 4);         /* Length */
+	AVIOUTd(0);                         /* SuggestedBufferSize */
+	AVIOUTd(~0);                        /* Quality */
+	AVIOUTd(4);                         /* SampleSize */
+	AVIOUTd(0);                         /* Frame */
+	AVIOUTd(0);                         /* Frame */
+	/* The audio stream format */
+	AVIOUT4("strf");
+	AVIOUTd(16);                        /* # of bytes to follow */
+	AVIOUTw(1);                         /* Format, WAVE_ZMBV_FORMAT_PCM */
+	AVIOUTw(2);                         /* Number of channels */
+	AVIOUTd(_audiorate);                /* SamplesPerSec */
+	AVIOUTd(_audiorate * 4);            /* AvgBytesPerSec*/
+	AVIOUTw(4);                         /* BlockAlign */
+	AVIOUTw(16);                        /* BitsPerSample */
+	int nmain = header_pos - main_list - 4;
+	/* Finish stream list, i.e. put number of bytes in the list to proper pos */
+
+	//The rest of the reserved header area is covered by a JUNK chunk
+	int njunk = AviWriter::AviHeaderSize - 8 - 12 - header_pos;
+	AVIOUT4("JUNK");
+	AVIOUTd(njunk);
+	/* Fix the size of the main list */
+	header_pos = main_list;
+	AVIOUTd(nmain);
+	header_pos = AviWriter::AviHeaderSize - 12;
+	AVIOUT4("LIST");
+	AVIOUTd(_written + 4);              /* Length of list in bytes */
+	AVIOUT4("movi");
+#undef AVIOUT4
+#undef AVIOUTw
+#undef AVIOUTd
+
+	/* First add the index table to the end */
+	memcpy(_aviIndex.data(), "idx1", 4);
+	host_writed(_aviIndex.data() + 4, (uint32_t)_aviIndex.size() - 8);
+	
+	_file.write((char*)_aviIndex.data(), _aviIndex.size());
+	//Go back to the start of the file: seekp(off, dir) is required here, the
+	//single-argument overload expects an absolute position, not a seek direction
+	_file.seekp(0, std::ios::beg);
+	_file.write((char*)avi_header, AviWriter::AviHeaderSize);
+	_file.close();
+
+	//The frame buffer allocated by StartWrite is no longer needed
+	delete[] _frameBuffer;
+	_frameBuffer = nullptr;
+}
+
+//Compresses one video frame and writes it to the file, followed by the audio
+//that was buffered by AddSound since the previous frame (if any).
+//A key frame is requested from the codec every 120 frames.
+void AviWriter::AddFrame(uint8_t *frameData)
+{
+	if(!_file) {
+		return;
+	}
+
+	const bool rawVideo = (_codecType == VideoCodec::None);
+	const bool keyFrameRequested = (_frames % 120) == 0;
+
+	uint8_t* compressedData = nullptr;
+	int written = _codec->CompressFrame(keyFrameRequested, frameData, &compressedData);
+	if(written < 0) {
+		return;
+	}
+
+	//Uncompressed frames are always flagged as key frames in the index
+	const uint32_t indexFlags = (rawVideo || keyFrameRequested) ? 0x10 : 0;
+	WriteAviChunk(rawVideo ? "00db" : "00dc", written, compressedData, indexFlags);
+	_frames++;
+
+	if(_audioPos != 0) {
+		auto lock = _audioLock.AcquireSafe();
+		WriteAviChunk("01wb", _audioPos, _audiobuf, 0);
+		_audiowritten += _audioPos;
+		_audioPos = 0;
+	}
+}
+
+//Buffers audio until the next call to AddFrame, which writes it to the file.
+//data:        interleaved 16-bit samples, 4 bytes per sample frame (2 channels x 16 bits,
+//             as declared in the audio stream format written by EndWrite)
+//sampleCount: number of 4-byte sample frames available at "data"
+//Samples that don't fit in the buffer are dropped.
+void AviWriter::AddSound(int16_t *data, uint32_t sampleCount)
+{
+	if(!_file) {
+		return;
+	}
+
+	auto lock = _audioLock.AcquireSafe();
+
+	//_audioPos is a byte offset into _audiobuf and never exceeds its size
+	uint64_t byteCount = (uint64_t)sampleCount * 4;
+	size_t available = sizeof(_audiobuf) - _audioPos;
+	if(byteCount > available) {
+		//The buffer hasn't been flushed by AddFrame yet: keep only the whole sample
+		//frames that still fit rather than writing past the end of the buffer
+		byteCount = available & ~(size_t)3;
+	}
+
+	memcpy((uint8_t*)_audiobuf + _audioPos, data, (size_t)byteCount);
+	_audioPos += (uint32_t)byteCount;
+}
\ No newline at end of file
diff --git a/Utilities/AviWriter.h b/Utilities/AviWriter.h
new file mode 100644
index 0000000..3941c9c
--- /dev/null
+++ b/Utilities/AviWriter.h
@@ -0,0 +1,56 @@
+// This file is a part of Mesen
+// It is a heavily modified version of the hardware.h/cpp file found in DOSBox's code.
+
+#pragma once
+#include "stdafx.h"
+#include "SimpleLock.h"
+#include "BaseCodec.h"
+
+//Video codec used by AviWriter (selects the BaseCodec implementation created in StartWrite)
+enum class VideoCodec
+{
+	None = 0,	//RawCodec
+	ZMBV = 1,	//ZmbvCodec
+	CSCD = 2,	//CamstudioCodec
+};
+
+//Writes an AVI file made of one video stream (compressed by a BaseCodec) and one
+//16-bit stereo PCM audio stream.
+//Usage: StartWrite(), then AddFrame()/AddSound() repeatedly, then EndWrite().
+class AviWriter
+{
+private:
+	//Number of int16_t elements in the audio buffer
+	static constexpr int WaveBufferSize = 16 * 1024;
+	//Number of bytes reserved at the start of the file for the AVI header
+	static constexpr int AviHeaderSize = 500;
+
+	std::unique_ptr<BaseCodec> _codec;
+	ofstream _file;
+
+	VideoCodec _codecType;
+
+	//Audio buffered by AddSound until the next AddFrame
+	int16_t _audiobuf[WaveBufferSize];
+	//Number of bytes currently buffered in _audiobuf
+	uint32_t _audioPos = 0;
+	//Audio sample rate, as given to StartWrite
+	uint32_t _audiorate = 0;
+	//Total number of audio bytes written to the file
+	uint32_t _audiowritten = 0;
+
+	//Number of video frames written so far
+	uint32_t _frames = 0;
+	uint32_t _width = 0;
+	uint32_t _height = 0;
+	uint32_t _bpp = 0;
+	//Number of bytes of chunk data written after the header
+	uint32_t _written = 0;
+	uint32_t _fps = 0;
+
+	//Allocated by StartWrite (width*height*bpp bytes)
+	uint8_t* _frameBuffer = nullptr;
+
+	//In-memory "idx1" index: 8 reserved bytes followed by one 16-byte entry per chunk
+	vector<uint8_t> _aviIndex;
+	
+	//Guards _audiobuf/_audioPos, which are used by both AddSound and AddFrame
+	SimpleLock _audioLock;
+
+private:
+	//Little-endian writers for 16-bit and 32-bit values
+	void host_writew(uint8_t* buffer, uint16_t value);
+	void host_writed(uint8_t* buffer, uint32_t value);
+	//Writes one chunk to the file and records it in the index
+	void WriteAviChunk(const char * tag, uint32_t size, void * data, uint32_t flags);
+
+public:
+	//Compresses and writes a video frame, then flushes the buffered audio
+	void AddFrame(uint8_t* frameData);
+	//Buffers "sampleCount" sample frames (4 bytes each) of audio
+	void AddSound(int16_t * data, uint32_t sampleCount);
+
+	//Opens the output file and sets up the codec, returns false on failure
+	bool StartWrite(string filename, VideoCodec codec, uint32_t width, uint32_t height, uint32_t bpp, uint32_t fps, uint32_t audioSampleRate, uint32_t compressionLevel);
+	//Writes the header and index, then closes the file
+	void EndWrite();
+};
\ No newline at end of file
diff --git a/Utilities/Base64.h b/Utilities/Base64.h
new file mode 100644
index 0000000..eb6c6ec
--- /dev/null
+++ b/Utilities/Base64.h
@@ -0,0 +1,44 @@
+#pragma once
+#include "stdafx.h"
+
+//Base64 encoder/decoder using the standard alphabet (A-Z, a-z, 0-9, '+', '/')
+class Base64
+{
+public:
+	//Encodes "data" as Base64 text; the output is padded with '=' to a multiple of 4 characters
+	static string Encode(const vector<uint8_t> &data)
+	{
+		const char* alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+		std::string out;
+		out.reserve(((data.size() + 2) / 3) * 4);
+
+		//Unsigned accumulator: bits shifted out of the top are simply discarded
+		//(a signed accumulator overflows after a few input bytes)
+		uint32_t val = 0;
+		int valb = -6;
+		for(uint8_t c : data) {
+			val = (val << 8) | c;
+			valb += 8;
+			while(valb >= 0) {
+				out.push_back(alphabet[(val >> valb) & 0x3F]);
+				valb -= 6;
+			}
+		}
+		//Flush the remaining bits (if any), left-aligned in a final 6-bit group
+		if(valb > -6) out.push_back(alphabet[((val << 8) >> (valb + 8)) & 0x3F]);
+		while(out.size() % 4) out.push_back('=');
+		return out;
+	}
+
+	//Decodes Base64 text. Decoding stops at the first character that isn't part of
+	//the alphabet (including the '=' padding).
+	static vector<uint8_t> Decode(string in)
+	{
+		const char* alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+		//Maps a character to its 6-bit value, -1 for characters outside the alphabet
+		int lookup[256];
+		for(int &entry : lookup) entry = -1;
+		for(int i = 0; i < 64; i++) lookup[(uint8_t)alphabet[i]] = i;
+
+		vector<uint8_t> out;
+		out.reserve(in.size() / 4 * 3);
+
+		uint32_t val = 0;
+		int valb = -8;
+		for(uint8_t c : in) {
+			if(lookup[c] == -1) break;
+			val = (val << 6) | (uint32_t)lookup[c];
+			valb += 6;
+			if(valb >= 0) {
+				out.push_back((uint8_t)((val >> valb) & 0xFF));
+				valb -= 8;
+			}
+		}
+		return out;
+	}
+};
diff --git a/Utilities/BaseCodec.h b/Utilities/BaseCodec.h
new file mode 100644
index 0000000..6133378
--- /dev/null
+++ b/Utilities/BaseCodec.h
@@ -0,0 +1,12 @@
+#pragma once
+#include "stdafx.h"
+
+//Interface implemented by the video codecs used by AviWriter
+class BaseCodec
+{
+public:
+	//Prepares the codec for frames of the given dimensions, returns false on failure
+	virtual bool SetupCompress(int width, int height, uint32_t compressionLevel) = 0;
+	//Compresses one frame and stores a pointer to the result in *compressedData.
+	//AviWriter::AddFrame treats the return value as the number of bytes to write,
+	//and a negative value as a failure.
+	virtual int CompressFrame(bool isKeyFrame, uint8_t *frameData, uint8_t** compressedData) = 0;
+	//Returns the codec's four-character code (AviWriter copies 4 bytes from it into the header)
+	virtual const char* GetFourCC() = 0;
+
+	virtual ~BaseCodec() { }
+};
\ No newline at end of file
diff --git a/Utilities/BpsPatcher.cpp b/Utilities/BpsPatcher.cpp
new file mode 100644
index 0000000..f958f19
--- /dev/null
+++ b/Utilities/BpsPatcher.cpp
@@ -0,0 +1,128 @@
+#include "stdafx.h"
+#include <assert.h>
+#include <cstring>
+#include "BpsPatcher.h"
+#include "CRC32.h"
+
+//Reads one variable-length number from the patch: 7 bits per byte, least significant
+//group first, the byte with its high bit set being the last one.
+//Returns -1 when the stream ends (or fails) before the number is complete, or when the
+//number is longer than 8 bytes (which no valid size/offset needs).
+int64_t BpsPatcher::ReadBase128Number(std::istream &file)
+{
+	int64_t result = 0;
+	int shift = 0;
+	while(true) {
+		uint8_t buffer = 0;
+		file.read((char*)&buffer, 1);
+		if(!file) {
+			return -1;
+		}
+		//The shift must be done on 64 bits: on an int it overflows from the 4th byte on
+		result += (int64_t)(buffer & 0x7F) << shift;
+		shift += 7;
+		if(buffer & 0x80) {
+			break;
+		}
+		if(shift > 49) {
+			//Malformed number: going further would overflow the 64-bit result
+			return -1;
+		}
+		result += (int64_t)1 << shift;
+	}
+
+	return result;
+}
+
+//Applies the BPS patch stored in the file "bpsFilepath" to "input" and stores the
+//result in "output". Returns false when the file can't be opened or the patch fails.
+bool BpsPatcher::PatchBuffer(string bpsFilepath, vector<uint8_t> &input, vector<uint8_t> &output)
+{
+	ifstream patchStream(bpsFilepath, std::ios::in | std::ios::binary);
+	if(!patchStream) {
+		return false;
+	}
+	return PatchBuffer(patchStream, input, output);
+}
+
+//Applies a BPS patch read from "bpsFile" to "input" and stores the result in "output".
+//Returns true when the patch was applied and both the input and output CRC32 values
+//stored in the patch match. Returns false for an invalid patch, including any command
+//that would read or write outside of the input/output buffers.
+//"output" is resized to the size declared by the patch and may be partially written
+//when false is returned.
+bool BpsPatcher::PatchBuffer(std::istream &bpsFile, vector<uint8_t> &input, vector<uint8_t> &output)
+{
+	bpsFile.seekg(0, std::ios::end);
+	std::streamoff patchSize = bpsFile.tellg();
+	bpsFile.seekg(0, std::ios::beg);
+	if(patchSize < 4 + 12) {
+		//Too small to contain the "BPS1" marker and the 12 trailing bytes that are
+		//excluded from the command list (also covers a failed tellg())
+		return false;
+	}
+	size_t fileSize = (size_t)patchSize;
+
+	char header[4] = {};
+	bpsFile.read(header, 4);
+	if(memcmp(header, "BPS1", 4) != 0) {
+		//Invalid BPS file
+		return false;
+	}
+
+	int64_t inputFileSize = ReadBase128Number(bpsFile);
+	int64_t outputFileSize = ReadBase128Number(bpsFile);
+	int64_t metadataSize = ReadBase128Number(bpsFile);
+	if(inputFileSize < 0 || outputFileSize < 0 || metadataSize < 0) {
+		//Invalid file
+		return false;
+	}
+
+	//The metadata is not used
+	bpsFile.seekg(metadataSize, std::ios::cur);
+
+	output.resize((size_t)outputFileSize);
+
+	size_t outputOffset = 0;
+	int64_t inputRelativeOffset = 0;
+	int64_t outputRelativeOffset = 0;
+	while((size_t)bpsFile.tellg() < fileSize - 12) {
+		int64_t data = ReadBase128Number(bpsFile);
+		if(data < 0) {
+			//Invalid file
+			return false;
+		}
+
+		uint8_t command = data & 0x03;
+		uint64_t length = (uint64_t)(data >> 2) + 1;
+		if(length > output.size() - outputOffset) {
+			//Every command writes "length" bytes at outputOffset: this one doesn't fit
+			return false;
+		}
+
+		switch(command) {
+			case 0:
+				//SourceRead: copy from the same position in the input
+				if(outputOffset + length > input.size()) {
+					return false;
+				}
+				memmove(output.data() + outputOffset, input.data() + outputOffset, (size_t)length);
+				outputOffset += (size_t)length;
+				break;
+
+			case 1:
+				//TargetRead: the bytes are stored in the patch itself
+				bpsFile.read((char*)output.data() + outputOffset, (std::streamsize)length);
+				if(!bpsFile) {
+					return false;
+				}
+				outputOffset += (size_t)length;
+				break;
+
+			case 2: {
+				//SourceCopy: copy from the input, at a position relative to the previous SourceCopy
+				int64_t offset = ReadBase128Number(bpsFile);
+				if(offset < 0) {
+					return false;
+				}
+				//Bit 0 is the sign, the remaining bits are the magnitude
+				inputRelativeOffset += (offset & 1) ? -(offset >> 1) : (offset >> 1);
+				if(inputRelativeOffset < 0 || (uint64_t)inputRelativeOffset + length > input.size()) {
+					return false;
+				}
+				memmove(output.data() + outputOffset, input.data() + inputRelativeOffset, (size_t)length);
+				inputRelativeOffset += (int64_t)length;
+				outputOffset += (size_t)length;
+				break;
+			}
+
+			case 3: {
+				//TargetCopy: copy from the output, at a position relative to the previous TargetCopy
+				int64_t offset = ReadBase128Number(bpsFile);
+				if(offset < 0) {
+					return false;
+				}
+				outputRelativeOffset += (offset & 1) ? -(offset >> 1) : (offset >> 1);
+				if(outputRelativeOffset < 0 || (uint64_t)outputRelativeOffset + length > output.size()) {
+					return false;
+				}
+				//Byte by byte on purpose: the source range may overlap the bytes being written
+				while(length--) {
+					output[outputOffset++] = output[(size_t)outputRelativeOffset++];
+				}
+				break;
+			}
+		}
+	}
+
+	//The command list is followed by the little-endian CRC32 of the input and of the output
+	uint8_t inputChecksum[4] = {};
+	uint8_t outputChecksum[4] = {};
+	bpsFile.read((char*)inputChecksum, 4);
+	bpsFile.read((char*)outputChecksum, 4);
+	if(!bpsFile) {
+		return false;
+	}
+
+	uint32_t patchInputCrc = (uint32_t)inputChecksum[0] | ((uint32_t)inputChecksum[1] << 8) | ((uint32_t)inputChecksum[2] << 16) | ((uint32_t)inputChecksum[3] << 24);
+	uint32_t patchOutputCrc = (uint32_t)outputChecksum[0] | ((uint32_t)outputChecksum[1] << 8) | ((uint32_t)outputChecksum[2] << 16) | ((uint32_t)outputChecksum[3] << 24);
+	uint32_t inputCrc = CRC32::GetCRC(input.data(), input.size());
+	uint32_t outputCrc = CRC32::GetCRC(output.data(), output.size());
+
+	return patchInputCrc == inputCrc && patchOutputCrc == outputCrc;
+}
diff --git a/Utilities/BpsPatcher.h b/Utilities/BpsPatcher.h
new file mode 100644
index 0000000..f10bb13
--- /dev/null
+++ b/Utilities/BpsPatcher.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "stdafx.h"
+
+//Applies patches in the BPS format ("BPS1" signature) to an in-memory buffer
+class BpsPatcher
+{
+private:
+	//Reads one variable-length number from the patch, returns -1 if the stream ends first
+	static int64_t ReadBase128Number(std::istream &file);
+
+public:
+	//Both overloads patch "input" into "output" and return true only when the patch
+	//was applied and the CRC32 values stored in the patch match.
+	static bool PatchBuffer(std::istream &bpsFile, vector<uint8_t> &input, vector<uint8_t> &output);
+	static bool PatchBuffer(string bpsFilepath, vector<uint8_t> &input, vector<uint8_t> &output);
+};
\ No newline at end of file
diff --git a/Utilities/CRC32.cpp b/Utilities/CRC32.cpp
new file mode 100644
index 0000000..89aab9b
--- /dev/null
+++ b/Utilities/CRC32.cpp
@@ -0,0 +1,40 @@
+#include "stdafx.h"
+
+#include "CRC32.h"
+
+//Feeds "length" bytes starting at pData into the running CRC (table-driven, one byte at a time).
+//A length that is zero or negative leaves the CRC untouched.
+void CRC32::AddData(const uint8_t* pData, const std::streamoff length)
+{
+	//Counting up to "length" (instead of down to zero) keeps a negative length,
+	//e.g. the result of a failed tellg(), from turning into a runaway loop
+	for(std::streamoff i = 0; i < length; i++) {
+		_crc = (_crc >> 8) ^ kCrc32Table[(_crc ^ pData[i]) & 0xff];
+	}
+}
+
+//Returns the CRC32 of the "length" bytes stored at "buffer"
+//(running value initialized by the class, inverted once all bytes are processed)
+uint32_t CRC32::GetCRC(uint8_t *buffer, std::streamoff length)
+{
+	CRC32 calculator;
+	calculator.AddData(buffer, length);
+
+	uint32_t finalCrc = calculator._crc ^ 0xFFFFFFFF;
+	return finalCrc;
+}
+
+//Returns the CRC32 of the content of the file "filename", i.e. the same value the
+//buffer overload returns for the file's bytes.
+//Returns 0 when the file can't be opened or its size can't be determined.
+uint32_t CRC32::GetCRC(string filename)
+{
+	ifstream file(filename, std::ios::in | std::ios::binary);
+	if(!file) {
+		return 0;
+	}
+
+	file.seekg(0, std::ios::end);
+	std::streamoff fileSize = file.tellg();
+	file.seekg(0, std::ios::beg);
+	if(fileSize < 0) {
+		return 0;
+	}
+
+	vector<uint8_t> buffer((size_t)fileSize);
+	file.read((char*)buffer.data(), fileSize);
+	file.close();
+
+	//The buffer overload already applies the final inversion: inverting its result
+	//again would produce a value that doesn't match the CRC32 of the same bytes
+	return GetCRC(buffer.data(), fileSize);
+}
\ No newline at end of file
diff --git a/Utilities/CRC32.h b/Utilities/CRC32.h
new file mode 100644
index 0000000..836f1a4
--- /dev/null
+++ b/Utilities/CRC32.h
@@ -0,0 +1,83 @@
+//From: http://tdistler.com/2011/06/22/crc32-a-simple-c-class
+//"You are free to use and adapt this code however you like... that goes for anyone."
+#pragma once
+#include "stdafx.h"
+
+//256-entry lookup table used by CRC32::AddData, indexed with (crc ^ byte) & 0xff.
+//NOTE(review): looks like the standard reflected CRC-32 table (polynomial 0xEDB88320,
+//which appears as entry 0x80) - confirm before relying on it.
+//Being "static const" in a header, every translation unit that includes it gets its own copy.
+static const uint32_t kCrc32Table[256] = {
+    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+    0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+    0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+    0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+    0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+    0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+    0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+    0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+    0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+    0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+    0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+    0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+    0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+    0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+    0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+    0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+    0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+    0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+    0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+    0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+    0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+    0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+    0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+    0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+    0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+    0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+    0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+    0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+    0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+    0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+    0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+    0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+    0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+    0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+    0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
+}; // kCrc32Table
+
+//Table-driven CRC32 calculator; only the static GetCRC helpers are public
+class CRC32
+{
+private:
+	//Running CRC value, starts with all bits set
+	uint32_t _crc = 0xFFFFFFFF;
+
+	//Updates the running CRC with "length" bytes read from pData
+	void AddData(const uint8_t* pData, const std::streamoff length);
+
+public:
+	//CRC32 of a memory buffer
+	static uint32_t GetCRC(uint8_t *buffer, std::streamoff length);
+	//CRC32 computed from the content of a file
+	static uint32_t GetCRC(string filename);
+};
\ No newline at end of file
diff --git a/Utilities/CamstudioCodec.cpp b/Utilities/CamstudioCodec.cpp
new file mode 100644
index 0000000..d8fba5b
--- /dev/null
+++ b/Utilities/CamstudioCodec.cpp
@@ -0,0 +1,110 @@
+//This is based on the code in lsnes' cscd.cpp file
+//A few modifications were done to improve compression speed
+#include "stdafx.h"
+#include <cstring>
+#include "CamstudioCodec.h"
+#include "miniz.h"
+
+CamstudioCodec::~CamstudioCodec()
+{
+	//Releases every buffer owned by the codec, then the deflate stream.
+	//The helper frees one heap array and resets the pointer that owned it.
+	//delete[] accepts a null pointer, so buffers that were never allocated
+	//need no separate check.
+	auto release = [](uint8_t*& block) {
+		delete[] block;
+		block = nullptr;
+	};
+
+	//Frame storage
+	release(_prevFrame);
+	release(_currentFrame);
+
+	//Scratch buffer used for frame differences
+	release(_buffer);
+
+	//Output buffer handed to callers by CompressFrame
+	release(_compressBuffer);
+
+	//Finally tear down the deflate stream state
+	deflateEnd(&_compressor);
+}
+
+bool CamstudioCodec::SetupCompress(int width, int height, uint32_t compressionLevel)
+{
+	//Prepares the codec for width x height frames: computes the row stride, allocates
+	//the zero-filled work buffers and initializes the deflate stream.
+	//Returns false if deflateInit fails; its result used to be ignored and true was
+	//returned unconditionally, so a broken stream went unnoticed by the caller.
+	//NOTE(review): buffers from an earlier call are not released here - confirm that
+	//callers invoke this only once per instance.
+	_compressionLevel = compressionLevel;
+	_orgWidth = width;
+	_height = height;
+
+	//24-bit rows padded to a multiple of 4 bytes. The formula already yields width*3
+	//when width is a multiple of 4, so no special case is needed for that.
+	_rowStride = (width * 24 + 31) / 32 * 4;
+	const int frameSize = _rowStride * _height;
+
+	//The trailing "()" value-initializes (zero-fills) each array, replacing the memset calls
+	_prevFrame = new uint8_t[frameSize]();
+	_currentFrame = new uint8_t[frameSize]();
+	_buffer = new uint8_t[frameSize]();
+
+	//Two extra bytes hold the frame header that CompressFrame writes before the deflate data
+	_compressBufferLength = compressBound(frameSize) + 2;
+	_compressBuffer = new uint8_t[_compressBufferLength]();
+
+	return deflateInit(&_compressor, compressionLevel) == MZ_OK;
+}
+
+void CamstudioCodec::LoadRow(uint8_t* inPointer, uint8_t* outPointer)
+{
+	//Packs _orgWidth pixels from 4 bytes per pixel (input) down to 3 bytes per pixel (output)
+	for(int pixel = 0; pixel < _orgWidth; pixel++, inPointer += 4, outPointer += 3) {
+		//Keep the first 3 bytes of the source pixel; its 4th byte is dropped
+		outPointer[0] = inPointer[0];
+		outPointer[1] = inPointer[1];
+		outPointer[2] = inPointer[2];
+	}
+}
+
+int CamstudioCodec::CompressFrame(bool isKeyFrame, uint8_t *frameData, uint8_t** compressedData)
+{
+	const int frameBytes = _rowStride * _height;
+
+	//Header byte 0: 0x03 for key frames, 0x02 otherwise, compression level in the upper bits
+	_compressBuffer[0] = (isKeyFrame ? 0x03 : 0x02) | (_compressionLevel << 4);
+	_compressBuffer[1] = 8; //8-bit per color
+
+	//Convert the 32-bit source rows to 24-bit, storing them in reverse vertical order
+	for(int y = 0; y < _height; y++) {
+		LoadRow(frameData + (_height - y - 1) * _orgWidth * 4, _currentFrame + y * _rowStride);
+	}
+
+	//Key frames are compressed as-is; other frames as the byte-wise difference to the previous frame
+	uint8_t* deflateInput = _currentFrame;
+	if(!isKeyFrame) {
+		for(int i = 0; i < frameBytes; i++) {
+			_buffer[i] = _currentFrame[i] - _prevFrame[i];
+		}
+		deflateInput = _buffer;
+	}
+	memcpy(_prevFrame, _currentFrame, frameBytes);
+
+	//Compress everything in one pass, writing after the 2 header bytes
+	deflateReset(&_compressor);
+	_compressor.next_in = deflateInput;
+	_compressor.avail_in = frameBytes;
+	_compressor.next_out = _compressBuffer + 2;
+	_compressor.avail_out = _compressBufferLength - 2;
+	deflate(&_compressor, MZ_FINISH);
+	*compressedData = _compressBuffer;
+	return _compressor.total_out + 2;
+}
+
+const char* CamstudioCodec::GetFourCC()
+{
+	return "CSCD"; //Four-character code of this codec; presumably written to the container by the caller - confirm
+}
\ No newline at end of file
diff --git a/Utilities/CamstudioCodec.h b/Utilities/CamstudioCodec.h
new file mode 100644
index 0000000..4e5eede
--- /dev/null
+++ b/Utilities/CamstudioCodec.h
@@ -0,0 +1,30 @@
+#pragma once
+#include "stdafx.h"
+#include "BaseCodec.h"
+#include "miniz.h"
+
+class CamstudioCodec : public BaseCodec //Codec producing deflate-compressed 24-bit frames (FourCC "CSCD")
+{
+private:
+	uint8_t* _prevFrame = nullptr; //Copy of the last frame given to CompressFrame, in 24-bit form
+	uint8_t* _currentFrame = nullptr; //Frame being compressed, converted to 24-bit rows
+	uint8_t* _buffer = nullptr; //Byte-wise difference current - previous, used for non-key frames
+
+	uint32_t _compressBufferLength = 0; //Size of _compressBuffer in bytes
+	uint8_t* _compressBuffer = nullptr; //2 header bytes followed by the deflate output; returned by CompressFrame
+	z_stream _compressor = {}; //Deflate stream, initialized in SetupCompress
+	int _compressionLevel = 0; //Level passed to SetupCompress; also stored in the frame header
+
+	int _orgWidth = 0; //Frame width in pixels as passed to SetupCompress
+	int _rowStride = 0; //Bytes per stored 24-bit row (padded to a multiple of 4)
+	int _height = 0; //Frame height in rows
+
+	void LoadRow(uint8_t* inPointer, uint8_t* outPointer); //Converts one row from 4 to 3 bytes per pixel
+
+public:
+	virtual ~CamstudioCodec(); //Frees the buffers above and ends the deflate stream
+
+	virtual bool SetupCompress(int width, int height, uint32_t compressionLevel) override; //Allocates the buffers and initializes the deflate stream
+	virtual int CompressFrame(bool isKeyFrame, uint8_t *frameData, uint8_t** compressedData) override; //Returns the compressed size; *compressedData points at an internal buffer
+	virtual const char* GetFourCC() override; //Returns "CSCD"
+};
\ No newline at end of file
diff --git a/Utilities/FastString.h b/Utilities/FastString.h
new file mode 100644
index 0000000..26a3e7a
--- /dev/null
+++ b/Utilities/FastString.h
@@ -0,0 +1,62 @@
+#pragma once
+#include "stdafx.h"
+
+class FastString
+{
+private:
+	//Inline storage; the final byte stays free for the terminator written by ToString()
+	char _buffer[1000];
+	uint16_t _pos = 0;
+
+	//Ends the recursion of the variadic Write() once every argument has been written
+	void Write() {}
+
+	//Single place where bytes enter the buffer. Input that does not fit is truncated;
+	//the unchecked memcpy calls this replaces wrote past the end of _buffer instead.
+	void Append(const char* src, size_t size)
+	{
+		size_t available = sizeof(_buffer) - 1 - _pos;
+		if(size > available) {
+			size = available;
+		}
+		memcpy(_buffer + _pos, src, size);
+		_pos += (uint16_t)size;
+	}
+
+public:
+	FastString() {}
+	FastString(const char* str, uint16_t size) { Write(str, size); }
+	FastString(string &str) { Write(str); }
+
+	//Append a single character
+	void Write(char c) { Append(&c, 1); }
+	//Append "size" bytes starting at str (no terminator required)
+	void Write(const char* str, uint16_t size) { Append(str, size); }
+	//Append a null-terminated string
+	void Write(const char* str) { Append(str, strlen(str)); }
+	//Append the contents of a string object
+	void Write(string &str) { Append(str.c_str(), str.size()); }
+	//Append what another FastString currently holds
+	void Write(FastString &str) { Append(str._buffer, str._pos); }
+
+	//Writes every argument in order through the matching overload above
+	template<typename T, typename... Args>
+	void Write(T first, Args... args)
+	{
+		Write(first);
+		Write(args...);
+	}
+
+	//Terminates the text in place and returns a pointer to the internal buffer
+	const char* ToString()
+	{
+		_buffer[_pos] = 0;
+		return _buffer;
+	}
+
+	//Unchecked read of the byte at idx
+	const char operator[](int idx)
+	{
+		return _buffer[idx];
+	}
+};
diff --git a/Utilities/FolderUtilities.cpp b/Utilities/FolderUtilities.cpp
new file mode 100644
index 0000000..bb4d4f3
--- /dev/null
+++ b/Utilities/FolderUtilities.cpp
@@ -0,0 +1,269 @@
+#include "stdafx.h"
+
+//TODO: Use non-experimental namespace (once it is officially supported by VC & GCC)
+#ifndef LIBRETRO
+#include <experimental/filesystem>
+namespace fs = std::experimental::filesystem;
+#endif
+
+#include <unordered_set>
+#include <algorithm>
+#include "FolderUtilities.h"
+#include "UTF8Util.h"
+
+string FolderUtilities::_homeFolder = ""; //Empty until SetHomeFolder() is called; GetHomeFolder() throws while it is empty
+string FolderUtilities::_saveFolderOverride = ""; //An empty override means: use the default folder below the home folder
+string FolderUtilities::_saveStateFolderOverride = "";
+string FolderUtilities::_screenshotFolderOverride = "";
+vector<string> FolderUtilities::_gameFolders = vector<string>(); //Appended to by AddKnownGameFolder()
+
+void FolderUtilities::SetHomeFolder(string homeFolder)
+{
+	_homeFolder = homeFolder; //Remembered for GetHomeFolder() and the folders derived from it
+	CreateFolder(homeFolder); //Created right away; CreateFolder returns nothing, so a failure is not reported here
+}
+
+string FolderUtilities::GetHomeFolder()
+{
+	if(!_homeFolder.empty()) {
+		return _homeFolder;
+	}
+	throw std::runtime_error("Home folder not specified"); //No non-empty home folder has been set yet
+}
+
+void FolderUtilities::AddKnownGameFolder(string gameFolder)
+{
+	//Registers gameFolder unless an entry differing only by letter case is already known.
+	//Bytes are passed to tolower as unsigned char: handing it a negative char (any UTF-8
+	//byte >= 0x80 where char is signed) is undefined behavior, which the old code risked.
+	auto toLower = [](string text) -> string {
+		std::transform(text.begin(), text.end(), text.begin(), [](unsigned char c) { return (char)::tolower(c); });
+		return text;
+	};
+
+	string lowerCaseFolder = toLower(gameFolder);
+	for(const string &folder : _gameFolders) {
+		if(toLower(folder) == lowerCaseFolder) {
+			return;
+		}
+	}
+	_gameFolders.push_back(gameFolder);
+}
+
+vector<string> FolderUtilities::GetKnownGameFolders()
+{
+	return _gameFolders; //Returned by value: the caller gets a copy of the list
+}
+
+void FolderUtilities::SetFolderOverrides(string saveFolder, string saveStateFolder, string screenshotFolder)
+{
+	_saveFolderOverride = saveFolder; //Used by GetSaveFolder() when not empty
+	_saveStateFolderOverride = saveStateFolder; //Used by GetSaveStateFolder() when not empty
+	_screenshotFolderOverride = screenshotFolder; //Used by GetScreenshotFolder() when not empty
+}
+
+string FolderUtilities::GetSaveFolder()
+{
+	//An explicit override wins; without one the folder is "Saves" inside the home folder.
+	//GetHomeFolder() is only consulted (and can only throw) when no override is set.
+	string saveFolder = _saveFolderOverride.empty()
+		? CombinePath(GetHomeFolder(), "Saves")
+		: _saveFolderOverride;
+
+	CreateFolder(saveFolder);
+	return saveFolder;
+}
+
+string FolderUtilities::GetHdPackFolder()
+{
+	string folder = CombinePath(GetHomeFolder(), "HdPacks"); //"HdPacks" inside the home folder; throws if no home folder is set
+	CreateFolder(folder); //Invoked on every call
+	return folder;
+}
+
+string FolderUtilities::GetDebuggerFolder()
+{
+	string folder = CombinePath(GetHomeFolder(), "Debugger"); //"Debugger" inside the home folder; throws if no home folder is set
+	CreateFolder(folder); //Invoked on every call
+	return folder;
+}
+
+string FolderUtilities::GetSaveStateFolder()
+{
+	//An explicit override wins; without one the folder is "SaveStates" inside the home folder.
+	//GetHomeFolder() is only consulted (and can only throw) when no override is set.
+	string saveStateFolder = _saveStateFolderOverride.empty()
+		? CombinePath(GetHomeFolder(), "SaveStates")
+		: _saveStateFolderOverride;
+
+	CreateFolder(saveStateFolder);
+	return saveStateFolder;
+}
+
+string FolderUtilities::GetScreenshotFolder()
+{
+	//An explicit override wins; without one the folder is "Screenshots" inside the home folder.
+	//GetHomeFolder() is only consulted (and can only throw) when no override is set.
+	string screenshotFolder = _screenshotFolderOverride.empty()
+		? CombinePath(GetHomeFolder(), "Screenshots")
+		: _screenshotFolderOverride;
+
+	CreateFolder(screenshotFolder);
+	return screenshotFolder;
+}
+
+string FolderUtilities::GetRecentGamesFolder()
+{
+	string folder = CombinePath(GetHomeFolder(), "RecentGames"); //"RecentGames" inside the home folder; throws if no home folder is set
+	CreateFolder(folder); //Invoked on every call
+	return folder;
+}
+
+#ifndef LIBRETRO
+void FolderUtilities::CreateFolder(string folder)
+{
+	std::error_code errorCode; //Receives the failure reason; it is never inspected, so errors are silently ignored
+	fs::create_directory(fs::u8path(folder), errorCode); //NOTE(review): create_directory makes a single level only - confirm parent folders always exist
+}
+
+vector<string> FolderUtilities::GetFolders(string rootFolder) //Collects the sub-folders found at iterator depth 0 and 1 below rootFolder
+{
+	vector<string> folders;
+
+	std::error_code errorCode; //Written by the is_directory calls; never inspected afterwards
+	if(!fs::is_directory(fs::u8path(rootFolder), errorCode)) {
+		return folders; //Not a directory: empty result
+	} //Past this point rootFolder was reported to be a directory
+
+	for(fs::recursive_directory_iterator i(fs::u8path(rootFolder)), end; i != end; i++) { //NOTE(review): no error_code is passed here, so iteration may throw - confirm callers cope
+		if(i.depth() > 1) {
+			//Prevent excessive recursion
+			i.disable_recursion_pending(); //Entries deeper than depth 1 are skipped and not descended into
+		} else {
+			if(fs::is_directory(i->path(), errorCode)) {
+				folders.push_back(i->path().u8string()); //Stored as a UTF-8 string
+			}
+		}
+	}
+
+	return folders;
+}
+
+vector<string> FolderUtilities::GetFilesInFolder(string rootFolder, std::unordered_set<string> extensions, bool recursive)
+{
+	//Lists the directory entries below rootFolder whose lower-cased extension is in "extensions"
+	//(every entry when the set is empty). Returns an empty list if rootFolder is not a directory.
+	vector<string> files;
+	std::error_code errorCode;
+	if(!fs::is_directory(fs::u8path(rootFolder), errorCode)) {
+		return files;
+	}
+
+	//Shared by both traversal modes; bytes reach tolower as unsigned char since negative values are undefined behavior
+	auto addIfMatching = [&files, &extensions](const fs::path &entryPath) {
+		string extension = entryPath.extension().u8string();
+		std::transform(extension.begin(), extension.end(), extension.begin(), [](unsigned char c) { return (char)::tolower(c); });
+		if(extensions.empty() || extensions.find(extension) != extensions.end()) {
+			files.push_back(entryPath.u8string());
+		}
+	};
+
+	if(recursive) {
+		for(fs::recursive_directory_iterator i(fs::u8path(rootFolder)), end; i != end; i++) {
+			if(i.depth() > 1) {
+				//Prevent excessive recursion
+				i.disable_recursion_pending();
+			} else {
+				addIfMatching(i->path());
+			}
+		}
+	} else {
+		for(fs::directory_iterator i(fs::u8path(rootFolder)), end; i != end; i++) {
+			addIfMatching(i->path());
+		}
+	}
+	return files;
+}
+
+string FolderUtilities::GetFilename(string filepath, bool includeExtension)
+{
+	//Keep only the last component of the path
+	fs::path name = fs::u8path(filepath).filename();
+
+	//Optionally strip the extension before converting back to a UTF-8 string
+	return (includeExtension ? name : name.replace_extension("")).u8string();
+}
+
+string FolderUtilities::GetFolderName(string filepath)
+{
+	return fs::u8path(filepath).remove_filename().u8string(); //The path with its last component removed, as a UTF-8 string
+}
+
+string FolderUtilities::CombinePath(string folder, string filename)
+{
+	//Windows supports forward slashes for paths, too.  And fs::u8path is abnormally slow.
+	//An empty folder yields filename unchanged (as in the LIBRETRO variant); the old code indexed folder[size_t(-1)] in that case.
+	if(folder.empty() || folder.back() == '/') {
+		return folder + filename;
+	}
+	return folder + "/" + filename;
+}
+
+int64_t FolderUtilities::GetFileModificationTime(string filepath)
+{
+	std::error_code errorCode; //Never inspected: a failed lookup is not distinguished from a real timestamp here
+	return fs::last_write_time(fs::u8path(filepath), errorCode).time_since_epoch() / std::chrono::seconds(1); //Whole seconds since the file clock's epoch
+}
+#else
+
+//Libretro: Avoid using filesystem API.
+
+#ifdef _WIN32
+static const char* PATHSEPARATOR = "\\"; //A single backslash (escaped) when _WIN32 is defined
+#else //All other targets
+static const char* PATHSEPARATOR = "/";
+#endif
+
+void FolderUtilities::CreateFolder(string folder)
+{ //Empty in the LIBRETRO build, which avoids the filesystem API: no folder is created
+}
+
+vector<string> FolderUtilities::GetFolders(string rootFolder)
+{
+	return vector<string>(); //LIBRETRO build: always an empty list, rootFolder is not examined
+}
+
+vector<string> FolderUtilities::GetFilesInFolder(string rootFolder, std::unordered_set<string> extensions, bool recursive)
+{
+	return vector<string>(); //LIBRETRO build: always an empty list, the arguments are ignored
+}
+
+string FolderUtilities::GetFilename(string filepath, bool includeExtension)
+{
+	//Everything after the last separator, or the whole string when there is none
+	size_t separatorPos = filepath.find_last_of(PATHSEPARATOR);
+	string name = (separatorPos != std::string::npos) ? filepath.substr(separatorPos + 1) : filepath;
+
+	//substr(0, npos) keeps the full name, so names without a dot are returned unchanged
+	return includeExtension ? name : name.substr(0, name.find_last_of("."));
+}
+
+string FolderUtilities::GetFolderName(string filepath)
+{
+	size_t index = filepath.find_last_of(PATHSEPARATOR); //npos when filepath contains no separator, i.e. has no folder part
+	return index == std::string::npos ? string() : filepath.substr(0, index); //Previously the whole filepath was returned as the "folder" in that case
+}
+
+string FolderUtilities::CombinePath(string folder, string filename)
+{
+	//A separator is inserted unless the last separator found is already the final character.
+	//(For an empty folder both sides of the comparison are size_t(-1), so nothing is inserted.)
+	bool endsWithSeparator = folder.find_last_of(PATHSEPARATOR) == folder.length() - 1;
+	return endsWithSeparator ? folder + filename : folder + PATHSEPARATOR + filename;
+}
+
+int64_t FolderUtilities::GetFileModificationTime(string filepath)
+{
+	return 0; //LIBRETRO build: no lookup is done, every file reports 0
+}
+#endif
\ No newline at end of file
diff --git a/Utilities/FolderUtilities.h b/Utilities/FolderUtilities.h
new file mode 100644
index 0000000..6d6d141
--- /dev/null
+++ b/Utilities/FolderUtilities.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include "stdafx.h"
+#include <unordered_set>
+
+class FolderUtilities //Static helpers for the application's folders and for path handling; no instance state
+{
+private:
+	static string _homeFolder; //Set by SetHomeFolder()
+	static string _saveFolderOverride; //The three overrides are set together by SetFolderOverrides()
+	static string _saveStateFolderOverride;
+	static string _screenshotFolderOverride;
+	static vector<string> _gameFolders; //Filled by AddKnownGameFolder()
+
+public:
+	static void SetHomeFolder(string homeFolder); //Stores the folder and calls CreateFolder() on it
+	static string GetHomeFolder(); //Throws std::runtime_error while no home folder is set
+
+	static void SetFolderOverrides(string saveFolder, string saveStateFolder, string screenshotFolder); //Empty strings leave the defaults in effect
+
+	static void AddKnownGameFolder(string gameFolder); //Ignores folders already known under a different letter case
+	static vector<string> GetKnownGameFolders(); //Returns a copy of the list
+
+	static string GetSaveFolder(); //Override if set, else "Saves" in the home folder
+	static string GetSaveStateFolder(); //Override if set, else "SaveStates" in the home folder
+	static string GetScreenshotFolder(); //Override if set, else "Screenshots" in the home folder
+	static string GetHdPackFolder(); //"HdPacks" in the home folder
+	static string GetDebuggerFolder(); //"Debugger" in the home folder
+	static string GetRecentGamesFolder(); //"RecentGames" in the home folder
+
+	static vector<string> GetFolders(string rootFolder); //Always empty in the LIBRETRO build
+	static vector<string> GetFilesInFolder(string rootFolder, std::unordered_set<string> extensions, bool recursive); //Always empty in the LIBRETRO build
+
+	static string GetFilename(string filepath, bool includeExtension); //Last path component, optionally without its extension
+	static string GetFolderName(string filepath); //The path without its last component
+
+	static void CreateFolder(string folder); //Reports no errors; does nothing in the LIBRETRO build
+
+	static int64_t GetFileModificationTime(string filepath); //Seconds since the file clock's epoch; always 0 in the LIBRETRO build
+
+	static string CombinePath(string folder, string filename); //Joins both parts, inserting a separator when folder does not end with one
+};
\ No newline at end of file
diff --git a/Utilities/HQX/common.h b/Utilities/HQX/common.h
new file mode 100644
index 0000000..dbd6c33
--- /dev/null
+++ b/Utilities/HQX/common.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
+ *
+ * Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
+ * Copyright (C) 2011 Francois Gannaz <mytskine@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __HQX_COMMON_H_
+#define __HQX_COMMON_H_
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <complex>
+
+#define MASK_2     0x0000FF00
+#define MASK_13    0x00FF00FF
+#define MASK_RGB   0x00FFFFFF
+#define MASK_ALPHA 0xFF000000
+
+#define Ymask 0x00FF0000
+#define Umask 0x0000FF00
+#define Vmask 0x000000FF
+#define trY   0x00300000
+#define trU   0x00000700
+#define trV   0x00000006
+
+/* RGB to YUV lookup table */
+extern uint32_t RGBtoYUV[16777216];
+
+static inline uint32_t rgb_to_yuv(uint32_t c) // Table lookup: returns the RGBtoYUV entry for the low 24 bits of c
+{
+    // Mask against MASK_RGB to discard the alpha channel
+    return RGBtoYUV[MASK_RGB & c];
+}
+
+/* Test if there is difference in color */
+static inline int yuv_diff(uint32_t yuv1, uint32_t yuv2) {
+    // Absolute difference of one masked channel; the unsigned subtraction is converted to int first
+    auto delta = [yuv1, yuv2](uint32_t mask) { return std::abs((int)((yuv1 & mask) - (yuv2 & mask))); };
+    return delta(Ymask) > trY || delta(Umask) > trU || delta(Vmask) > trV;
+}
+
+static inline int Diff(uint32_t c1, uint32_t c2) // Non-zero when two colors differ by more than the YUV thresholds
+{
+    return yuv_diff(rgb_to_yuv(c1), rgb_to_yuv(c2)); // Both colors go through the lookup table before being compared
+}
+
+/* Interpolate functions */
+static inline uint32_t Interpolate_2(uint32_t c1, int w1, uint32_t c2, int w2, int s)
+{
+    if (c1 == c2) return c1; // Identical inputs are returned as-is
+
+    // (c1*w1 + c2*w2) >> s, computed separately per mask group and re-masked before the parts are summed
+    uint32_t alpha = ((((c1 & MASK_ALPHA) >> 24) * w1 + ((c2 & MASK_ALPHA) >> 24) * w2) << (24-s)) & MASK_ALPHA;
+    uint32_t mid = (((c1 & MASK_2) * w1 + (c2 & MASK_2) * w2) >> s) & MASK_2;
+    uint32_t outer = (((c1 & MASK_13) * w1 + (c2 & MASK_13) * w2) >> s) & MASK_13;
+    return alpha + mid + outer;
+}
+
+static inline uint32_t Interpolate_3(uint32_t c1, int w1, uint32_t c2, int w2, uint32_t c3, int w3, int s)
+{
+    uint32_t alpha = ((((c1 & MASK_ALPHA) >> 24) * w1 + ((c2 & MASK_ALPHA) >> 24) * w2 + ((c3 & MASK_ALPHA) >> 24) * w3) << (24-s)) & MASK_ALPHA;
+    uint32_t mid = (((c1 & MASK_2) * w1 + (c2 & MASK_2) * w2 + (c3 & MASK_2) * w3) >> s) & MASK_2;
+    uint32_t outer = (((c1 & MASK_13) * w1 + (c2 & MASK_13) * w2 + (c3 & MASK_13) * w3) >> s) & MASK_13;
+    return alpha + mid + outer; // (c1*w1 + c2*w2 + c3*w3) >> s, computed per mask group and re-masked
+}
+
+static inline uint32_t Interp1(uint32_t c1, uint32_t c2)
+{
+    //3:1 weighted blend of c1 and c2: (c1*3+c2) >> 2
+    return Interpolate_2(c1, 3, c2, 1, 2);
+}
+
+static inline uint32_t Interp2(uint32_t c1, uint32_t c2, uint32_t c3)
+{
+    //2:1:1 weighted blend: (c1*2+c2+c3) >> 2
+    return Interpolate_3(c1, 2, c2, 1, c3, 1, 2);
+}
+
+static inline uint32_t Interp3(uint32_t c1, uint32_t c2)
+{
+    //7:1 weighted blend: (c1*7+c2)/8
+    return Interpolate_2(c1, 7, c2, 1, 3);
+}
+
+static inline uint32_t Interp4(uint32_t c1, uint32_t c2, uint32_t c3)
+{
+    //2:7:7 weighted blend: (c1*2+(c2+c3)*7)/16
+    return Interpolate_3(c1, 2, c2, 7, c3, 7, 4);
+}
+
+static inline uint32_t Interp5(uint32_t c1, uint32_t c2)
+{
+    //Even 1:1 blend: (c1+c2) >> 1
+    return Interpolate_2(c1, 1, c2, 1, 1);
+}
+
+static inline uint32_t Interp6(uint32_t c1, uint32_t c2, uint32_t c3)
+{
+    //5:2:1 weighted blend: (c1*5+c2*2+c3)/8
+    return Interpolate_3(c1, 5, c2, 2, c3, 1, 3);
+}
+
+static inline uint32_t Interp7(uint32_t c1, uint32_t c2, uint32_t c3)
+{
+    //6:1:1 weighted blend: (c1*6+c2+c3)/8
+    return Interpolate_3(c1, 6, c2, 1, c3, 1, 3);
+}
+
+static inline uint32_t Interp8(uint32_t c1, uint32_t c2)
+{
+    //5:3 weighted blend: (c1*5+c2*3)/8
+    return Interpolate_2(c1, 5, c2, 3, 3);
+}
+
+static inline uint32_t Interp9(uint32_t c1, uint32_t c2, uint32_t c3)
+{
+    //2:3:3 weighted blend: (c1*2+(c2+c3)*3)/8
+    return Interpolate_3(c1, 2, c2, 3, c3, 3, 3);
+}
+
+static inline uint32_t Interp10(uint32_t c1, uint32_t c2, uint32_t c3)
+{
+    //14:1:1 weighted blend: (c1*14+c2+c3)/16
+    return Interpolate_3(c1, 14, c2, 1, c3, 1, 4);
+}
+
+#endif
diff --git a/Utilities/HQX/hq2x.cpp b/Utilities/HQX/hq2x.cpp
new file mode 100644
index 0000000..0aa7fff
--- /dev/null
+++ b/Utilities/HQX/hq2x.cpp
@@ -0,0 +1,2810 @@
+/*
+ * Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
+ *
+ * Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "../stdafx.h"
+#include <stdint.h>
+#include "common.h"
+#include "hqx.h"
+
+#define PIXEL00_0     *dp = w[5];
+#define PIXEL00_10    *dp = Interp1(w[5], w[1]);
+#define PIXEL00_11    *dp = Interp1(w[5], w[4]);
+#define PIXEL00_12    *dp = Interp1(w[5], w[2]);
+#define PIXEL00_20    *dp = Interp2(w[5], w[4], w[2]);
+#define PIXEL00_21    *dp = Interp2(w[5], w[1], w[2]);
+#define PIXEL00_22    *dp = Interp2(w[5], w[1], w[4]);
+#define PIXEL00_60    *dp = Interp6(w[5], w[2], w[4]);
+#define PIXEL00_61    *dp = Interp6(w[5], w[4], w[2]);
+#define PIXEL00_70    *dp = Interp7(w[5], w[4], w[2]);
+#define PIXEL00_90    *dp = Interp9(w[5], w[4], w[2]);
+#define PIXEL00_100   *dp = Interp10(w[5], w[4], w[2]);
+#define PIXEL01_0     *(dp+1) = w[5];
+#define PIXEL01_10    *(dp+1) = Interp1(w[5], w[3]);
+#define PIXEL01_11    *(dp+1) = Interp1(w[5], w[2]);
+#define PIXEL01_12    *(dp+1) = Interp1(w[5], w[6]);
+#define PIXEL01_20    *(dp+1) = Interp2(w[5], w[2], w[6]);
+#define PIXEL01_21    *(dp+1) = Interp2(w[5], w[3], w[6]);
+#define PIXEL01_22    *(dp+1) = Interp2(w[5], w[3], w[2]);
+#define PIXEL01_60    *(dp+1) = Interp6(w[5], w[6], w[2]);
+#define PIXEL01_61    *(dp+1) = Interp6(w[5], w[2], w[6]);
+#define PIXEL01_70    *(dp+1) = Interp7(w[5], w[2], w[6]);
+#define PIXEL01_90    *(dp+1) = Interp9(w[5], w[2], w[6]);
+#define PIXEL01_100   *(dp+1) = Interp10(w[5], w[2], w[6]);
+#define PIXEL10_0     *(dp+dpL) = w[5];
+#define PIXEL10_10    *(dp+dpL) = Interp1(w[5], w[7]);
+#define PIXEL10_11    *(dp+dpL) = Interp1(w[5], w[8]);
+#define PIXEL10_12    *(dp+dpL) = Interp1(w[5], w[4]);
+#define PIXEL10_20    *(dp+dpL) = Interp2(w[5], w[8], w[4]);
+#define PIXEL10_21    *(dp+dpL) = Interp2(w[5], w[7], w[4]);
+#define PIXEL10_22    *(dp+dpL) = Interp2(w[5], w[7], w[8]);
+#define PIXEL10_60    *(dp+dpL) = Interp6(w[5], w[4], w[8]);
+#define PIXEL10_61    *(dp+dpL) = Interp6(w[5], w[8], w[4]);
+#define PIXEL10_70    *(dp+dpL) = Interp7(w[5], w[8], w[4]);
+#define PIXEL10_90    *(dp+dpL) = Interp9(w[5], w[8], w[4]);
+#define PIXEL10_100   *(dp+dpL) = Interp10(w[5], w[8], w[4]);
+#define PIXEL11_0     *(dp+dpL+1) = w[5];
+#define PIXEL11_10    *(dp+dpL+1) = Interp1(w[5], w[9]);
+#define PIXEL11_11    *(dp+dpL+1) = Interp1(w[5], w[6]);
+#define PIXEL11_12    *(dp+dpL+1) = Interp1(w[5], w[8]);
+#define PIXEL11_20    *(dp+dpL+1) = Interp2(w[5], w[6], w[8]);
+#define PIXEL11_21    *(dp+dpL+1) = Interp2(w[5], w[9], w[8]);
+#define PIXEL11_22    *(dp+dpL+1) = Interp2(w[5], w[9], w[6]);
+#define PIXEL11_60    *(dp+dpL+1) = Interp6(w[5], w[8], w[6]);
+#define PIXEL11_61    *(dp+dpL+1) = Interp6(w[5], w[6], w[8]);
+#define PIXEL11_70    *(dp+dpL+1) = Interp7(w[5], w[6], w[8]);
+#define PIXEL11_90    *(dp+dpL+1) = Interp9(w[5], w[6], w[8]);
+#define PIXEL11_100   *(dp+dpL+1) = Interp10(w[5], w[6], w[8]);
+
+void HQX_CALLCONV hq2x_32_rb( uint32_t * sp, uint32_t srb, uint32_t * dp, uint32_t drb, int Xres, int Yres )
+{
+    int  i, j, k;
+    int  prevline, nextline;
+    uint32_t  w[10];
+    int dpL = (drb >> 2);
+    int spL = (srb >> 2);
+    uint8_t *sRowP = (uint8_t *) sp;
+    uint8_t *dRowP = (uint8_t *) dp;
+    uint32_t yuv1, yuv2;
+
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w1 | w2 | w3 |
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w4 | w5 | w6 |
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w7 | w8 | w9 |
+    //   +----+----+----+
+
+    for (j=0; j<Yres; j++)
+    {
+        if (j>0)      prevline = -spL; else prevline = 0;
+        if (j<Yres-1) nextline =  spL; else nextline = 0;
+
+        for (i=0; i<Xres; i++)
+        {
+            w[2] = *(sp + prevline);
+            w[5] = *sp;
+            w[8] = *(sp + nextline);
+
+            if (i>0)
+            {
+                w[1] = *(sp + prevline - 1);
+                w[4] = *(sp - 1);
+                w[7] = *(sp + nextline - 1);
+            }
+            else
+            {
+                w[1] = w[2];
+                w[4] = w[5];
+                w[7] = w[8];
+            }
+
+            if (i<Xres-1)
+            {
+                w[3] = *(sp + prevline + 1);
+                w[6] = *(sp + 1);
+                w[9] = *(sp + nextline + 1);
+            }
+            else
+            {
+                w[3] = w[2];
+                w[6] = w[5];
+                w[9] = w[8];
+            }
+
+            int pattern = 0;
+            int flag = 1;
+
+            yuv1 = rgb_to_yuv(w[5]);
+
+            for (k=1; k<=9; k++)
+            {
+                if (k==5) continue;
+
+                if ( w[k] != w[5] )
+                {
+                    yuv2 = rgb_to_yuv(w[k]);
+                    if (yuv_diff(yuv1, yuv2))
+                        pattern |= flag;
+                }
+                flag <<= 1;
+            }
+
+            switch (pattern)
+            {
+                case 0:
+                case 1:
+                case 4:
+                case 32:
+                case 128:
+                case 5:
+                case 132:
+                case 160:
+                case 33:
+                case 129:
+                case 36:
+                case 133:
+                case 164:
+                case 161:
+                case 37:
+                case 165:
+                    {
+                        PIXEL00_20
+                        PIXEL01_20
+                        PIXEL10_20
+                        PIXEL11_20
+                        break;
+                    }
+                case 2:
+                case 34:
+                case 130:
+                case 162:
+                    {
+                        PIXEL00_22
+                        PIXEL01_21
+                        PIXEL10_20
+                        PIXEL11_20
+                        break;
+                    }
+                case 16:
+                case 17:
+                case 48:
+                case 49:
+                    {
+                        PIXEL00_20
+                        PIXEL01_22
+                        PIXEL10_20
+                        PIXEL11_21
+                        break;
+                    }
+                case 64:
+                case 65:
+                case 68:
+                case 69:
+                    {
+                        PIXEL00_20
+                        PIXEL01_20
+                        PIXEL10_21
+                        PIXEL11_22
+                        break;
+                    }
+                case 8:
+                case 12:
+                case 136:
+                case 140:
+                    {
+                        PIXEL00_21
+                        PIXEL01_20
+                        PIXEL10_22
+                        PIXEL11_20
+                        break;
+                    }
+                case 3:
+                case 35:
+                case 131:
+                case 163:
+                    {
+                        PIXEL00_11
+                        PIXEL01_21
+                        PIXEL10_20
+                        PIXEL11_20
+                        break;
+                    }
+                case 6:
+                case 38:
+                case 134:
+                case 166:
+                    {
+                        PIXEL00_22
+                        PIXEL01_12
+                        PIXEL10_20
+                        PIXEL11_20
+                        break;
+                    }
+                case 20:
+                case 21:
+                case 52:
+                case 53:
+                    {
+                        PIXEL00_20
+                        PIXEL01_11
+                        PIXEL10_20
+                        PIXEL11_21
+                        break;
+                    }
+                case 144:
+                case 145:
+                case 176:
+                case 177:
+                    {
+                        PIXEL00_20
+                        PIXEL01_22
+                        PIXEL10_20
+                        PIXEL11_12
+                        break;
+                    }
+                case 192:
+                case 193:
+                case 196:
+                case 197:
+                    {
+                        PIXEL00_20
+                        PIXEL01_20
+                        PIXEL10_21
+                        PIXEL11_11
+                        break;
+                    }
+                case 96:
+                case 97:
+                case 100:
+                case 101:
+                    {
+                        PIXEL00_20
+                        PIXEL01_20
+                        PIXEL10_12
+                        PIXEL11_22
+                        break;
+                    }
+                case 40:
+                case 44:
+                case 168:
+                case 172:
+                    {
+                        PIXEL00_21
+                        PIXEL01_20
+                        PIXEL10_11
+                        PIXEL11_20
+                        break;
+                    }
+                case 9:
+                case 13:
+                case 137:
+                case 141:
+                    {
+                        PIXEL00_12
+                        PIXEL01_20
+                        PIXEL10_22
+                        PIXEL11_20
+                        break;
+                    }
+                case 18:
+                case 50:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_20
+                        PIXEL11_21
+                        break;
+                    }
+                case 80:
+                case 81:
+                    {
+                        PIXEL00_20
+                        PIXEL01_22
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 72:
+                case 76:
+                    {
+                        PIXEL00_21
+                        PIXEL01_20
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 10:
+                case 138:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_21
+                        PIXEL10_22
+                        PIXEL11_20
+                        break;
+                    }
+                case 66:
+                    {
+                        PIXEL00_22
+                        PIXEL01_21
+                        PIXEL10_21
+                        PIXEL11_22
+                        break;
+                    }
+                case 24:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 7:
+                case 39:
+                case 135:
+                    {
+                        PIXEL00_11
+                        PIXEL01_12
+                        PIXEL10_20
+                        PIXEL11_20
+                        break;
+                    }
+                case 148:
+                case 149:
+                case 180:
+                    {
+                        PIXEL00_20
+                        PIXEL01_11
+                        PIXEL10_20
+                        PIXEL11_12
+                        break;
+                    }
+                case 224:
+                case 228:
+                case 225:
+                    {
+                        PIXEL00_20
+                        PIXEL01_20
+                        PIXEL10_12
+                        PIXEL11_11
+                        break;
+                    }
+                case 41:
+                case 169:
+                case 45:
+                    {
+                        PIXEL00_12
+                        PIXEL01_20
+                        PIXEL10_11
+                        PIXEL11_20
+                        break;
+                    }
+                case 22:
+                case 54:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_20
+                        PIXEL11_21
+                        break;
+                    }
+                case 208:
+                case 209:
+                    {
+                        PIXEL00_20
+                        PIXEL01_22
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 104:
+                case 108:
+                    {
+                        PIXEL00_21
+                        PIXEL01_20
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 11:
+                case 139:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_21
+                        PIXEL10_22
+                        PIXEL11_20
+                        break;
+                    }
+                case 19:
+                case 51:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_11
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL00_60
+                            PIXEL01_90
+                        }
+                        PIXEL10_20
+                        PIXEL11_21
+                        break;
+                    }
+                case 146:
+                case 178:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                            PIXEL11_12
+                        }
+                        else
+                        {
+                            PIXEL01_90
+                            PIXEL11_61
+                        }
+                        PIXEL10_20
+                        break;
+                    }
+                case 84:
+                case 85:
+                    {
+                        PIXEL00_20
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL01_11
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL01_60
+                            PIXEL11_90
+                        }
+                        PIXEL10_21
+                        break;
+                    }
+                case 112:
+                case 113:
+                    {
+                        PIXEL00_20
+                        PIXEL01_22
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL10_12
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL10_61
+                            PIXEL11_90
+                        }
+                        break;
+                    }
+                case 200:
+                case 204:
+                    {
+                        PIXEL00_21
+                        PIXEL01_20
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                            PIXEL11_11
+                        }
+                        else
+                        {
+                            PIXEL10_90
+                            PIXEL11_60
+                        }
+                        break;
+                    }
+                case 73:
+                case 77:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_12
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL00_61
+                            PIXEL10_90
+                        }
+                        PIXEL01_20
+                        PIXEL11_22
+                        break;
+                    }
+                case 42:
+                case 170:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                            PIXEL10_11
+                        }
+                        else
+                        {
+                            PIXEL00_90
+                            PIXEL10_60
+                        }
+                        PIXEL01_21
+                        PIXEL11_20
+                        break;
+                    }
+                case 14:
+                case 142:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                            PIXEL01_12
+                        }
+                        else
+                        {
+                            PIXEL00_90
+                            PIXEL01_61
+                        }
+                        PIXEL10_22
+                        PIXEL11_20
+                        break;
+                    }
+                case 67:
+                    {
+                        PIXEL00_11
+                        PIXEL01_21
+                        PIXEL10_21
+                        PIXEL11_22
+                        break;
+                    }
+                case 70:
+                    {
+                        PIXEL00_22
+                        PIXEL01_12
+                        PIXEL10_21
+                        PIXEL11_22
+                        break;
+                    }
+                case 28:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 152:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 194:
+                    {
+                        PIXEL00_22
+                        PIXEL01_21
+                        PIXEL10_21
+                        PIXEL11_11
+                        break;
+                    }
+                case 98:
+                    {
+                        PIXEL00_22
+                        PIXEL01_21
+                        PIXEL10_12
+                        PIXEL11_22
+                        break;
+                    }
+                case 56:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 25:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 26:
+                case 31:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 82:
+                case 214:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 88:
+                case 248:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 74:
+                case 107:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_21
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 27:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_10
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 86:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_21
+                        PIXEL11_10
+                        break;
+                    }
+                case 216:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        PIXEL10_10
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 106:
+                    {
+                        PIXEL00_10
+                        PIXEL01_21
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 30:
+                    {
+                        PIXEL00_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 210:
+                    {
+                        PIXEL00_22
+                        PIXEL01_10
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 120:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_10
+                        break;
+                    }
+                case 75:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_21
+                        PIXEL10_10
+                        PIXEL11_22
+                        break;
+                    }
+                case 29:
+                    {
+                        PIXEL00_12
+                        PIXEL01_11
+                        PIXEL10_22
+                        PIXEL11_21
+                        break;
+                    }
+                case 198:
+                    {
+                        PIXEL00_22
+                        PIXEL01_12
+                        PIXEL10_21
+                        PIXEL11_11
+                        break;
+                    }
+                case 184:
+                    {
+                        PIXEL00_21
+                        PIXEL01_22
+                        PIXEL10_11
+                        PIXEL11_12
+                        break;
+                    }
+                case 99:
+                    {
+                        PIXEL00_11
+                        PIXEL01_21
+                        PIXEL10_12
+                        PIXEL11_22
+                        break;
+                    }
+                case 57:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 71:
+                    {
+                        PIXEL00_11
+                        PIXEL01_12
+                        PIXEL10_21
+                        PIXEL11_22
+                        break;
+                    }
+                case 156:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 226:
+                    {
+                        PIXEL00_22
+                        PIXEL01_21
+                        PIXEL10_12
+                        PIXEL11_11
+                        break;
+                    }
+                case 60:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 195:
+                    {
+                        PIXEL00_11
+                        PIXEL01_21
+                        PIXEL10_21
+                        PIXEL11_11
+                        break;
+                    }
+                case 102:
+                    {
+                        PIXEL00_22
+                        PIXEL01_12
+                        PIXEL10_12
+                        PIXEL11_22
+                        break;
+                    }
+                case 153:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 58:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 83:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 92:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 202:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        PIXEL01_21
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 78:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 154:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 114:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 89:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 90:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 55:
+                case 23:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_11
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL00_60
+                            PIXEL01_90
+                        }
+                        PIXEL10_20
+                        PIXEL11_21
+                        break;
+                    }
+                case 182:
+                case 150:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                            PIXEL11_12
+                        }
+                        else
+                        {
+                            PIXEL01_90
+                            PIXEL11_61
+                        }
+                        PIXEL10_20
+                        break;
+                    }
+                case 213:
+                case 212:
+                    {
+                        PIXEL00_20
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL01_11
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL01_60
+                            PIXEL11_90
+                        }
+                        PIXEL10_21
+                        break;
+                    }
+                case 241:
+                case 240:
+                    {
+                        PIXEL00_20
+                        PIXEL01_22
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL10_12
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL10_61
+                            PIXEL11_90
+                        }
+                        break;
+                    }
+                case 236:
+                case 232:
+                    {
+                        PIXEL00_21
+                        PIXEL01_20
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                            PIXEL11_11
+                        }
+                        else
+                        {
+                            PIXEL10_90
+                            PIXEL11_60
+                        }
+                        break;
+                    }
+                case 109:
+                case 105:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_12
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_61
+                            PIXEL10_90
+                        }
+                        PIXEL01_20
+                        PIXEL11_22
+                        break;
+                    }
+                case 171:
+                case 43:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL10_11
+                        }
+                        else
+                        {
+                            PIXEL00_90
+                            PIXEL10_60
+                        }
+                        PIXEL01_21
+                        PIXEL11_20
+                        break;
+                    }
+                case 143:
+                case 15:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_12
+                        }
+                        else
+                        {
+                            PIXEL00_90
+                            PIXEL01_61
+                        }
+                        PIXEL10_22
+                        PIXEL11_20
+                        break;
+                    }
+                case 124:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_10
+                        break;
+                    }
+                case 203:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_21
+                        PIXEL10_10
+                        PIXEL11_11
+                        break;
+                    }
+                case 62:
+                    {
+                        PIXEL00_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 211:
+                    {
+                        PIXEL00_11
+                        PIXEL01_10
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 118:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_12
+                        PIXEL11_10
+                        break;
+                    }
+                case 217:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        PIXEL10_10
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 110:
+                    {
+                        PIXEL00_10
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 155:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_10
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 188:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        PIXEL10_11
+                        PIXEL11_12
+                        break;
+                    }
+                case 185:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        PIXEL10_11
+                        PIXEL11_12
+                        break;
+                    }
+                case 61:
+                    {
+                        PIXEL00_12
+                        PIXEL01_11
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 157:
+                    {
+                        PIXEL00_12
+                        PIXEL01_11
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 103:
+                    {
+                        PIXEL00_11
+                        PIXEL01_12
+                        PIXEL10_12
+                        PIXEL11_22
+                        break;
+                    }
+                case 227:
+                    {
+                        PIXEL00_11
+                        PIXEL01_21
+                        PIXEL10_12
+                        PIXEL11_11
+                        break;
+                    }
+                case 230:
+                    {
+                        PIXEL00_22
+                        PIXEL01_12
+                        PIXEL10_12
+                        PIXEL11_11
+                        break;
+                    }
+                case 199:
+                    {
+                        PIXEL00_11
+                        PIXEL01_12
+                        PIXEL10_21
+                        PIXEL11_11
+                        break;
+                    }
+                case 220:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 158:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 234:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        PIXEL01_21
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 242:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 59:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 121:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 87:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 79:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 122:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 94:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 218:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 91:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 229:
+                    {
+                        PIXEL00_20
+                        PIXEL01_20
+                        PIXEL10_12
+                        PIXEL11_11
+                        break;
+                    }
+                case 167:
+                    {
+                        PIXEL00_11
+                        PIXEL01_12
+                        PIXEL10_20
+                        PIXEL11_20
+                        break;
+                    }
+                case 173:
+                    {
+                        PIXEL00_12
+                        PIXEL01_20
+                        PIXEL10_11
+                        PIXEL11_20
+                        break;
+                    }
+                case 181:
+                    {
+                        PIXEL00_20
+                        PIXEL01_11
+                        PIXEL10_20
+                        PIXEL11_12
+                        break;
+                    }
+                case 186:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_11
+                        PIXEL11_12
+                        break;
+                    }
+                case 115:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 93:
+                    {
+                        PIXEL00_12
+                        PIXEL01_11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 206:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 205:
+                case 201:
+                    {
+                        PIXEL00_12
+                        PIXEL01_20
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_10
+                        }
+                        else
+                        {
+                            PIXEL10_70
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 174:
+                case 46:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_10
+                        }
+                        else
+                        {
+                            PIXEL00_70
+                        }
+                        PIXEL01_12
+                        PIXEL10_11
+                        PIXEL11_20
+                        break;
+                    }
+                case 179:
+                case 147:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_10
+                        }
+                        else
+                        {
+                            PIXEL01_70
+                        }
+                        PIXEL10_20
+                        PIXEL11_12
+                        break;
+                    }
+                case 117:
+                case 116:
+                    {
+                        PIXEL00_20
+                        PIXEL01_11
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_10
+                        }
+                        else
+                        {
+                            PIXEL11_70
+                        }
+                        break;
+                    }
+                case 189:
+                    {
+                        PIXEL00_12
+                        PIXEL01_11
+                        PIXEL10_11
+                        PIXEL11_12
+                        break;
+                    }
+                case 231:
+                    {
+                        PIXEL00_11
+                        PIXEL01_12
+                        PIXEL10_12
+                        PIXEL11_11
+                        break;
+                    }
+                case 126:
+                    {
+                        PIXEL00_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_10
+                        break;
+                    }
+                case 219:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_10
+                        PIXEL10_10
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 125:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_12
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_61
+                            PIXEL10_90
+                        }
+                        PIXEL01_11
+                        PIXEL11_10
+                        break;
+                    }
+                case 221:
+                    {
+                        PIXEL00_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL01_11
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL01_60
+                            PIXEL11_90
+                        }
+                        PIXEL10_10
+                        break;
+                    }
+                case 207:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_12
+                        }
+                        else
+                        {
+                            PIXEL00_90
+                            PIXEL01_61
+                        }
+                        PIXEL10_10
+                        PIXEL11_11
+                        break;
+                    }
+                case 238:
+                    {
+                        PIXEL00_10
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                            PIXEL11_11
+                        }
+                        else
+                        {
+                            PIXEL10_90
+                            PIXEL11_60
+                        }
+                        break;
+                    }
+                case 190:
+                    {
+                        PIXEL00_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                            PIXEL11_12
+                        }
+                        else
+                        {
+                            PIXEL01_90
+                            PIXEL11_61
+                        }
+                        PIXEL10_11
+                        break;
+                    }
+                case 187:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL10_11
+                        }
+                        else
+                        {
+                            PIXEL00_90
+                            PIXEL10_60
+                        }
+                        PIXEL01_10
+                        PIXEL11_12
+                        break;
+                    }
+                case 243:
+                    {
+                        PIXEL00_11
+                        PIXEL01_10
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL10_12
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL10_61
+                            PIXEL11_90
+                        }
+                        break;
+                    }
+                case 119:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_11
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL00_60
+                            PIXEL01_90
+                        }
+                        PIXEL10_12
+                        PIXEL11_10
+                        break;
+                    }
+                case 237:
+                case 233:
+                    {
+                        PIXEL00_12
+                        PIXEL01_20
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 175:
+                case 47:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        PIXEL01_12
+                        PIXEL10_11
+                        PIXEL11_20
+                        break;
+                    }
+                case 183:
+                case 151:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        PIXEL10_20
+                        PIXEL11_12
+                        break;
+                    }
+                case 245:
+                case 244:
+                    {
+                        PIXEL00_20
+                        PIXEL01_11
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+                case 250:
+                    {
+                        PIXEL00_10
+                        PIXEL01_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 123:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_10
+                        break;
+                    }
+                case 95:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_10
+                        PIXEL11_10
+                        break;
+                    }
+                case 222:
+                    {
+                        PIXEL00_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_10
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 252:
+                    {
+                        PIXEL00_21
+                        PIXEL01_11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+                case 249:
+                    {
+                        PIXEL00_12
+                        PIXEL01_22
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 235:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_21
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 111:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_22
+                        break;
+                    }
+                case 63:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_11
+                        PIXEL11_21
+                        break;
+                    }
+                case 159:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        PIXEL10_22
+                        PIXEL11_12
+                        break;
+                    }
+                case 215:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        PIXEL10_21
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 246:
+                    {
+                        PIXEL00_22
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+                case 254:
+                    {
+                        PIXEL00_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+                case 253:
+                    {
+                        PIXEL00_12
+                        PIXEL01_11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+                case 251:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 239:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        PIXEL01_12
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        PIXEL11_11
+                        break;
+                    }
+                case 127:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_20
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_20
+                        }
+                        PIXEL11_10
+                        break;
+                    }
+                case 191:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        PIXEL10_11
+                        PIXEL11_12
+                        break;
+                    }
+                case 223:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        PIXEL10_10
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_20
+                        }
+                        break;
+                    }
+                case 247:
+                    {
+                        PIXEL00_11
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        PIXEL10_12
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+                case 255:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_100
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_0
+                        }
+                        else
+                        {
+                            PIXEL01_100
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL10_100
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL11_100
+                        }
+                        break;
+                    }
+            }
+            sp++;
+            dp += 2;
+        }
+
+        sRowP += srb;
+        sp = (uint32_t *) sRowP;
+
+        dRowP += drb * 2;
+        dp = (uint32_t *) dRowP;
+    }
+}
+
+void HQX_CALLCONV hq2x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
+{   // Convenience wrapper: treats both buffers as tightly packed, derives the row strides from Xres and delegates to hq2x_32_rb.
+    const uint32_t srcStride = Xres * 4;                        // bytes per source row: one uint32_t per pixel
+    hq2x_32_rb(sp, srcStride, dp, srcStride * 2, Xres, Yres);   // each output row holds 2*Xres pixels, hence twice the bytes
+}
diff --git a/Utilities/HQX/hq3x.cpp b/Utilities/HQX/hq3x.cpp
new file mode 100644
index 0000000..84f22fd
--- /dev/null
+++ b/Utilities/HQX/hq3x.cpp
@@ -0,0 +1,3788 @@
+/*
+ * Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
+ *
+ * Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "../stdafx.h"
+#include <stdint.h>
+#include "common.h"
+#include "hqx.h"
+// Writers for the 3x3 output block at dp: PIXELrc_<op> stores output row r, column c. Each expands to a full statement and needs dp, dpL and w[] in scope.
+#define PIXEL00_1M  *dp = Interp1(w[5], w[1]);  // top-left corner at dp: 1M/1U/1L pair w[5] with w[1]/w[2]/w[4]; _2, _4 and _5 use the edge neighbours w[4] and w[2]
+#define PIXEL00_1U  *dp = Interp1(w[5], w[2]);
+#define PIXEL00_1L  *dp = Interp1(w[5], w[4]);
+#define PIXEL00_2   *dp = Interp2(w[5], w[4], w[2]);
+#define PIXEL00_4   *dp = Interp4(w[5], w[4], w[2]);
+#define PIXEL00_5   *dp = Interp5(w[4], w[2]);
+#define PIXEL00_C   *dp   = w[5];
+// Top edge (row 0, col 1) at dp+1: _1 and _3 blend w[5] with w[2] via Interp1/Interp3, _6 is Interp1 with the arguments swapped, _C copies w[5].
+#define PIXEL01_1   *(dp+1) = Interp1(w[5], w[2]);
+#define PIXEL01_3   *(dp+1) = Interp3(w[5], w[2]);
+#define PIXEL01_6   *(dp+1) = Interp1(w[2], w[5]);
+#define PIXEL01_C   *(dp+1) = w[5];
+// Top-right corner at dp+2: 1M/1U/1R pair w[5] with w[3]/w[2]/w[6]; _2, _4 and _5 use the edge neighbours w[2] and w[6] (_5 leaves w[5] out).
+#define PIXEL02_1M  *(dp+2) = Interp1(w[5], w[3]);
+#define PIXEL02_1U  *(dp+2) = Interp1(w[5], w[2]);
+#define PIXEL02_1R  *(dp+2) = Interp1(w[5], w[6]);
+#define PIXEL02_2   *(dp+2) = Interp2(w[5], w[2], w[6]);
+#define PIXEL02_4   *(dp+2) = Interp4(w[5], w[2], w[6]);
+#define PIXEL02_5   *(dp+2) = Interp5(w[2], w[6]);
+#define PIXEL02_C   *(dp+2) = w[5];
+// Left edge (row 1, col 0) at dp+dpL, where dpL is the uint32_t offset to the next output row (drb >> 2 in hq3x_32_rb): _1/_3/_6/_C scheme against w[4].
+#define PIXEL10_1   *(dp+dpL) = Interp1(w[5], w[4]);
+#define PIXEL10_3   *(dp+dpL) = Interp3(w[5], w[4]);
+#define PIXEL10_6   *(dp+dpL) = Interp1(w[4], w[5]);
+#define PIXEL10_C   *(dp+dpL) = w[5];
+// Centre of the block at dp+dpL+1: always the unmodified source pixel w[5].
+#define PIXEL11     *(dp+dpL+1) = w[5];
+// Right edge (row 1, col 2) at dp+dpL+2: _1/_3/_6/_C scheme against w[6].
+#define PIXEL12_1   *(dp+dpL+2) = Interp1(w[5], w[6]);
+#define PIXEL12_3   *(dp+dpL+2) = Interp3(w[5], w[6]);
+#define PIXEL12_6   *(dp+dpL+2) = Interp1(w[6], w[5]);
+#define PIXEL12_C   *(dp+dpL+2) = w[5];
+// Bottom-left corner at dp+2*dpL: 1M/1D/1L pair w[5] with w[7]/w[8]/w[4]; _2, _4 and _5 use the edge neighbours w[8] and w[4] (_5 leaves w[5] out).
+#define PIXEL20_1M  *(dp+dpL+dpL) = Interp1(w[5], w[7]);
+#define PIXEL20_1D  *(dp+dpL+dpL) = Interp1(w[5], w[8]);
+#define PIXEL20_1L  *(dp+dpL+dpL) = Interp1(w[5], w[4]);
+#define PIXEL20_2   *(dp+dpL+dpL) = Interp2(w[5], w[8], w[4]);
+#define PIXEL20_4   *(dp+dpL+dpL) = Interp4(w[5], w[8], w[4]);
+#define PIXEL20_5   *(dp+dpL+dpL) = Interp5(w[8], w[4]);
+#define PIXEL20_C   *(dp+dpL+dpL) = w[5];
+// Bottom edge (row 2, col 1) at dp+2*dpL+1: _1/_3/_6/_C scheme against w[8].
+#define PIXEL21_1   *(dp+dpL+dpL+1) = Interp1(w[5], w[8]);
+#define PIXEL21_3   *(dp+dpL+dpL+1) = Interp3(w[5], w[8]);
+#define PIXEL21_6   *(dp+dpL+dpL+1) = Interp1(w[8], w[5]);
+#define PIXEL21_C   *(dp+dpL+dpL+1) = w[5];
+// Bottom-right corner at dp+2*dpL+2: 1M/1D/1R pair w[5] with w[9]/w[8]/w[6]; _2, _4 and _5 use the edge neighbours w[6] and w[8] (_5 leaves w[5] out).
+#define PIXEL22_1M  *(dp+dpL+dpL+2) = Interp1(w[5], w[9]);
+#define PIXEL22_1D  *(dp+dpL+dpL+2) = Interp1(w[5], w[8]);
+#define PIXEL22_1R  *(dp+dpL+dpL+2) = Interp1(w[5], w[6]);
+#define PIXEL22_2   *(dp+dpL+dpL+2) = Interp2(w[5], w[6], w[8]);
+#define PIXEL22_4   *(dp+dpL+dpL+2) = Interp4(w[5], w[6], w[8]);
+#define PIXEL22_5   *(dp+dpL+dpL+2) = Interp5(w[6], w[8]);
+#define PIXEL22_C   *(dp+dpL+dpL+2) = w[5];
+
+void HQX_CALLCONV hq3x_32_rb( uint32_t * sp, uint32_t srb, uint32_t * dp, uint32_t drb, int Xres, int Yres )
+{
+    int  i, j, k;
+    int  prevline, nextline;
+    uint32_t  w[10];
+    int dpL = (drb >> 2);
+    int spL = (srb >> 2);
+    uint8_t *sRowP = (uint8_t *) sp;
+    uint8_t *dRowP = (uint8_t *) dp;
+    uint32_t yuv1, yuv2;
+
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w1 | w2 | w3 |
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w4 | w5 | w6 |
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w7 | w8 | w9 |
+    //   +----+----+----+
+
+    for (j=0; j<Yres; j++)
+    {
+        if (j>0)      prevline = -spL; else prevline = 0;
+        if (j<Yres-1) nextline =  spL; else nextline = 0;
+
+        for (i=0; i<Xres; i++)
+        {
+            w[2] = *(sp + prevline);
+            w[5] = *sp;
+            w[8] = *(sp + nextline);
+
+            if (i>0)
+            {
+                w[1] = *(sp + prevline - 1);
+                w[4] = *(sp - 1);
+                w[7] = *(sp + nextline - 1);
+            }
+            else
+            {
+                w[1] = w[2];
+                w[4] = w[5];
+                w[7] = w[8];
+            }
+
+            if (i<Xres-1)
+            {
+                w[3] = *(sp + prevline + 1);
+                w[6] = *(sp + 1);
+                w[9] = *(sp + nextline + 1);
+            }
+            else
+            {
+                w[3] = w[2];
+                w[6] = w[5];
+                w[9] = w[8];
+            }
+
+            int pattern = 0;
+            int flag = 1;
+
+            yuv1 = rgb_to_yuv(w[5]);
+
+            for (k=1; k<=9; k++)
+            {
+                if (k==5) continue;
+
+                if ( w[k] != w[5] )
+                {
+                    yuv2 = rgb_to_yuv(w[k]);
+                    if (yuv_diff(yuv1, yuv2))
+                        pattern |= flag;
+                }
+                flag <<= 1;
+            }
+
+            switch (pattern)
+            {
+                case 0:
+                case 1:
+                case 4:
+                case 32:
+                case 128:
+                case 5:
+                case 132:
+                case 160:
+                case 33:
+                case 129:
+                case 36:
+                case 133:
+                case 164:
+                case 161:
+                case 37:
+                case 165:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 2:
+                case 34:
+                case 130:
+                case 162:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 16:
+                case 17:
+                case 48:
+                case 49:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 64:
+                case 65:
+                case 68:
+                case 69:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 8:
+                case 12:
+                case 136:
+                case 140:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 3:
+                case 35:
+                case 131:
+                case 163:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 6:
+                case 38:
+                case 134:
+                case 166:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 20:
+                case 21:
+                case 52:
+                case 53:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 144:
+                case 145:
+                case 176:
+                case 177:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 192:
+                case 193:
+                case 196:
+                case 197:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 96:
+                case 97:
+                case 100:
+                case 101:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 40:
+                case 44:
+                case 168:
+                case 172:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 9:
+                case 13:
+                case 137:
+                case 141:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 18:
+                case 50:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_1M
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 80:
+                case 81:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 72:
+                case 76:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_1M
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 10:
+                case 138:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 66:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 24:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 7:
+                case 39:
+                case 135:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 148:
+                case 149:
+                case 180:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 224:
+                case 228:
+                case 225:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 41:
+                case 169:
+                case 45:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 22:
+                case 54:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 208:
+                case 209:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 104:
+                case 108:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 11:
+                case 139:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 19:
+                case 51:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_1L
+                            PIXEL01_C
+                            PIXEL02_1M
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL01_6
+                            PIXEL02_5
+                            PIXEL12_1
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 146:
+                case 178:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_1M
+                            PIXEL12_C
+                            PIXEL22_1D
+                        }
+                        else
+                        {
+                            PIXEL01_1
+                            PIXEL02_5
+                            PIXEL12_6
+                            PIXEL22_2
+                        }
+                        PIXEL00_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_2
+                        PIXEL21_1
+                        break;
+                    }
+                case 84:
+                case 85:
+                    {
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL02_1U
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                            PIXEL12_6
+                            PIXEL21_1
+                            PIXEL22_5
+                        }
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        break;
+                    }
+                case 112:
+                case 113:
+                    {
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL20_1L
+                            PIXEL21_C
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL12_1
+                            PIXEL20_2
+                            PIXEL21_6
+                            PIXEL22_5
+                        }
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        break;
+                    }
+                case 200:
+                case 204:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_1M
+                            PIXEL21_C
+                            PIXEL22_1R
+                        }
+                        else
+                        {
+                            PIXEL10_1
+                            PIXEL20_5
+                            PIXEL21_6
+                            PIXEL22_2
+                        }
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL11
+                        PIXEL12_1
+                        break;
+                    }
+                case 73:
+                case 77:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_1U
+                            PIXEL10_C
+                            PIXEL20_1M
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL10_6
+                            PIXEL20_5
+                            PIXEL21_1
+                        }
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 42:
+                case 170:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                            PIXEL01_C
+                            PIXEL10_C
+                            PIXEL20_1D
+                        }
+                        else
+                        {
+                            PIXEL00_5
+                            PIXEL01_1
+                            PIXEL10_6
+                            PIXEL20_2
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 14:
+                case 142:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                            PIXEL01_C
+                            PIXEL02_1R
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_5
+                            PIXEL01_6
+                            PIXEL02_2
+                            PIXEL10_1
+                        }
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 67:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 70:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 28:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 152:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 194:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 98:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 56:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 25:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 26:
+                case 31:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL10_3
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL11
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 82:
+                case 214:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 88:
+                case 248:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 74:
+                case 107:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                        }
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 27:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 86:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 216:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 106:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 30:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 210:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 120:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 75:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 29:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 198:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 184:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 99:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 57:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 71:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 156:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 226:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 60:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 195:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 102:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 153:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 58:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 83:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 92:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 202:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 78:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 154:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 114:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1L
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 89:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 90:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 55:
+                case 23:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_1L
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL01_6
+                            PIXEL02_5
+                            PIXEL12_1
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 182:
+                case 150:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                            PIXEL22_1D
+                        }
+                        else
+                        {
+                            PIXEL01_1
+                            PIXEL02_5
+                            PIXEL12_6
+                            PIXEL22_2
+                        }
+                        PIXEL00_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_2
+                        PIXEL21_1
+                        break;
+                    }
+                case 213:
+                case 212:
+                    {
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL02_1U
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                            PIXEL12_6
+                            PIXEL21_1
+                            PIXEL22_5
+                        }
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        break;
+                    }
+                case 241:
+                case 240:
+                    {
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL20_1L
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_1
+                            PIXEL20_2
+                            PIXEL21_6
+                            PIXEL22_5
+                        }
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        break;
+                    }
+                case 236:
+                case 232:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                            PIXEL22_1R
+                        }
+                        else
+                        {
+                            PIXEL10_1
+                            PIXEL20_5
+                            PIXEL21_6
+                            PIXEL22_2
+                        }
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL11
+                        PIXEL12_1
+                        break;
+                    }
+                case 109:
+                case 105:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_1U
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL10_6
+                            PIXEL20_5
+                            PIXEL21_1
+                        }
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 171:
+                case 43:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                            PIXEL20_1D
+                        }
+                        else
+                        {
+                            PIXEL00_5
+                            PIXEL01_1
+                            PIXEL10_6
+                            PIXEL20_2
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 143:
+                case 15:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL02_1R
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_5
+                            PIXEL01_6
+                            PIXEL02_2
+                            PIXEL10_1
+                        }
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 124:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 203:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 62:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 211:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 118:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 217:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 110:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 155:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 188:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 185:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 61:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 157:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 103:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 227:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 230:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 199:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 220:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 158:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 234:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1R
+                        break;
+                    }
+                case 242:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1L
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 59:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 121:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 87:
+                    {
+                        PIXEL00_1L
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1M
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 79:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1R
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 122:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 94:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 218:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 91:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 229:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 167:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 173:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 181:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 186:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 115:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1L
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 93:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 206:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 205:
+                case 201:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_1M
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 174:
+                case 46:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_1M
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 179:
+                case 147:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_1M
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 117:
+                case 116:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1L
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_1M
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 189:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 231:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 126:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 219:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 125:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_1U
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL10_6
+                            PIXEL20_5
+                            PIXEL21_1
+                        }
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 221:
+                    {
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL02_1U
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                            PIXEL12_6
+                            PIXEL21_1
+                            PIXEL22_5
+                        }
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1M
+                        break;
+                    }
+                case 207:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL02_1R
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_5
+                            PIXEL01_6
+                            PIXEL02_2
+                            PIXEL10_1
+                        }
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 238:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                            PIXEL22_1R
+                        }
+                        else
+                        {
+                            PIXEL10_1
+                            PIXEL20_5
+                            PIXEL21_6
+                            PIXEL22_2
+                        }
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL11
+                        PIXEL12_1
+                        break;
+                    }
+                case 190:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                            PIXEL22_1D
+                        }
+                        else
+                        {
+                            PIXEL01_1
+                            PIXEL02_5
+                            PIXEL12_6
+                            PIXEL22_2
+                        }
+                        PIXEL00_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1D
+                        PIXEL21_1
+                        break;
+                    }
+                case 187:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                            PIXEL20_1D
+                        }
+                        else
+                        {
+                            PIXEL00_5
+                            PIXEL01_1
+                            PIXEL10_6
+                            PIXEL20_2
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 243:
+                    {
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL20_1L
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_1
+                            PIXEL20_2
+                            PIXEL21_6
+                            PIXEL22_5
+                        }
+                        PIXEL00_1L
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL10_1
+                        PIXEL11
+                        break;
+                    }
+                case 119:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_1L
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL01_6
+                            PIXEL02_5
+                            PIXEL12_1
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL20_1L
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 237:
+                case 233:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_2
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 175:
+                case 47:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_2
+                        break;
+                    }
+                case 183:
+                case 151:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_2
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 245:
+                case 244:
+                    {
+                        PIXEL00_2
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1L
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 250:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_C
+                        PIXEL02_1M
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 123:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                        }
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 95:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL10_3
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL11
+                        PIXEL20_1M
+                        PIXEL21_C
+                        PIXEL22_1M
+                        break;
+                    }
+                case 222:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 252:
+                    {
+                        PIXEL00_1M
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 249:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 235:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                        }
+                        PIXEL02_1M
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 111:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 63:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1M
+                        break;
+                    }
+                case 159:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL10_3
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 215:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 246:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1L
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 254:
+                    {
+                        PIXEL00_1M
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_4
+                        }
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_4
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL21_3
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 253:
+                    {
+                        PIXEL00_1U
+                        PIXEL01_1
+                        PIXEL02_1U
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 251:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL01_3
+                        }
+                        PIXEL02_1M
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL10_C
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL10_3
+                            PIXEL20_2
+                            PIXEL21_3
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL12_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL12_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 239:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        PIXEL02_1R
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_1
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        PIXEL22_1R
+                        break;
+                    }
+                case 127:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL01_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                            PIXEL01_3
+                            PIXEL10_3
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL02_4
+                            PIXEL12_3
+                        }
+                        PIXEL11
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                            PIXEL21_C
+                        }
+                        else
+                        {
+                            PIXEL20_4
+                            PIXEL21_3
+                        }
+                        PIXEL22_1M
+                        break;
+                    }
+                case 191:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1D
+                        PIXEL21_1
+                        PIXEL22_1D
+                        break;
+                    }
+                case 223:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                            PIXEL10_C
+                        }
+                        else
+                        {
+                            PIXEL00_4
+                            PIXEL10_3
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL01_C
+                            PIXEL02_C
+                            PIXEL12_C
+                        }
+                        else
+                        {
+                            PIXEL01_3
+                            PIXEL02_2
+                            PIXEL12_3
+                        }
+                        PIXEL11
+                        PIXEL20_1M
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL21_C
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL21_3
+                            PIXEL22_4
+                        }
+                        break;
+                    }
+                case 247:
+                    {
+                        PIXEL00_1L
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_1
+                        PIXEL11
+                        PIXEL12_C
+                        PIXEL20_1L
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+                case 255:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_C
+                        }
+                        else
+                        {
+                            PIXEL00_2
+                        }
+                        PIXEL01_C
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_C
+                        }
+                        else
+                        {
+                            PIXEL02_2
+                        }
+                        PIXEL10_C
+                        PIXEL11
+                        PIXEL12_C
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_C
+                        }
+                        else
+                        {
+                            PIXEL20_2
+                        }
+                        PIXEL21_C
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_C
+                        }
+                        else
+                        {
+                            PIXEL22_2
+                        }
+                        break;
+                    }
+            }
+            sp++;
+            dp += 3;
+        }
+
+        sRowP += srb;
+        sp = (uint32_t *) sRowP;
+
+        dRowP += drb * 3;
+        dp = (uint32_t *) dRowP;
+    }
+}
+
+void HQX_CALLCONV hq3x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
+{   // Convenience entry point: forwards to hq3x_32_rb with row strides derived from Xres (no per-row padding).
+    const uint32_t srcStride = Xres * 4;   // bytes per source row: Xres pixels of one uint32_t each
+    hq3x_32_rb(sp, srcStride, dp, 3 * srcStride, Xres, Yres);   // destination row stride is three times the source stride
+}
diff --git a/Utilities/HQX/hq4x.cpp b/Utilities/HQX/hq4x.cpp
new file mode 100644
index 0000000..0bfe3d9
--- /dev/null
+++ b/Utilities/HQX/hq4x.cpp
@@ -0,0 +1,5234 @@
+/*
+ * Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
+ *
+ * Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "../stdafx.h"
+#include <stdint.h>
+#include "common.h"
+#include "hqx.h"
+
+#define PIXEL00_0     *dp = w[5]; // PIXELrc_nn stores one pixel of the 4x4 output block at dp: row r, column c (rows are dpL uint32_t apart). This group: row 0, col 0.
+#define PIXEL00_11    *dp = Interp1(w[5], w[4]); // every macro expands to a complete statement including the ';', so call sites use them without one
+#define PIXEL00_12    *dp = Interp1(w[5], w[2]); // dp, dpL and w[] are taken from the scope of the expanding function; w[1]..w[9] is the 3x3 source neighbourhood with w[5] at the centre
+#define PIXEL00_20    *dp = Interp2(w[5], w[2], w[4]); // Interp1..Interp8 are not defined in this file - presumably from common.h (TODO confirm); the _nn suffix only distinguishes the blend variant
+#define PIXEL00_50    *dp = Interp5(w[2], w[4]);
+#define PIXEL00_80    *dp = Interp8(w[5], w[1]);
+#define PIXEL00_81    *dp = Interp8(w[5], w[4]);
+#define PIXEL00_82    *dp = Interp8(w[5], w[2]);
+#define PIXEL01_0     *(dp+1) = w[5]; // output row 0, col 1
+#define PIXEL01_10    *(dp+1) = Interp1(w[5], w[1]);
+#define PIXEL01_12    *(dp+1) = Interp1(w[5], w[2]);
+#define PIXEL01_14    *(dp+1) = Interp1(w[2], w[5]);
+#define PIXEL01_21    *(dp+1) = Interp2(w[2], w[5], w[4]);
+#define PIXEL01_31    *(dp+1) = Interp3(w[5], w[4]);
+#define PIXEL01_50    *(dp+1) = Interp5(w[2], w[5]);
+#define PIXEL01_60    *(dp+1) = Interp6(w[5], w[2], w[4]);
+#define PIXEL01_61    *(dp+1) = Interp6(w[5], w[2], w[1]);
+#define PIXEL01_82    *(dp+1) = Interp8(w[5], w[2]);
+#define PIXEL01_83    *(dp+1) = Interp8(w[2], w[4]);
+#define PIXEL02_0     *(dp+2) = w[5]; // output row 0, col 2
+#define PIXEL02_10    *(dp+2) = Interp1(w[5], w[3]);
+#define PIXEL02_11    *(dp+2) = Interp1(w[5], w[2]);
+#define PIXEL02_13    *(dp+2) = Interp1(w[2], w[5]);
+#define PIXEL02_21    *(dp+2) = Interp2(w[2], w[5], w[6]);
+#define PIXEL02_32    *(dp+2) = Interp3(w[5], w[6]);
+#define PIXEL02_50    *(dp+2) = Interp5(w[2], w[5]);
+#define PIXEL02_60    *(dp+2) = Interp6(w[5], w[2], w[6]);
+#define PIXEL02_61    *(dp+2) = Interp6(w[5], w[2], w[3]);
+#define PIXEL02_81    *(dp+2) = Interp8(w[5], w[2]);
+#define PIXEL02_83    *(dp+2) = Interp8(w[2], w[6]);
+#define PIXEL03_0     *(dp+3) = w[5]; // output row 0, col 3
+#define PIXEL03_11    *(dp+3) = Interp1(w[5], w[2]);
+#define PIXEL03_12    *(dp+3) = Interp1(w[5], w[6]);
+#define PIXEL03_20    *(dp+3) = Interp2(w[5], w[2], w[6]);
+#define PIXEL03_50    *(dp+3) = Interp5(w[2], w[6]);
+#define PIXEL03_80    *(dp+3) = Interp8(w[5], w[3]);
+#define PIXEL03_81    *(dp+3) = Interp8(w[5], w[2]);
+#define PIXEL03_82    *(dp+3) = Interp8(w[5], w[6]);
+#define PIXEL10_0     *(dp+dpL) = w[5]; // output row 1, col 0 (one dpL step down from dp)
+#define PIXEL10_10    *(dp+dpL) = Interp1(w[5], w[1]);
+#define PIXEL10_11    *(dp+dpL) = Interp1(w[5], w[4]);
+#define PIXEL10_13    *(dp+dpL) = Interp1(w[4], w[5]);
+#define PIXEL10_21    *(dp+dpL) = Interp2(w[4], w[5], w[2]);
+#define PIXEL10_32    *(dp+dpL) = Interp3(w[5], w[2]);
+#define PIXEL10_50    *(dp+dpL) = Interp5(w[4], w[5]);
+#define PIXEL10_60    *(dp+dpL) = Interp6(w[5], w[4], w[2]);
+#define PIXEL10_61    *(dp+dpL) = Interp6(w[5], w[4], w[1]);
+#define PIXEL10_81    *(dp+dpL) = Interp8(w[5], w[4]);
+#define PIXEL10_83    *(dp+dpL) = Interp8(w[4], w[2]);
+#define PIXEL11_0     *(dp+dpL+1) = w[5]; // output row 1, col 1
+#define PIXEL11_30    *(dp+dpL+1) = Interp3(w[5], w[1]);
+#define PIXEL11_31    *(dp+dpL+1) = Interp3(w[5], w[4]);
+#define PIXEL11_32    *(dp+dpL+1) = Interp3(w[5], w[2]);
+#define PIXEL11_70    *(dp+dpL+1) = Interp7(w[5], w[4], w[2]);
+#define PIXEL12_0     *(dp+dpL+2) = w[5]; // output row 1, col 2
+#define PIXEL12_30    *(dp+dpL+2) = Interp3(w[5], w[3]);
+#define PIXEL12_31    *(dp+dpL+2) = Interp3(w[5], w[2]);
+#define PIXEL12_32    *(dp+dpL+2) = Interp3(w[5], w[6]);
+#define PIXEL12_70    *(dp+dpL+2) = Interp7(w[5], w[6], w[2]);
+#define PIXEL13_0     *(dp+dpL+3) = w[5]; // output row 1, col 3
+#define PIXEL13_10    *(dp+dpL+3) = Interp1(w[5], w[3]);
+#define PIXEL13_12    *(dp+dpL+3) = Interp1(w[5], w[6]);
+#define PIXEL13_14    *(dp+dpL+3) = Interp1(w[6], w[5]);
+#define PIXEL13_21    *(dp+dpL+3) = Interp2(w[6], w[5], w[2]);
+#define PIXEL13_31    *(dp+dpL+3) = Interp3(w[5], w[2]);
+#define PIXEL13_50    *(dp+dpL+3) = Interp5(w[6], w[5]);
+#define PIXEL13_60    *(dp+dpL+3) = Interp6(w[5], w[6], w[2]);
+#define PIXEL13_61    *(dp+dpL+3) = Interp6(w[5], w[6], w[3]);
+#define PIXEL13_82    *(dp+dpL+3) = Interp8(w[5], w[6]);
+#define PIXEL13_83    *(dp+dpL+3) = Interp8(w[6], w[2]);
+#define PIXEL20_0     *(dp+dpL+dpL) = w[5]; // output row 2, col 0 (two dpL steps down from dp)
+#define PIXEL20_10    *(dp+dpL+dpL) = Interp1(w[5], w[7]);
+#define PIXEL20_12    *(dp+dpL+dpL) = Interp1(w[5], w[4]);
+#define PIXEL20_14    *(dp+dpL+dpL) = Interp1(w[4], w[5]);
+#define PIXEL20_21    *(dp+dpL+dpL) = Interp2(w[4], w[5], w[8]);
+#define PIXEL20_31    *(dp+dpL+dpL) = Interp3(w[5], w[8]);
+#define PIXEL20_50    *(dp+dpL+dpL) = Interp5(w[4], w[5]);
+#define PIXEL20_60    *(dp+dpL+dpL) = Interp6(w[5], w[4], w[8]);
+#define PIXEL20_61    *(dp+dpL+dpL) = Interp6(w[5], w[4], w[7]);
+#define PIXEL20_82    *(dp+dpL+dpL) = Interp8(w[5], w[4]);
+#define PIXEL20_83    *(dp+dpL+dpL) = Interp8(w[4], w[8]);
+#define PIXEL21_0     *(dp+dpL+dpL+1) = w[5]; // output row 2, col 1
+#define PIXEL21_30    *(dp+dpL+dpL+1) = Interp3(w[5], w[7]);
+#define PIXEL21_31    *(dp+dpL+dpL+1) = Interp3(w[5], w[8]);
+#define PIXEL21_32    *(dp+dpL+dpL+1) = Interp3(w[5], w[4]);
+#define PIXEL21_70    *(dp+dpL+dpL+1) = Interp7(w[5], w[4], w[8]);
+#define PIXEL22_0     *(dp+dpL+dpL+2) = w[5]; // output row 2, col 2
+#define PIXEL22_30    *(dp+dpL+dpL+2) = Interp3(w[5], w[9]);
+#define PIXEL22_31    *(dp+dpL+dpL+2) = Interp3(w[5], w[6]);
+#define PIXEL22_32    *(dp+dpL+dpL+2) = Interp3(w[5], w[8]);
+#define PIXEL22_70    *(dp+dpL+dpL+2) = Interp7(w[5], w[6], w[8]);
+#define PIXEL23_0     *(dp+dpL+dpL+3) = w[5]; // output row 2, col 3
+#define PIXEL23_10    *(dp+dpL+dpL+3) = Interp1(w[5], w[9]);
+#define PIXEL23_11    *(dp+dpL+dpL+3) = Interp1(w[5], w[6]);
+#define PIXEL23_13    *(dp+dpL+dpL+3) = Interp1(w[6], w[5]);
+#define PIXEL23_21    *(dp+dpL+dpL+3) = Interp2(w[6], w[5], w[8]);
+#define PIXEL23_32    *(dp+dpL+dpL+3) = Interp3(w[5], w[8]);
+#define PIXEL23_50    *(dp+dpL+dpL+3) = Interp5(w[6], w[5]);
+#define PIXEL23_60    *(dp+dpL+dpL+3) = Interp6(w[5], w[6], w[8]);
+#define PIXEL23_61    *(dp+dpL+dpL+3) = Interp6(w[5], w[6], w[9]);
+#define PIXEL23_81    *(dp+dpL+dpL+3) = Interp8(w[5], w[6]);
+#define PIXEL23_83    *(dp+dpL+dpL+3) = Interp8(w[6], w[8]);
+#define PIXEL30_0     *(dp+dpL+dpL+dpL) = w[5]; // output row 3, col 0 (three dpL steps down from dp)
+#define PIXEL30_11    *(dp+dpL+dpL+dpL) = Interp1(w[5], w[8]);
+#define PIXEL30_12    *(dp+dpL+dpL+dpL) = Interp1(w[5], w[4]);
+#define PIXEL30_20    *(dp+dpL+dpL+dpL) = Interp2(w[5], w[8], w[4]);
+#define PIXEL30_50    *(dp+dpL+dpL+dpL) = Interp5(w[8], w[4]);
+#define PIXEL30_80    *(dp+dpL+dpL+dpL) = Interp8(w[5], w[7]);
+#define PIXEL30_81    *(dp+dpL+dpL+dpL) = Interp8(w[5], w[8]);
+#define PIXEL30_82    *(dp+dpL+dpL+dpL) = Interp8(w[5], w[4]);
+#define PIXEL31_0     *(dp+dpL+dpL+dpL+1) = w[5]; // output row 3, col 1
+#define PIXEL31_10    *(dp+dpL+dpL+dpL+1) = Interp1(w[5], w[7]);
+#define PIXEL31_11    *(dp+dpL+dpL+dpL+1) = Interp1(w[5], w[8]);
+#define PIXEL31_13    *(dp+dpL+dpL+dpL+1) = Interp1(w[8], w[5]);
+#define PIXEL31_21    *(dp+dpL+dpL+dpL+1) = Interp2(w[8], w[5], w[4]);
+#define PIXEL31_32    *(dp+dpL+dpL+dpL+1) = Interp3(w[5], w[4]);
+#define PIXEL31_50    *(dp+dpL+dpL+dpL+1) = Interp5(w[8], w[5]);
+#define PIXEL31_60    *(dp+dpL+dpL+dpL+1) = Interp6(w[5], w[8], w[4]);
+#define PIXEL31_61    *(dp+dpL+dpL+dpL+1) = Interp6(w[5], w[8], w[7]);
+#define PIXEL31_81    *(dp+dpL+dpL+dpL+1) = Interp8(w[5], w[8]);
+#define PIXEL31_83    *(dp+dpL+dpL+dpL+1) = Interp8(w[8], w[4]);
+#define PIXEL32_0     *(dp+dpL+dpL+dpL+2) = w[5]; // output row 3, col 2
+#define PIXEL32_10    *(dp+dpL+dpL+dpL+2) = Interp1(w[5], w[9]);
+#define PIXEL32_12    *(dp+dpL+dpL+dpL+2) = Interp1(w[5], w[8]);
+#define PIXEL32_14    *(dp+dpL+dpL+dpL+2) = Interp1(w[8], w[5]);
+#define PIXEL32_21    *(dp+dpL+dpL+dpL+2) = Interp2(w[8], w[5], w[6]);
+#define PIXEL32_31    *(dp+dpL+dpL+dpL+2) = Interp3(w[5], w[6]);
+#define PIXEL32_50    *(dp+dpL+dpL+dpL+2) = Interp5(w[8], w[5]);
+#define PIXEL32_60    *(dp+dpL+dpL+dpL+2) = Interp6(w[5], w[8], w[6]);
+#define PIXEL32_61    *(dp+dpL+dpL+dpL+2) = Interp6(w[5], w[8], w[9]);
+#define PIXEL32_82    *(dp+dpL+dpL+dpL+2) = Interp8(w[5], w[8]);
+#define PIXEL32_83    *(dp+dpL+dpL+dpL+2) = Interp8(w[8], w[6]);
+#define PIXEL33_0     *(dp+dpL+dpL+dpL+3) = w[5]; // output row 3, col 3
+#define PIXEL33_11    *(dp+dpL+dpL+dpL+3) = Interp1(w[5], w[6]);
+#define PIXEL33_12    *(dp+dpL+dpL+dpL+3) = Interp1(w[5], w[8]);
+#define PIXEL33_20    *(dp+dpL+dpL+dpL+3) = Interp2(w[5], w[8], w[6]);
+#define PIXEL33_50    *(dp+dpL+dpL+dpL+3) = Interp5(w[8], w[6]);
+#define PIXEL33_80    *(dp+dpL+dpL+dpL+3) = Interp8(w[5], w[9]);
+#define PIXEL33_81    *(dp+dpL+dpL+dpL+3) = Interp8(w[5], w[6]);
+#define PIXEL33_82    *(dp+dpL+dpL+dpL+3) = Interp8(w[5], w[8]);
+
+void HQX_CALLCONV hq4x_32_rb( uint32_t * sp, uint32_t srb, uint32_t * dp, uint32_t drb, int Xres, int Yres )
+{
+    int  i, j, k;
+    int  prevline, nextline;
+    uint32_t w[10];
+    int dpL = (drb >> 2);
+    int spL = (srb >> 2);
+    uint8_t *sRowP = (uint8_t *) sp;
+    uint8_t *dRowP = (uint8_t *) dp;
+    uint32_t yuv1, yuv2;
+
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w1 | w2 | w3 |
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w4 | w5 | w6 |
+    //   +----+----+----+
+    //   |    |    |    |
+    //   | w7 | w8 | w9 |
+    //   +----+----+----+
+
+    for (j=0; j<Yres; j++)
+    {
+        if (j>0)      prevline = -spL; else prevline = 0;
+        if (j<Yres-1) nextline =  spL; else nextline = 0;
+
+        for (i=0; i<Xres; i++)
+        {
+            w[2] = *(sp + prevline);
+            w[5] = *sp;
+            w[8] = *(sp + nextline);
+
+            if (i>0)
+            {
+                w[1] = *(sp + prevline - 1);
+                w[4] = *(sp - 1);
+                w[7] = *(sp + nextline - 1);
+            }
+            else
+            {
+                w[1] = w[2];
+                w[4] = w[5];
+                w[7] = w[8];
+            }
+
+            if (i<Xres-1)
+            {
+                w[3] = *(sp + prevline + 1);
+                w[6] = *(sp + 1);
+                w[9] = *(sp + nextline + 1);
+            }
+            else
+            {
+                w[3] = w[2];
+                w[6] = w[5];
+                w[9] = w[8];
+            }
+
+            int pattern = 0;
+            int flag = 1;
+
+            yuv1 = rgb_to_yuv(w[5]);
+
+            for (k=1; k<=9; k++)
+            {
+                if (k==5) continue;
+
+                if ( w[k] != w[5] )
+                {
+                    yuv2 = rgb_to_yuv(w[k]);
+                    if (yuv_diff(yuv1, yuv2))
+                        pattern |= flag;
+                }
+                flag <<= 1;
+            }
+
+            switch (pattern)
+            {
+                case 0:
+                case 1:
+                case 4:
+                case 32:
+                case 128:
+                case 5:
+                case 132:
+                case 160:
+                case 33:
+                case 129:
+                case 36:
+                case 133:
+                case 164:
+                case 161:
+                case 37:
+                case 165:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 2:
+                case 34:
+                case 130:
+                case 162:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 16:
+                case 17:
+                case 48:
+                case 49:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 64:
+                case 65:
+                case 68:
+                case 69:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 8:
+                case 12:
+                case 136:
+                case 140:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 3:
+                case 35:
+                case 131:
+                case 163:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 6:
+                case 38:
+                case 134:
+                case 166:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 20:
+                case 21:
+                case 52:
+                case 53:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 144:
+                case 145:
+                case 176:
+                case 177:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 192:
+                case 193:
+                case 196:
+                case 197:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 96:
+                case 97:
+                case 100:
+                case 101:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 40:
+                case 44:
+                case 168:
+                case 172:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 9:
+                case 13:
+                case 137:
+                case 141:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 18:
+                case 50:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL12_0
+                            PIXEL13_50
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 80:
+                case 81:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_61
+                        PIXEL21_30
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 72:
+                case 76:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_70
+                        PIXEL13_60
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL21_0
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 10:
+                case 138:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                            PIXEL11_0
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 66:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 24:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 7:
+                case 39:
+                case 135:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 148:
+                case 149:
+                case 180:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 224:
+                case 228:
+                case 225:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 41:
+                case 169:
+                case 45:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 22:
+                case 54:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 208:
+                case 209:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 104:
+                case 108:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_70
+                        PIXEL13_60
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 11:
+                case 139:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 19:
+                case 51:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_81
+                            PIXEL01_31
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL00_12
+                            PIXEL01_14
+                            PIXEL02_83
+                            PIXEL03_50
+                            PIXEL12_70
+                            PIXEL13_21
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 146:
+                case 178:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                            PIXEL23_32
+                            PIXEL33_82
+                        }
+                        else
+                        {
+                            PIXEL02_21
+                            PIXEL03_50
+                            PIXEL12_70
+                            PIXEL13_83
+                            PIXEL23_13
+                            PIXEL33_11
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        break;
+                    }
+                case 84:
+                case 85:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL03_81
+                            PIXEL13_31
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL03_12
+                            PIXEL13_14
+                            PIXEL22_70
+                            PIXEL23_83
+                            PIXEL32_21
+                            PIXEL33_50
+                        }
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 112:
+                case 113:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_82
+                        PIXEL21_32
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL30_82
+                            PIXEL31_32
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_70
+                            PIXEL23_21
+                            PIXEL30_11
+                            PIXEL31_13
+                            PIXEL32_83
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 200:
+                case 204:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_70
+                        PIXEL13_60
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                            PIXEL32_31
+                            PIXEL33_81
+                        }
+                        else
+                        {
+                            PIXEL20_21
+                            PIXEL21_70
+                            PIXEL30_50
+                            PIXEL31_83
+                            PIXEL32_14
+                            PIXEL33_12
+                        }
+                        PIXEL22_31
+                        PIXEL23_81
+                        break;
+                    }
+                case 73:
+                case 77:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_82
+                            PIXEL10_32
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL00_11
+                            PIXEL10_13
+                            PIXEL20_83
+                            PIXEL21_70
+                            PIXEL30_50
+                            PIXEL31_21
+                        }
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 42:
+                case 170:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                            PIXEL20_31
+                            PIXEL30_81
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_21
+                            PIXEL10_83
+                            PIXEL11_70
+                            PIXEL20_14
+                            PIXEL30_12
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 14:
+                case 142:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL02_32
+                            PIXEL03_82
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_83
+                            PIXEL02_13
+                            PIXEL03_11
+                            PIXEL10_21
+                            PIXEL11_70
+                        }
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 67:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 70:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 28:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 152:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 194:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 98:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 56:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 25:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 26:
+                case 31:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 82:
+                case 214:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 88:
+                case 248:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 74:
+                case 107:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_61
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 27:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 86:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 216:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 106:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_61
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 30:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 210:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 120:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 75:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 29:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 198:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 184:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 99:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 57:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 71:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 156:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 226:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 60:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 195:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 102:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 153:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 58:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 83:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL20_61
+                        PIXEL21_30
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 92:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 202:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL12_30
+                        PIXEL13_61
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 78:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL12_32
+                        PIXEL13_82
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 154:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 114:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL20_82
+                        PIXEL21_32
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        PIXEL30_82
+                        PIXEL31_32
+                        break;
+                    }
+                case 89:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 90:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 55:
+                case 23:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_81
+                            PIXEL01_31
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL12_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL00_12
+                            PIXEL01_14
+                            PIXEL02_83
+                            PIXEL03_50
+                            PIXEL12_70
+                            PIXEL13_21
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 182:
+                case 150:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL12_0
+                            PIXEL13_0
+                            PIXEL23_32
+                            PIXEL33_82
+                        }
+                        else
+                        {
+                            PIXEL02_21
+                            PIXEL03_50
+                            PIXEL12_70
+                            PIXEL13_83
+                            PIXEL23_13
+                            PIXEL33_11
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        break;
+                    }
+                case 213:
+                case 212:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL03_81
+                            PIXEL13_31
+                            PIXEL22_0
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL03_12
+                            PIXEL13_14
+                            PIXEL22_70
+                            PIXEL23_83
+                            PIXEL32_21
+                            PIXEL33_50
+                        }
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 241:
+                case 240:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_82
+                        PIXEL21_32
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_0
+                            PIXEL23_0
+                            PIXEL30_82
+                            PIXEL31_32
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL22_70
+                            PIXEL23_21
+                            PIXEL30_11
+                            PIXEL31_13
+                            PIXEL32_83
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 236:
+                case 232:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_70
+                        PIXEL13_60
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL21_0
+                            PIXEL30_0
+                            PIXEL31_0
+                            PIXEL32_31
+                            PIXEL33_81
+                        }
+                        else
+                        {
+                            PIXEL20_21
+                            PIXEL21_70
+                            PIXEL30_50
+                            PIXEL31_83
+                            PIXEL32_14
+                            PIXEL33_12
+                        }
+                        PIXEL22_31
+                        PIXEL23_81
+                        break;
+                    }
+                case 109:
+                case 105:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_82
+                            PIXEL10_32
+                            PIXEL20_0
+                            PIXEL21_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL00_11
+                            PIXEL10_13
+                            PIXEL20_83
+                            PIXEL21_70
+                            PIXEL30_50
+                            PIXEL31_21
+                        }
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 171:
+                case 43:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                            PIXEL11_0
+                            PIXEL20_31
+                            PIXEL30_81
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_21
+                            PIXEL10_83
+                            PIXEL11_70
+                            PIXEL20_14
+                            PIXEL30_12
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 143:
+                case 15:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL02_32
+                            PIXEL03_82
+                            PIXEL10_0
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_83
+                            PIXEL02_13
+                            PIXEL03_11
+                            PIXEL10_21
+                            PIXEL11_70
+                        }
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 124:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 203:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 62:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 211:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 118:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 217:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 110:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 155:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 188:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 185:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 61:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 157:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 103:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 227:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 230:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 199:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 220:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 158:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL12_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 234:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL12_30
+                        PIXEL13_61
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 242:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_82
+                        PIXEL31_32
+                        break;
+                    }
+                case 59:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL11_0
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 121:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 87:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_0
+                        PIXEL20_61
+                        PIXEL21_30
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 79:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL11_0
+                        PIXEL12_32
+                        PIXEL13_82
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 122:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 94:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL12_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 218:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 91:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL11_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 229:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 167:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 173:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 181:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 186:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 115:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL20_82
+                        PIXEL21_32
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        PIXEL30_82
+                        PIXEL31_32
+                        break;
+                    }
+                case 93:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 206:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL12_32
+                        PIXEL13_82
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 205:
+                case 201:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_10
+                            PIXEL21_30
+                            PIXEL30_80
+                            PIXEL31_10
+                        }
+                        else
+                        {
+                            PIXEL20_12
+                            PIXEL21_0
+                            PIXEL30_20
+                            PIXEL31_11
+                        }
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 174:
+                case 46:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_80
+                            PIXEL01_10
+                            PIXEL10_10
+                            PIXEL11_30
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                            PIXEL01_12
+                            PIXEL10_11
+                            PIXEL11_0
+                        }
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 179:
+                case 147:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_10
+                            PIXEL03_80
+                            PIXEL12_30
+                            PIXEL13_10
+                        }
+                        else
+                        {
+                            PIXEL02_11
+                            PIXEL03_20
+                            PIXEL12_0
+                            PIXEL13_12
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 117:
+                case 116:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_82
+                        PIXEL21_32
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_30
+                            PIXEL23_10
+                            PIXEL32_10
+                            PIXEL33_80
+                        }
+                        else
+                        {
+                            PIXEL22_0
+                            PIXEL23_11
+                            PIXEL32_12
+                            PIXEL33_20
+                        }
+                        PIXEL30_82
+                        PIXEL31_32
+                        break;
+                    }
+                case 189:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 231:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 126:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 219:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 125:
+                    {
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL00_82
+                            PIXEL10_32
+                            PIXEL20_0
+                            PIXEL21_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL00_11
+                            PIXEL10_13
+                            PIXEL20_83
+                            PIXEL21_70
+                            PIXEL30_50
+                            PIXEL31_21
+                        }
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 221:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL03_81
+                            PIXEL13_31
+                            PIXEL22_0
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL03_12
+                            PIXEL13_14
+                            PIXEL22_70
+                            PIXEL23_83
+                            PIXEL32_21
+                            PIXEL33_50
+                        }
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 207:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL02_32
+                            PIXEL03_82
+                            PIXEL10_0
+                            PIXEL11_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_83
+                            PIXEL02_13
+                            PIXEL03_11
+                            PIXEL10_21
+                            PIXEL11_70
+                        }
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_31
+                        PIXEL23_81
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 238:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_32
+                        PIXEL13_82
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL21_0
+                            PIXEL30_0
+                            PIXEL31_0
+                            PIXEL32_31
+                            PIXEL33_81
+                        }
+                        else
+                        {
+                            PIXEL20_21
+                            PIXEL21_70
+                            PIXEL30_50
+                            PIXEL31_83
+                            PIXEL32_14
+                            PIXEL33_12
+                        }
+                        PIXEL22_31
+                        PIXEL23_81
+                        break;
+                    }
+                case 190:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL12_0
+                            PIXEL13_0
+                            PIXEL23_32
+                            PIXEL33_82
+                        }
+                        else
+                        {
+                            PIXEL02_21
+                            PIXEL03_50
+                            PIXEL12_70
+                            PIXEL13_83
+                            PIXEL23_13
+                            PIXEL33_11
+                        }
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        break;
+                    }
+                case 187:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                            PIXEL11_0
+                            PIXEL20_31
+                            PIXEL30_81
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_21
+                            PIXEL10_83
+                            PIXEL11_70
+                            PIXEL20_14
+                            PIXEL30_12
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 243:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_82
+                        PIXEL21_32
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL22_0
+                            PIXEL23_0
+                            PIXEL30_82
+                            PIXEL31_32
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL22_70
+                            PIXEL23_21
+                            PIXEL30_11
+                            PIXEL31_13
+                            PIXEL32_83
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 119:
+                    {
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL00_81
+                            PIXEL01_31
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL12_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL00_12
+                            PIXEL01_14
+                            PIXEL02_83
+                            PIXEL03_50
+                            PIXEL12_70
+                            PIXEL13_21
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 237:
+                case 233:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_60
+                        PIXEL03_20
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_70
+                        PIXEL13_60
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_31
+                        PIXEL23_81
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 175:
+                case 47:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_70
+                        PIXEL23_60
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_60
+                        PIXEL33_20
+                        break;
+                    }
+                case 183:
+                case 151:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_60
+                        PIXEL21_70
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_20
+                        PIXEL31_60
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 245:
+                case 244:
+                    {
+                        PIXEL00_20
+                        PIXEL01_60
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_60
+                        PIXEL11_70
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_0
+                        PIXEL23_0
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 250:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_30
+                        PIXEL13_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        break;
+                    }
+                case 123:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_10
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 95:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_80
+                        PIXEL31_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 222:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 252:
+                    {
+                        PIXEL00_80
+                        PIXEL01_61
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_31
+                        PIXEL13_31
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_0
+                        PIXEL23_0
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 249:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_61
+                        PIXEL03_80
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        break;
+                    }
+                case 235:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_61
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_31
+                        PIXEL23_81
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 111:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_32
+                        PIXEL13_82
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_61
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 63:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_61
+                        PIXEL33_80
+                        break;
+                    }
+                case 159:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_80
+                        PIXEL31_61
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 215:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_61
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 246:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_61
+                        PIXEL11_30
+                        PIXEL12_0
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_0
+                        PIXEL23_0
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 254:
+                    {
+                        PIXEL00_80
+                        PIXEL01_10
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_10
+                        PIXEL11_30
+                        PIXEL12_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_0
+                        PIXEL23_0
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 253:
+                    {
+                        PIXEL00_82
+                        PIXEL01_82
+                        PIXEL02_81
+                        PIXEL03_81
+                        PIXEL10_32
+                        PIXEL11_32
+                        PIXEL12_31
+                        PIXEL13_31
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_0
+                        PIXEL23_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 251:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_10
+                        PIXEL03_80
+                        PIXEL11_0
+                        PIXEL12_30
+                        PIXEL13_10
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        break;
+                    }
+                case 239:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        PIXEL02_32
+                        PIXEL03_82
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_32
+                        PIXEL13_82
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_31
+                        PIXEL23_81
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        PIXEL32_31
+                        PIXEL33_81
+                        break;
+                    }
+                case 127:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL02_0
+                            PIXEL03_0
+                            PIXEL13_0
+                        }
+                        else
+                        {
+                            PIXEL02_50
+                            PIXEL03_50
+                            PIXEL13_50
+                        }
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL20_0
+                            PIXEL30_0
+                            PIXEL31_0
+                        }
+                        else
+                        {
+                            PIXEL20_50
+                            PIXEL30_50
+                            PIXEL31_50
+                        }
+                        PIXEL21_0
+                        PIXEL22_30
+                        PIXEL23_10
+                        PIXEL32_10
+                        PIXEL33_80
+                        break;
+                    }
+                case 191:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_31
+                        PIXEL21_31
+                        PIXEL22_32
+                        PIXEL23_32
+                        PIXEL30_81
+                        PIXEL31_81
+                        PIXEL32_82
+                        PIXEL33_82
+                        break;
+                    }
+                case 223:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                            PIXEL01_0
+                            PIXEL10_0
+                        }
+                        else
+                        {
+                            PIXEL00_50
+                            PIXEL01_50
+                            PIXEL10_50
+                        }
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_10
+                        PIXEL21_30
+                        PIXEL22_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL23_0
+                            PIXEL32_0
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL23_50
+                            PIXEL32_50
+                            PIXEL33_50
+                        }
+                        PIXEL30_80
+                        PIXEL31_10
+                        break;
+                    }
+                case 247:
+                    {
+                        PIXEL00_81
+                        PIXEL01_31
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL10_81
+                        PIXEL11_31
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_82
+                        PIXEL21_32
+                        PIXEL22_0
+                        PIXEL23_0
+                        PIXEL30_82
+                        PIXEL31_32
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+                case 255:
+                    {
+                        if (Diff(w[4], w[2]))
+                        {
+                            PIXEL00_0
+                        }
+                        else
+                        {
+                            PIXEL00_20
+                        }
+                        PIXEL01_0
+                        PIXEL02_0
+                        if (Diff(w[2], w[6]))
+                        {
+                            PIXEL03_0
+                        }
+                        else
+                        {
+                            PIXEL03_20
+                        }
+                        PIXEL10_0
+                        PIXEL11_0
+                        PIXEL12_0
+                        PIXEL13_0
+                        PIXEL20_0
+                        PIXEL21_0
+                        PIXEL22_0
+                        PIXEL23_0
+                        if (Diff(w[8], w[4]))
+                        {
+                            PIXEL30_0
+                        }
+                        else
+                        {
+                            PIXEL30_20
+                        }
+                        PIXEL31_0
+                        PIXEL32_0
+                        if (Diff(w[6], w[8]))
+                        {
+                            PIXEL33_0
+                        }
+                        else
+                        {
+                            PIXEL33_20
+                        }
+                        break;
+                    }
+            }
+            sp++;
+            dp += 4;
+        }
+
+        sRowP += srb;
+        sp = (uint32_t *) sRowP;
+
+        dRowP += drb * 4;
+        dp = (uint32_t *) dRowP;
+    }
+}
+
+void HQX_CALLCONV hq4x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
+{   /* Wrapper over hq4x_32_rb: source rows are Xres*4 bytes, destination rows four times that. */
+    const uint32_t srcRowBytes = Xres * 4;
+    hq4x_32_rb(sp, srcRowBytes, dp, srcRowBytes * 4, Xres, Yres);
+}
diff --git a/Utilities/HQX/hqx.h b/Utilities/HQX/hqx.h
new file mode 100644
index 0000000..e20d671
--- /dev/null
+++ b/Utilities/HQX/hqx.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
+ *
+ * Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __HQX_H_
+#define __HQX_H_
+
+#include <stdint.h>
+
+#if defined( __GNUC__ ) /* HQX_CALLCONV: __stdcall for MinGW builds only, empty for every other compiler */
+    #ifdef __MINGW32__
+        #define HQX_CALLCONV __stdcall
+    #else
+        #define HQX_CALLCONV
+    #endif
+#else
+    #define HQX_CALLCONV
+#endif
+
+#if defined(_WIN32) /* HQX_API: dllexport/dllimport selector; the prototypes below do not use it */
+    #ifdef DLL_EXPORT
+        #define HQX_API __declspec(dllexport)
+    #else
+        #define HQX_API __declspec(dllimport)
+    #endif
+#else
+    #define HQX_API
+#endif
+/* hqxInit() fills the RGBtoYUV lookup table (init.cpp); hqx() dispatches on scale 2, 3 or 4 and ignores other values. */
+void HQX_CALLCONV hqxInit(void);
+void HQX_CALLCONV hqx(uint32_t scale, uint32_t * src, uint32_t * dest, int width, int height);
+/* Packed-image entry points: hq4x_32 forwards to hq4x_32_rb with width * 4 byte rows (2x/3x presumably alike - confirm). */
+void HQX_CALLCONV hq2x_32( uint32_t * src, uint32_t * dest, int width, int height );
+void HQX_CALLCONV hq3x_32( uint32_t * src, uint32_t * dest, int width, int height );
+void HQX_CALLCONV hq4x_32( uint32_t * src, uint32_t * dest, int width, int height );
+/* Explicit row-size variants; hq4x_32 passes byte counts for src_rowBytes / dest_rowBytes. */
+void HQX_CALLCONV hq2x_32_rb( uint32_t * src, uint32_t src_rowBytes, uint32_t * dest, uint32_t dest_rowBytes, int width, int height );
+void HQX_CALLCONV hq3x_32_rb( uint32_t * src, uint32_t src_rowBytes, uint32_t * dest, uint32_t dest_rowBytes, int width, int height );
+void HQX_CALLCONV hq4x_32_rb( uint32_t * src, uint32_t src_rowBytes, uint32_t * dest, uint32_t dest_rowBytes, int width, int height );
+
+#endif
diff --git a/Utilities/HQX/init.cpp b/Utilities/HQX/init.cpp
new file mode 100644
index 0000000..be9de48
--- /dev/null
+++ b/Utilities/HQX/init.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "../stdafx.h"
+#include <stdint.h>
+#include "hqx.h"
+
+uint32_t   RGBtoYUV[16777216]; /* indexed by a 24-bit 0xRRGGBB value; filled by hqxInit() */
+uint32_t   YUV1, YUV2;
+
+/* Fills RGBtoYUV for all 2^24 colors; each entry packs the components as (Y << 16) + (U << 8) + V. */
+void HQX_CALLCONV hqxInit(void)
+{
+    uint32_t c, r, g, b, y, u, v;
+    for (c = 0; c <= 0xFFFFFF; c++) { /* inclusive bound: "c < 16777215" never wrote the entry for white (0xFFFFFF) */
+        r = (c & 0xFF0000) >> 16;
+        g = (c & 0x00FF00) >> 8;
+        b = c & 0x0000FF;
+        y = (uint32_t)(0.299*r + 0.587*g + 0.114*b);
+        u = (uint32_t)((int32_t)(-0.169*r - 0.331*g + 0.5*b) + 128); /* term may be negative: truncate as signed, a direct double->unsigned cast is undefined */
+        v = (uint32_t)((int32_t)(0.5*r - 0.419*g - 0.081*b) + 128);
+        RGBtoYUV[c] = (y << 16) + (u << 8) + v;
+    }
+}
+
+/* Dispatches to hq2x_32, hq3x_32 or hq4x_32 according to scale. */
+void HQX_CALLCONV hqx(uint32_t scale, uint32_t * src, uint32_t * dest, int width, int height)
+{
+	/* Only factors 2, 3 and 4 are handled; any other value does nothing. */
+	if(scale == 2) { hq2x_32(src, dest, width, height); }
+	else if(scale == 3) { hq3x_32(src, dest, width, height); }
+	else if(scale == 4) { hq4x_32(src, dest, width, height); }
+}
\ No newline at end of file
diff --git a/Utilities/HexUtilities.cpp b/Utilities/HexUtilities.cpp
new file mode 100644
index 0000000..0697b33
--- /dev/null
+++ b/Utilities/HexUtilities.cpp
@@ -0,0 +1,75 @@
+#include "stdafx.h"
+#include "HexUtilities.h"
+
+const vector<string> HexUtilities::_hexCache = { { //Entry N is the two-digit uppercase hex text of byte value N
+	"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F",
+	"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", "1F",
+	"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F",
+	"30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E", "3F",
+	"40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4A", "4B", "4C", "4D", "4E", "4F",
+	"50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
+	"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A", "6B", "6C", "6D", "6E", "6F",
+	"70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F",
+	"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B", "8C", "8D", "8E", "8F",
+	"90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
+	"A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF",
+	"B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF",
+	"C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF",
+	"D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF",
+	"E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF",
+	"F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF"
+} };
+
+//Parses hex digits (upper or lower case). Any other character counts as a 0 digit, and only the
+//last 8 digits fit in the 32-bit result (earlier ones are shifted out).
+int HexUtilities::FromHex(string hex)
+{
+	uint32_t value = 0; //Unsigned accumulator: shifting bits out of a signed int is undefined behavior
+	for(char c : hex) {
+		value <<= 4;
+		if(c >= '0' && c <= '9') {
+			value |= (uint32_t)(c - '0');
+		} else if((c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) {
+			value |= (uint32_t)((c & 0x0F) + 9); //'A' and 'a' both end in nibble 1, so +9 maps the letters to 10-15
+		}
+	}
+	return (int)value;
+}
+
+string HexUtilities::ToHex(uint8_t value) //Returns the _hexCache entry for value (two uppercase hex digits)
+{
+	return _hexCache[value];
+}
+
+string HexUtilities::ToHex(uint16_t value) //Four hex digits: high byte first, then low byte
+{
+	return _hexCache[value >> 8] + _hexCache[value & 0xFF];
+}
+
+string HexUtilities::ToHex(int32_t value, bool fullSize) //Casts value to uint32_t and forwards to that overload
+{
+	return HexUtilities::ToHex((uint32_t)value, fullSize);
+}
+
+//Formats value as uppercase hex using the fewest whole bytes (2, 4, 6 or 8 digits); fullSize forces 8 digits
+string HexUtilities::ToHex(uint32_t value, bool fullSize)
+{
+	int byteCount = (fullSize || value > 0xFFFFFF) ? 4 : (value > 0xFFFF ? 3 : (value > 0xFF ? 2 : 1));
+	string result;
+	result.reserve(byteCount * 2);
+	for(int shift = (byteCount - 1) * 8; shift >= 0; shift -= 8) {
+		//Most significant byte first
+		result += _hexCache[(value >> shift) & 0xFF];
+	}
+	return result;
+}
+
+//Hex-encodes data: two uppercase digits per byte, in order, with no separators
+string HexUtilities::ToHex(vector<uint8_t> &data)
+{
+	string hexString(data.size() * 2, '0');
+	for(size_t i = 0; i < data.size(); i++) {
+		hexString.replace(i * 2, 2, _hexCache[data[i]]);
+	}
+	return hexString;
+}
\ No newline at end of file
diff --git a/Utilities/HexUtilities.h b/Utilities/HexUtilities.h
new file mode 100644
index 0000000..b6f1a98
--- /dev/null
+++ b/Utilities/HexUtilities.h
@@ -0,0 +1,17 @@
+#pragma once
+#include "stdafx.h"
+
+class HexUtilities //Static helpers converting between integers / byte buffers and hexadecimal text
+{
+private:
+	const static vector<string> _hexCache; //Defined in HexUtilities.cpp: 256 entries, "00" through "FF"
+
+public:
+	static string ToHex(uint8_t value); //2 digits
+	static string ToHex(uint16_t value); //4 digits
+	static string ToHex(uint32_t value, bool fullSize = false); //2, 4, 6 or 8 digits; always 8 when fullSize is true
+	static string ToHex(int32_t value, bool fullSize = false); //Same as the uint32_t overload after a cast
+	static string ToHex(vector<uint8_t> &data); //2 digits per byte, concatenated
+
+	static int FromHex(string hex); //Accepts upper and lower case digits
+};
\ No newline at end of file
diff --git a/Utilities/IpsPatcher.cpp b/Utilities/IpsPatcher.cpp
new file mode 100644
index 0000000..5d9dd4e
--- /dev/null
+++ b/Utilities/IpsPatcher.cpp
@@ -0,0 +1,188 @@
+#include "stdafx.h"
+#include <assert.h>
+#include <cstring>
+#include <sstream>
+#include "IpsPatcher.h"
+
+class IpsRecord //One IPS patch record: literal replacement bytes, or a run-length fill when Length == 0
+{
+public:
+	uint32_t Address = 0;
+	uint16_t Length = 0;
+	vector<uint8_t> Replacement;
+
+	//For RLE records (when length == 0)
+	uint16_t RepeatCount = 0;
+	uint8_t Value = 0;
+
+	//Reads the next record; returns false on the "EOF" marker or when the stream ends/fails before the record is complete
+	bool ReadRecord(std::istream &ipsFile)
+	{
+		uint8_t buffer[3] = {};
+		if(!ipsFile.read((char*)buffer, 3) || memcmp(buffer, "EOF", 3) == 0) {
+			//EOF marker reached, or no more data (never parse bytes that were not actually read)
+			return false;
+		}
+		Address = buffer[2] | (buffer[1] << 8) | (buffer[0] << 16);
+
+		if(!ipsFile.read((char*)buffer, 2)) {
+			return false;
+		}
+		Length = buffer[1] | (buffer[0] << 8);
+		if(Length == 0) {
+			//RLE record
+			if(!ipsFile.read((char*)buffer, 3)) {
+				return false;
+			}
+			RepeatCount = buffer[1] | (buffer[0] << 8);
+			Value = buffer[2];
+			return true;
+		}
+		Replacement.resize(Length);
+		return !ipsFile.read((char*)Replacement.data(), Length).fail();
+	}
+
+	//Appends this record to output using the same byte layout ReadRecord parses (big-endian fields)
+	void WriteRecord(vector<uint8_t> &output)
+	{
+		output.push_back((Address >> 16) & 0xFF);
+		output.push_back((Address >> 8) & 0xFF);
+		output.push_back(Address & 0xFF);
+		output.push_back((Length >> 8) & 0xFF);
+		output.push_back(Length & 0xFF);
+
+		if(Length == 0) {
+			output.push_back((RepeatCount >> 8) & 0xFF);
+			output.push_back(RepeatCount & 0xFF);
+			output.push_back(Value);
+		} else {
+			output.insert(output.end(), Replacement.begin(), Replacement.end());
+		}
+	}
+};
+
+//Applies the IPS patch stored at ipsFilepath to input; returns false when the file cannot be opened or is rejected
+bool IpsPatcher::PatchBuffer(string ipsFilepath, vector<uint8_t> &input, vector<uint8_t> &output)
+{
+	ifstream ipsFile(ipsFilepath, std::ios::in | std::ios::binary);
+
+	//The stream overload does the actual work once the file is open
+	return ipsFile && PatchBuffer(ipsFile, input, output);
+}
+
+//Applies an in-memory IPS patch by wrapping the bytes of ipsData in a string stream
+bool IpsPatcher::PatchBuffer(vector<uint8_t> &ipsData, vector<uint8_t> &input, vector<uint8_t> &output)
+{
+	std::stringstream patchStream(string(ipsData.begin(), ipsData.end()));
+	return PatchBuffer(patchStream, input, output);
+}
+
+//Applies the IPS patch read from ipsFile to input and stores the result in output; false when "PATCH" is missing
+bool IpsPatcher::PatchBuffer(std::istream &ipsFile, vector<uint8_t> &input, vector<uint8_t> &output)
+{
+	char header[5] = {};
+	if(!ipsFile.read(header, 5) || memcmp(header, "PATCH", 5) != 0) {
+		//Invalid ips file
+		return false;
+	}
+
+	vector<IpsRecord> records;
+	int32_t truncateOffset = -1;
+	size_t maxOutputSize = input.size();
+	while(ipsFile) {
+		IpsRecord record;
+		if(!record.ReadRecord(ipsFile)) {
+			//EOF, try to read truncate offset record if it exists
+			uint8_t buffer[3];
+			if(ipsFile.read((char*)buffer, 3)) {
+				truncateOffset = buffer[2] | (buffer[1] << 8) | (buffer[0] << 16);
+			}
+			break;
+		}
+		if(!ipsFile) {
+			break; //The data ended in the middle of this record: discard it instead of applying stale bytes
+		}
+		size_t recordEnd = (size_t)record.Address + record.Length + record.RepeatCount;
+		maxOutputSize = recordEnd > maxOutputSize ? recordEnd : maxOutputSize;
+		records.push_back(record);
+	}
+
+	output.resize(maxOutputSize);
+	std::copy(input.begin(), input.end(), output.begin());
+
+	for(const IpsRecord &record : records) {
+		if(record.Length == 0) {
+			std::fill(output.begin() + record.Address, output.begin() + record.Address + record.RepeatCount, record.Value);
+		} else {
+			std::copy(record.Replacement.begin(), record.Replacement.end(), output.begin() + record.Address);
+		}
+	}
+
+	if(truncateOffset != -1 && (int32_t)output.size() > truncateOffset) {
+		output.resize(truncateOffset);
+	}
+
+	return true;
+}
+
+//Builds an IPS patch turning originalData into newData (sizes must match); runs of one repeated byte become RLE records
+vector<uint8_t> IpsPatcher::CreatePatch(vector<uint8_t> originalData, vector<uint8_t> newData)
+{
+	assert(originalData.size() == newData.size());
+
+	vector<uint8_t> patchFile;
+	uint8_t header[5] = { 'P', 'A', 'T', 'C', 'H' };
+	patchFile.insert(patchFile.end(), header, header + sizeof(header));
+
+	size_t i = 0, len = originalData.size();
+	while(i < len) {
+		while(i < len && originalData[i] == newData[i]) {
+			i++;
+		}
+		if(i < len) {
+			IpsRecord patchRecord;
+			uint8_t rleByte = newData[i];
+			uint16_t rleCount = 0; //16 bits like Length: an 8-bit counter wrapped on runs of 256+ bytes and dropped data
+			bool createRleRecord = false;
+			patchRecord.Address = (uint32_t)i;
+			while(i < len && patchRecord.Length < 65535 && originalData[i] != newData[i]) {
+				if(newData[i] == rleByte) {
+					rleCount++;
+				} else if(createRleRecord) {
+					break;
+				} else {
+					rleByte = newData[i];
+					rleCount = 1;
+				}
+
+				patchRecord.Length++;
+				i++;
+
+				if((patchRecord.Length == rleCount && rleCount > 3) || rleCount > 13) {
+					//Making a RLE entry would probably save space, so write the current entry and create a RLE entry after it
+					if(patchRecord.Length == rleCount) {
+						//Same character since the start of this entry, make the RLE entry now
+						createRleRecord = true;
+					} else {
+						patchRecord.Length -= rleCount;
+						i -= rleCount;
+						break;
+					}
+				}
+			}
+			if(createRleRecord) {
+				patchRecord.Length = 0;
+				patchRecord.RepeatCount = rleCount;
+				patchRecord.Value = rleByte;
+			} else {
+				patchRecord.Replacement.assign(newData.begin() + patchRecord.Address, newData.begin() + patchRecord.Address + patchRecord.Length); //Iterators: &newData[size] is out of range
+			}
+			patchRecord.WriteRecord(patchFile);
+		}
+	}
+
+	uint8_t endOfFile[3] = { 'E', 'O', 'F' };
+	patchFile.insert(patchFile.end(), endOfFile, endOfFile + sizeof(endOfFile));
+
+	return patchFile;
+}
\ No newline at end of file
diff --git a/Utilities/IpsPatcher.h b/Utilities/IpsPatcher.h
new file mode 100644
index 0000000..d88081a
--- /dev/null
+++ b/Utilities/IpsPatcher.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "stdafx.h"
+
+class IpsPatcher //Static helpers that apply and create IPS patches
+{
+public:
+	static bool PatchBuffer(string ipsFilepath, vector<uint8_t> &input, vector<uint8_t> &output); //Patch read from a file path
+	static bool PatchBuffer(vector<uint8_t>& ipsData, vector<uint8_t>& input, vector<uint8_t>& output); //Patch held in memory
+	static bool PatchBuffer(std::istream &ipsFile, vector<uint8_t> &input, vector<uint8_t> &output); //Patch read from a stream; false when the "PATCH" header is missing
+	static vector<uint8_t> CreatePatch(vector<uint8_t> originalData, vector<uint8_t> newData); //Both buffers must have the same size (asserted in the implementation)
+};
\ No newline at end of file
diff --git a/Utilities/KreedSaiEagle/2xSai.cpp b/Utilities/KreedSaiEagle/2xSai.cpp
new file mode 100644
index 0000000..d35e055
--- /dev/null
+++ b/Utilities/KreedSaiEagle/2xSai.cpp
@@ -0,0 +1,168 @@
+/* This is a heavily modified version of the file used in RetroArch */
+
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2014 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../stdafx.h"
+
+#define twoxsai_interpolate_xrgb8888(A, B) ((((A) & 0xFEFEFEFE) >> 1) + (((B) & 0xFEFEFEFE) >> 1) + ((A) & (B) & 0x01010101)) /* per-byte floor((A + B) / 2) of two packed pixels */
+/* Per-byte floor((A + B + C + D) / 4) of four packed pixels. */
+#define twoxsai_interpolate2_xrgb8888(A, B, C, D) ((((A) & 0xFCFCFCFC) >> 2) + (((B) & 0xFCFCFCFC) >> 2) + (((C) & 0xFCFCFCFC) >> 2) + (((D) & 0xFCFCFCFC) >> 2) + (((((A) & 0x03030303) + ((B) & 0x03030303) + ((C) & 0x03030303) + ((D) & 0x03030303)) >> 2) & 0x03030303))
+/* Vote in {-1, 0, +1}: +1 when only A differs from C or D, -1 when only B does. No trailing ';' (matches supertwoxsai_result). */
+#define twoxsai_result(A, B, C, D) (((A) != (C) || (A) != (D)) - ((B) != (C) || (B) != (D)))
+
+#define twoxsai_declare_variables(typename_t, in) /* loads the neighbourhood of *in; offsets come from the caller's prev/next line/column locals */ \
+         typename_t product, product1, product2; \
+         typename_t colorI = *(in - prevline - prevcolumn); \
+         typename_t colorE = *(in - prevline + 0); \
+         typename_t colorF = *(in - prevline + nextcolumn); \
+         typename_t colorJ = *(in - prevline + nextcolumn2); \
+         typename_t colorG = *(in - prevcolumn); \
+         typename_t colorA = *(in + 0); \
+         typename_t colorB = *(in + nextcolumn); \
+         typename_t colorK = *(in + nextcolumn2); \
+         typename_t colorH = *(in + nextline - prevcolumn); \
+         typename_t colorC = *(in + nextline + 0); \
+         typename_t colorD = *(in + nextline + nextcolumn); \
+         typename_t colorL = *(in + nextline + nextcolumn2); \
+         typename_t colorM = *(in + nextline2 - prevcolumn); \
+         typename_t colorN = *(in + nextline2 + 0); \
+         typename_t colorO = *(in + nextline2 + nextcolumn); /* last line: no continuation, so the macro no longer depends on the blank line after it */
+
+#ifndef twoxsai_function /* one 2xSaI step; needs the twoxsai_declare_variables locals plus in, out and dst_stride in scope */
+#define twoxsai_function(result_cb, interpolate_cb, interpolate2_cb) \
+         if (colorA == colorD && colorB != colorC) /* only the A-D diagonal matches */ \
+         { \
+            if ((colorA == colorE && colorB == colorL) || (colorA == colorC && colorA == colorF && colorB != colorE && colorB == colorJ)) \
+               product = colorA; \
+            else \
+            { \
+               product = interpolate_cb(colorA, colorB); \
+            } \
+            if ((colorA == colorG && colorC == colorO) || (colorA == colorB && colorA == colorH && colorG != colorC && colorC == colorM)) \
+               product1 = colorA; \
+            else \
+            { \
+               product1 = interpolate_cb(colorA, colorC); \
+            } \
+            product2 = colorA; \
+         } else if (colorB == colorC && colorA != colorD) /* only the B-C diagonal matches */ \
+         { \
+            if ((colorB == colorF && colorA == colorH) || (colorB == colorE && colorB == colorD && colorA != colorF && colorA == colorI)) \
+               product = colorB; \
+            else \
+            { \
+               product = interpolate_cb(colorA, colorB); \
+            } \
+            if ((colorC == colorH && colorA == colorF) || (colorC == colorG && colorC == colorD && colorA != colorH && colorA == colorI)) \
+               product1 = colorC; \
+            else \
+            { \
+               product1 = interpolate_cb(colorA, colorC); \
+            } \
+            product2 = colorB; \
+         } \
+         else if (colorA == colorD && colorB == colorC) /* both diagonals match */ \
+         { \
+            if (colorA == colorB) \
+            { \
+               product  = colorA; \
+               product1 = colorA; \
+               product2 = colorA; \
+            } \
+            else \
+            { \
+               int r = 0; \
+               product1 = interpolate_cb(colorA, colorC); \
+               product  = interpolate_cb(colorA, colorB); \
+               r += result_cb(colorA, colorB, colorG, colorE); \
+               r += result_cb(colorB, colorA, colorK, colorF); \
+               r += result_cb(colorB, colorA, colorH, colorN); \
+               r += result_cb(colorA, colorB, colorL, colorO); \
+               if (r > 0) \
+                  product2 = colorA; \
+               else if (r < 0) \
+                  product2 = colorB; \
+               else \
+               { \
+                  product2 = interpolate2_cb(colorA, colorB, colorC, colorD); \
+               } \
+            } \
+         } \
+         else /* neither diagonal matches */ \
+         { \
+            product2 = interpolate2_cb(colorA, colorB, colorC, colorD); \
+            if (colorA == colorC && colorA == colorF && colorB != colorE && colorB == colorJ) \
+               product = colorA; \
+            else if (colorB == colorE && colorB == colorD && colorA != colorF && colorA == colorI) \
+               product = colorB; \
+            else \
+            { \
+               product = interpolate_cb(colorA, colorB); \
+            } \
+            if (colorA == colorB && colorA == colorH && colorG != colorC && colorC == colorM) \
+               product1 = colorA; \
+            else if (colorC == colorG && colorC == colorD && colorA != colorH && colorA == colorI) \
+               product1 = colorC; \
+            else \
+            { \
+               product1 = interpolate_cb(colorA, colorC); \
+            } \
+         } \
+         out[0] = colorA; /* top-left output pixel is the unmodified source pixel */ \
+         out[1] = product; \
+         out[dst_stride] = product1; \
+         out[dst_stride + 1] = product2; \
+         ++in; /* one source pixel consumed, two destination pixels produced per row */ \
+         out += 2
+#endif
+
+/* Scales a width x height XRGB8888 image by 2 using the 2xSaI step defined by twoxsai_function.
+ * Strides are counted in pixels; neighbour offsets collapse to 0 on the first/last rows and columns. */
+void twoxsai_generic_xrgb8888(unsigned width, unsigned height, uint32_t *src, unsigned src_stride, uint32_t *dst, unsigned dst_stride)
+{
+	for(int y = 0; height != 0; y++, height--) {
+		uint32_t *in = src;
+		uint32_t *out = dst;
+
+		/* height counts the rows still to process, so it also tells how many rows exist below this one */
+		int prevline = (y > 0 ? src_stride : 0);
+		int nextline = (height > 1 ? src_stride : 0);
+		int nextline2 = (height > 2 ? src_stride * 2 : nextline);
+
+		int x = 0;
+		for(unsigned finish = width; finish != 0; finish--, x++) {
+			int prevcolumn = (x > 0 ? 1 : 0);
+			int nextcolumn = (finish > 1 ? 1 : 0);
+			int nextcolumn2 = (finish > 2 ? 2 : nextcolumn);
+			twoxsai_declare_variables(uint32_t, in);
+
+			/*
+			 * Map of the pixels:           I|E F|J
+			 *                              G|A B|K
+			 *                              H|C D|L
+			 *                              M|N O|P   (P is not sampled)
+			 */
+
+			/* Writes the 2x2 block at out, then advances in by 1 and out by 2 */
+			twoxsai_function(twoxsai_result, twoxsai_interpolate_xrgb8888, twoxsai_interpolate2_xrgb8888);
+		}
+
+		/* Next source row; the destination advances by two rows */
+		src += src_stride;
+		dst += 2 * dst_stride;
+	}
+}
diff --git a/Utilities/KreedSaiEagle/SaiEagle.h b/Utilities/KreedSaiEagle/SaiEagle.h
new file mode 100644
index 0000000..129f6b1
--- /dev/null
+++ b/Utilities/KreedSaiEagle/SaiEagle.h
@@ -0,0 +1,7 @@
+#pragma once
+#include "../stdafx.h"
+
+extern void supertwoxsai_generic_xrgb8888(unsigned width, unsigned height, uint32_t *src, unsigned src_stride, uint32_t *dst, unsigned dst_stride); //Defined in Super2xSai.cpp; 2x scale, strides counted in uint32_t pixels
+extern void twoxsai_generic_xrgb8888(unsigned width, unsigned height, uint32_t *src, unsigned src_stride, uint32_t *dst, unsigned dst_stride); //Defined in 2xSai.cpp; same contract
+extern void supereagle_generic_xrgb8888(unsigned width, unsigned height, uint32_t *src, unsigned src_stride, uint32_t *dst, unsigned dst_stride); //Presumably defined in SuperEagle.cpp with the same contract - confirm
+
diff --git a/Utilities/KreedSaiEagle/Super2xSai.cpp b/Utilities/KreedSaiEagle/Super2xSai.cpp
new file mode 100644
index 0000000..ee9d7b2
--- /dev/null
+++ b/Utilities/KreedSaiEagle/Super2xSai.cpp
@@ -0,0 +1,145 @@
+/* This is a heavily modified version of the file used in RetroArch */
+
+/*  RetroArch - A frontend for libretro.
+*  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+*  Copyright (C) 2010-2014 - Daniel De Matteis
+*
+*  RetroArch is free software: you can redistribute it and/or modify it under the terms
+*  of the GNU General Public License as published by the Free Software Found-
+*  ation, either version 3 of the License, or (at your option) any later version.
+*
+*  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+*  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+*  PURPOSE.  See the GNU General Public License for more details.
+*
+*  You should have received a copy of the GNU General Public License along with RetroArch.
+*  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "../stdafx.h"
+
+#define supertwoxsai_interpolate_xrgb8888(A, B) ((((A) & 0xFEFEFEFE) >> 1) + (((B) & 0xFEFEFEFE) >> 1) + ((A) & (B) & 0x01010101)) /* per-byte floor((A + B) / 2) of two packed pixels */
+/* Per-byte floor((A + B + C + D) / 4) of four packed pixels. */
+#define supertwoxsai_interpolate2_xrgb8888(A, B, C, D) ((((A) & 0xFCFCFCFC) >> 2) + (((B) & 0xFCFCFCFC) >> 2) + (((C) & 0xFCFCFCFC) >> 2) + (((D) & 0xFCFCFCFC) >> 2) + (((((A) & 0x03030303) + ((B) & 0x03030303) + ((C) & 0x03030303) + ((D) & 0x03030303)) >> 2) & 0x03030303))
+/* Vote in {-1, 0, +1}: +1 when only A differs from C or D, -1 when only B does, 0 otherwise. */
+#define supertwoxsai_result(A, B, C, D) (((A) != (C) || (A) != (D)) - ((B) != (C) || (B) != (D)))
+
+#ifndef supertwoxsai_declare_variables /* loads the 4x4 neighbourhood around color5 (= *in) into const locals */
+#define supertwoxsai_declare_variables(typename_t, in) /* offsets come from the caller's prev/next line/column locals */ \
+         typename_t product1a, product1b, product2a, product2b; \
+         const typename_t colorB0 = *(in - prevline - prevcolumn); \
+         const typename_t colorB1 = *(in - prevline + 0); \
+         const typename_t colorB2 = *(in - prevline + nextcolumn); \
+         const typename_t colorB3 = *(in - prevline + nextcolumn2); \
+         const typename_t color4  = *(in - prevcolumn); \
+         const typename_t color5  = *(in + 0); \
+         const typename_t color6  = *(in + nextcolumn); \
+         const typename_t colorS2 = *(in + nextcolumn2); \
+         const typename_t color1  = *(in + nextline - prevcolumn); \
+         const typename_t color2  = *(in + nextline + 0); \
+         const typename_t color3  = *(in + nextline + nextcolumn); \
+         const typename_t colorS1 = *(in + nextline + nextcolumn2); \
+         const typename_t colorA0 = *(in + nextline2 - prevcolumn); \
+         const typename_t colorA1 = *(in + nextline2 + 0); \
+         const typename_t colorA2 = *(in + nextline2 + nextcolumn); \
+         const typename_t colorA3 = *(in + nextline2 + nextcolumn2)
+#endif
+
+#ifndef supertwoxsai_function /* one Super 2xSaI step; needs the supertwoxsai_declare_variables locals plus in, out and dst_stride in scope */
+#define supertwoxsai_function(result_cb, interpolate_cb, interpolate2_cb) \
+         if (color2 == color6 && color5 != color3) /* product1b / product2b: right-hand column of the 2x2 output block */ \
+            product2b = product1b = color2; \
+         else if (color5 == color3 && color2 != color6) \
+            product2b = product1b = color5; \
+         else if (color5 == color3 && color2 == color6) \
+         { \
+            int r = 0; \
+            r += result_cb(color6, color5, color1, colorA1); \
+            r += result_cb(color6, color5, color4, colorB1); \
+            r += result_cb(color6, color5, colorA2, colorS1); \
+            r += result_cb(color6, color5, colorB2, colorS2); \
+            if (r > 0) \
+               product2b = product1b = color6; \
+            else if (r < 0) \
+               product2b = product1b = color5; \
+            else \
+               product2b = product1b = interpolate_cb(color5, color6); \
+         } \
+         else \
+         { \
+            if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) \
+               product2b = interpolate2_cb(color3, color3, color3, color2); \
+            else if ((color5 == color2 && color2 == colorA2) && (colorA1 != color3 && color2 != colorA3)) /* logical AND; was a bitwise '&' on two boolean groups */ \
+               product2b = interpolate2_cb(color2, color2, color2, color3); \
+            else \
+               product2b = interpolate_cb(color2, color3); \
+            if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) \
+               product1b = interpolate2_cb(color6, color6, color6, color5); \
+            else if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) \
+               product1b = interpolate2_cb(color6, color5, color5, color5); \
+            else \
+               product1b = interpolate_cb(color5, color6); \
+         } \
+         if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) /* product2a: bottom-left output pixel */ \
+         { \
+            product2a = interpolate_cb(color2, color5); \
+         } \
+         else if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) \
+         { \
+            product2a = interpolate_cb(color2, color5); \
+         } \
+         else \
+            product2a = color2; \
+         if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) /* product1a: top-left output pixel */ \
+         { \
+            product1a = interpolate_cb(color2, color5); \
+         } \
+         else if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) \
+         { \
+            product1a = interpolate_cb(color2, color5); \
+         } \
+         else \
+            product1a = color5; \
+         out[0] = product1a; \
+         out[1] = product1b; \
+         out[dst_stride] = product2a; \
+         out[dst_stride + 1] = product2b; \
+         ++in; /* one source pixel consumed, two destination pixels produced per row */ \
+         out += 2
+#endif
+
+//Scales a width x height XRGB8888 image by 2 using the Super 2xSaI step defined by supertwoxsai_function.
+//Strides are counted in pixels; neighbour offsets collapse to 0 on the first/last rows and columns.
+void supertwoxsai_generic_xrgb8888(unsigned width, unsigned height, uint32_t *src, unsigned src_stride, uint32_t *dst, unsigned dst_stride)
+{
+	for(int y = 0; height != 0; y++, height--) {
+		uint32_t *in = src;
+		uint32_t *out = dst;
+
+		//height counts the rows still to process, so it also tells how many rows exist below this one
+		int prevline = (y > 0 ? src_stride : 0);
+		int nextline = (height > 1 ? src_stride : 0);
+		int nextline2 = (height > 2 ? src_stride * 2 : nextline);
+
+		int x = 0;
+		for(unsigned finish = width; finish != 0; finish--, x++) {
+			int prevcolumn = (x > 0 ? 1 : 0);
+			int nextcolumn = (finish > 1 ? 1 : 0);
+			int nextcolumn2 = (finish > 2 ? 2 : nextcolumn);
+			supertwoxsai_declare_variables(uint32_t, in);
+
+			//---------------------------    B1 B2
+			//                             4  5  6 S2
+			//                             1  2  3 S1
+			//                               A1 A2
+			//--------------------------------------
+
+			//Writes the 2x2 block at out, then advances in by 1 and out by 2
+			supertwoxsai_function(supertwoxsai_result, supertwoxsai_interpolate_xrgb8888, supertwoxsai_interpolate2_xrgb8888);
+		}
+
+		//Next source row; the destination advances by two rows
+		src += src_stride;
+		dst += 2 * dst_stride;
+	}
+}
\ No newline at end of file
diff --git a/Utilities/KreedSaiEagle/SuperEagle.cpp b/Utilities/KreedSaiEagle/SuperEagle.cpp
new file mode 100644
index 0000000..11c15d7
--- /dev/null
+++ b/Utilities/KreedSaiEagle/SuperEagle.cpp
@@ -0,0 +1,156 @@
+/* This is a heavily modified version of the file used in RetroArch */
+
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2014 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../stdafx.h"
+/* Per-byte average of two packed 32-bit pixels (rounded down); 1 is added back where both low bits were set. */
+#define supereagle_interpolate_xrgb8888(A, B) ((((A) & 0xFEFEFEFE) >> 1) + (((B) & 0xFEFEFEFE) >> 1) + ((A) & (B) & 0x01010101))
+/* Per-byte average of four packed 32-bit pixels; the two low bits of every byte are summed and divided separately. */
+#define supereagle_interpolate2_xrgb8888(A, B, C, D) ((((A) & 0xFCFCFCFC) >> 2) + (((B) & 0xFCFCFCFC) >> 2) + (((C) & 0xFCFCFCFC) >> 2) + (((D) & 0xFCFCFCFC) >> 2) + (((((A) & 0x03030303) + ((B) & 0x03030303) + ((C) & 0x03030303) + ((D) & 0x03030303)) >> 2) & 0x03030303))
+/* Yields -1, 0 or +1: (A differs from C or D) minus (B differs from C or D). Pure expression, so it carries no trailing ';'. */
+#define supereagle_result(A, B, C, D) (((A) != (C) || (A) != (D)) - ((B) != (C) || (B) != (D)))
+/* Declares the four output pixels (product1a..product2b) and loads the 12 source pixels around `in`; the prevline/nextline/nextline2 and prevcolumn/nextcolumn/nextcolumn2 offsets must already be in scope where the macro is expanded. */
+#define supereagle_declare_variables(typename_t, in) \
+         typename_t product1a, product1b, product2a, product2b; \
+         const typename_t colorB1 = *(in - prevline + 0); /* row above */ \
+         const typename_t colorB2 = *(in - prevline + nextcolumn); \
+         const typename_t color4  = *(in - prevcolumn); /* current row */ \
+         const typename_t color5  = *(in + 0); /* the pixel at `in` itself */ \
+         const typename_t color6  = *(in + nextcolumn); \
+         const typename_t colorS2 = *(in + nextcolumn2); \
+         const typename_t color1  = *(in + nextline - prevcolumn); /* row below */ \
+         const typename_t color2  = *(in + nextline + 0); \
+         const typename_t color3  = *(in + nextline + nextcolumn); \
+         const typename_t colorS1 = *(in + nextline + nextcolumn2); \
+         const typename_t colorA1 = *(in + nextline2 + 0); /* row at offset nextline2 */ \
+         const typename_t colorA2 = *(in + nextline2 + nextcolumn)
+/* supereagle_function: from the variables declared by supereagle_declare_variables, computes product1a/product1b (upper output row) and product2a/product2b (lower output row), stores them at out and out + dst_stride, then advances in by 1 and out by 2. result_cb is accepted but never used: the body calls supereagle_result directly. */
+#ifndef supereagle_function
+#define supereagle_function(result_cb, interpolate_cb, interpolate2_cb) \
+         if (color2 == color6 && color5 != color3) /* only the 2-6 pair matches */ \
+         { \
+            product1b = product2a = color2; \
+            if ((color1 == color2) || (color6 == colorB2)) \
+            { \
+               product1a = interpolate_cb(color2, color5); \
+               product1a = interpolate_cb(color2, product1a); /* second pass weights color2 more heavily */ \
+            } \
+            else \
+            { \
+               product1a = interpolate_cb(color5, color6); \
+            } \
+            if ((color6 == colorS2) || (color2 == colorA1)) \
+            { \
+               product2b = interpolate_cb(color2, color3); \
+               product2b = interpolate_cb(color2, product2b); \
+            } \
+            else \
+            { \
+               product2b = interpolate_cb(color2, color3); \
+            } \
+         } \
+         else if (color5 == color3 && color2 != color6) /* only the 5-3 pair matches */ \
+         { \
+            product2b = product1a = color5; \
+            if ((colorB1 == color5) || (color3 == colorS1)) \
+            { \
+               product1b = interpolate_cb(color5, color6); \
+               product1b = interpolate_cb(color5, product1b); \
+            } \
+            else \
+            { \
+               product1b = interpolate_cb(color5, color6); \
+            } \
+            if ((color3 == colorA2) || (color4 == color5)) \
+            { \
+               product2a = interpolate_cb(color5, color2); \
+               product2a = interpolate_cb(color5, product2a); \
+            } \
+            else \
+            { \
+               product2a = interpolate_cb(color2, color3); \
+            } \
+         } \
+         else if (color5 == color3 && color2 == color6) /* both pairs match: decide with the surrounding pixels */ \
+         { \
+            int r = 0; /* sum of four supereagle_result votes, each -1, 0 or +1 */ \
+            r += supereagle_result(color6, color5, color1, colorA1); \
+            r += supereagle_result(color6, color5, color4, colorB1); \
+            r += supereagle_result(color6, color5, colorA2, colorS1); \
+            r += supereagle_result(color6, color5, colorB2, colorS2); \
+            if (r > 0) \
+            { \
+               product1b = product2a = color2; \
+               product1a = product2b = interpolate_cb(color5, color6); \
+            } \
+            else if (r < 0) \
+            { \
+               product2b = product1a = color5; \
+               product1b = product2a = interpolate_cb(color5, color6); \
+            } \
+            else \
+            { \
+               product2b = product1a = color5; \
+               product1b = product2a = color2; \
+            } \
+         } \
+         else /* neither pair matches: each output is three parts its own source pixel, one part the opposite pair's average */ \
+         { \
+            product2b = product1a = interpolate_cb(color2, color6); \
+            product2b = interpolate2_cb(color3, color3, color3, product2b); \
+            product1a = interpolate2_cb(color5, color5, color5, product1a); \
+            product2a = product1b = interpolate_cb(color5, color3); \
+            product2a = interpolate2_cb(color2, color2, color2, product2a); \
+            product1b = interpolate2_cb(color6, color6, color6, product1b); \
+         } \
+         out[0] = product1a; /* upper-left output pixel */ \
+         out[1] = product1b; /* upper-right */ \
+         out[dst_stride] = product2a; /* lower-left */ \
+         out[dst_stride + 1] = product2b; /* lower-right */ \
+         ++in; \
+         out += 2
+#endif
+
+void supereagle_generic_xrgb8888(unsigned width, unsigned height, uint32_t *src, unsigned src_stride, uint32_t *dst, unsigned dst_stride)
+{
+	// Scales a width x height block of 32-bit pixels to twice its size; both strides are in pixels.
+	// The supereagle_* macros pick up in, out, dst_stride and the prev*/next* locals by name.
+	for(int y = 0; height != 0; ++y, --height) {
+		uint32_t *in = src;
+		uint32_t *out = dst;
+
+		// Offsets to the neighbouring rows, clamped at the top and bottom edges of the image.
+		const int prevline = (y > 0) ? src_stride : 0;
+		const int nextline = (height > 1) ? src_stride : 0;
+		const int nextline2 = (height > 2) ? src_stride * 2 : nextline;
+
+		int x = 0;
+		for(unsigned finish = width; finish != 0; --finish, ++x) {
+			// Same clamping for the neighbouring columns.
+			const int prevcolumn = (x > 0) ? 1 : 0;
+			const int nextcolumn = (finish > 1) ? 1 : 0;
+			const int nextcolumn2 = (finish > 2) ? 2 : nextcolumn;
+			supereagle_declare_variables(uint32_t, in);
+
+			// Emits one 2x2 output quad, then advances in by 1 and out by 2 (done inside the macro).
+			supereagle_function(supereagle_result, supereagle_interpolate_xrgb8888, supereagle_interpolate2_xrgb8888);
+		}
+
+		src += src_stride;
+		dst += 2 * dst_stride;
+	}
+}
\ No newline at end of file
diff --git a/Utilities/LowPassFilter.h b/Utilities/LowPassFilter.h
new file mode 100644
index 0000000..4550ecb
--- /dev/null
+++ b/Utilities/LowPassFilter.h
@@ -0,0 +1,37 @@
+#pragma once
+#include "stdafx.h"
+#include <assert.h>
+#include <numeric>
+
+// Moving-average low-pass filter for interleaved stereo 16-bit samples, with an optional volume scale.
+class LowPassFilter
+{
+private:
+	static const int MaxStrength = 10; // capacity of each per-channel history buffer
+	uint8_t _prevSampleCounter = 0; // history slot overwritten next (shared by both channels)
+	int16_t _prevSamplesLeft[MaxStrength] = {};
+	int16_t _prevSamplesRight[MaxStrength] = {};
+	// Replaces buffer[index] by the mean of itself and the first `strength` history entries, scaled by volume.
+	void UpdateSample(int16_t *buffer, size_t index, int strength, double volume, int16_t *_prevSamples)
+	{
+		if(strength > 0) {
+			int32_t sum = std::accumulate(_prevSamples, _prevSamples + strength, 0);
+			buffer[index] = (int16_t)((sum + buffer[index]) / (strength + 1) * volume);
+			_prevSamples[_prevSampleCounter] = buffer[index]; // the history stores the filtered output
+		} else {
+			buffer[index] = (int16_t)(buffer[index] * volume); // filter disabled: volume only
+		}
+	}
+public:
+	// Filters sampleCount stereo frames (2 * sampleCount values) of buffer in place; strength <= 0 only applies volume.
+	void ApplyFilter(int16_t *buffer, size_t sampleCount, int strength, double volume = 1.0f)
+	{
+		assert(strength <= MaxStrength);
+		if(strength > MaxStrength) strength = MaxStrength; // assert() is compiled out under NDEBUG; never run past the history arrays
+		for(size_t i = 0; i < sampleCount*2; i+=2) {
+			UpdateSample(buffer, i, strength, volume, _prevSamplesLeft);
+			UpdateSample(buffer, i+1, strength, volume, _prevSamplesRight);
+			if(strength > 0) _prevSampleCounter = (uint8_t)((_prevSampleCounter + 1) % strength);
+		}
+	}
+};
\ No newline at end of file
diff --git a/Utilities/PNGHelper.cpp b/Utilities/PNGHelper.cpp
new file mode 100644
index 0000000..eaf6cf9
--- /dev/null
+++ b/Utilities/PNGHelper.cpp
@@ -0,0 +1,612 @@
+#include "stdafx.h"
+#include <sstream>
+#include "PNGHelper.h"
+#include "miniz.h"
+
+bool PNGHelper::WritePNG(std::stringstream &stream, uint32_t* buffer, uint32_t xSize, uint32_t ySize, uint32_t bitsPerPixel)
+{
+	//ARGB -> ABGR: bytes 0 and 2 of every 32-bit word are exchanged in place, so the caller's buffer stays modified
+	const uint32_t wordCount = xSize * ySize * bitsPerPixel / 8 / 4;
+	for(uint32_t *pixel = buffer, *end = buffer + wordCount; pixel != end; ++pixel) {
+		const uint32_t movedDown = (*pixel & 0xFF0000) >> 16;
+		const uint32_t movedUp = (*pixel & 0xFF) << 16;
+		*pixel = (*pixel & 0xFF00FF00) | movedDown | movedUp;
+	}
+
+	size_t pngSize = 0;
+	void *pngData = tdefl_write_image_to_png_file_in_memory_ex(buffer, xSize, ySize, bitsPerPixel / 8, &pngSize, MZ_DEFAULT_LEVEL, MZ_FALSE);
+	if(!pngData) {
+		std::cout << "tdefl_write_image_to_png_file_in_memory_ex() failed!" << std::endl;
+		return false;
+	}
+	stream.write((char*)pngData, pngSize);
+	mz_free(pngData);
+	return true;
+}
+
+bool PNGHelper::WritePNG(string filename, uint32_t* buffer, uint32_t xSize, uint32_t ySize, uint32_t bitsPerPixel)
+{
+	// Encodes buffer as a PNG in memory, then writes it to filename; returns true only if encoding, opening and writing all succeed.
+	std::stringstream stream;
+	if(!WritePNG(stream, buffer, xSize, ySize, bitsPerPixel)) {
+		return false;
+	}
+	ofstream file(filename, std::ios::out | std::ios::binary);
+	if(!file.good()) return false; // a file that cannot be opened is a failure, not a silent success
+	file << stream.rdbuf();
+	file.close(); // close() flushes and sets failbit if the final write fails
+	return !file.fail();
+}
+
+bool PNGHelper::ReadPNG(vector<uint8_t> input, vector<uint8_t> &output, uint32_t &pngWidth, uint32_t &pngHeight)
+{
+	// Decodes an in-memory PNG into output, exchanges bytes 0 and 2 of each 32-bit word and reports the size (0 x 0 on failure).
+	pngWidth = 0;
+	pngHeight = 0;
+	unsigned long decodedWidth = 0;
+	unsigned long decodedHeight = 0;
+	if(DecodePNG(output, decodedWidth, decodedHeight, input.data(), input.size()) != 0) {
+		return false;
+	}
+
+	//ABGR to ARGB
+	uint32_t *words = (uint32_t*)output.data();
+	const size_t wordCount = output.size() / 4;
+	for(size_t i = 0; i < wordCount; i++) {
+		const uint32_t px = words[i];
+		words[i] = (px & 0xFF00FF00) | ((px & 0xFF0000) >> 16) | ((px & 0xFF) << 16);
+	}
+	pngWidth = decodedWidth;
+	pngHeight = decodedHeight;
+	return true;
+}
+
+bool PNGHelper::ReadPNG(string filename, vector<uint8_t> &pngData, uint32_t &pngWidth, uint32_t &pngHeight)
+{
+	// Loads filename fully into memory and decodes it through the in-memory overload.
+	// Returns false (dimensions left at 0) if the file cannot be opened, sized, read completely or decoded.
+	pngWidth = 0;
+	pngHeight = 0;
+
+	ifstream pngFile(filename, std::ios::in | std::ios::binary);
+	if(!pngFile) return false;
+
+	pngFile.seekg(0, std::ios::end);
+	const std::streamoff fileSize = pngFile.tellg();
+	if(fileSize < 0) return false; // tellg() reports failure as -1, which must not turn into a huge allocation
+	pngFile.seekg(0, std::ios::beg);
+	vector<uint8_t> fileData((size_t)fileSize, 0);
+	pngFile.read((char*)fileData.data(), fileData.size());
+	return pngFile.gcount() == fileSize && ReadPNG(fileData, pngData, pngWidth, pngHeight);
+}
+
+/*
+decodePNG: The picoPNG function, decodes a PNG file buffer in memory, into a raw pixel buffer.
+out_image: output parameter, this will contain the raw pixels after decoding.
+  By default the output is 32-bit RGBA color.
+  The std::vector is automatically resized to the correct size.
+image_width: output_parameter, this will contain the width of the image in pixels.
+image_height: output_parameter, this will contain the height of the image in pixels.
+in_png: pointer to the buffer of the PNG file in memory. To get it from a file on
+  disk, load it and store it in a memory buffer yourself first.
+in_size: size of the input PNG file in bytes.
+convert_to_rgba32: optional parameter, true by default.
+  Set to true to get the output in RGBA 32-bit (8 bit per channel) color format
+  no matter what color type the original PNG image had. This gives predictable,
+  useable data from any random input PNG.
+  Set to false to do no color conversion at all. The result then has the same data
+  type as the PNG image, which can range from 1 bit to 64 bits per pixel.
+  Information about the color type or palette colors are not provided. You need
+  to know this information yourself to be able to use the data so this only
+  works for trusted PNG files. Use LodePNG instead of picoPNG if you need this information.
+return: 0 if success, not 0 if some error occurred.
+*/
+int PNGHelper::DecodePNG(vector<unsigned char>& out_image, unsigned long& image_width, unsigned long& image_height, const unsigned char* in_png, size_t in_size, bool convert_to_rgba32)
+{
+  // picoPNG version 20101224
+  // Copyright (c) 2005-2010 Lode Vandevenne
+  //
+  // This software is provided 'as-is', without any express or implied
+  // warranty. In no event will the authors be held liable for any damages
+  // arising from the use of this software.
+  //
+  // Permission is granted to anyone to use this software for any purpose,
+  // including commercial applications, and to alter it and redistribute it
+  // freely, subject to the following restrictions:
+  //
+  //     1. The origin of this software must not be misrepresented; you must not
+  //     claim that you wrote the original software. If you use this software
+  //     in a product, an acknowledgment in the product documentation would be
+  //     appreciated but is not required.
+  //     2. Altered source versions must be plainly marked as such, and must not be
+  //     misrepresented as being the original software.
+  //     3. This notice may not be removed or altered from any source distribution.
+  
+  // picoPNG is a PNG decoder in one C++ function of around 500 lines. Use picoPNG for
+  // programs that need only 1 .cpp file. Since it's a single function, it's very limited,
+  // it can convert a PNG to raw pixel data either converted to 32-bit RGBA color or
+  // with no color conversion at all. For anything more complex, another tiny library
+  // is available: LodePNG (lodepng.c(pp)), which is a single source and header file.
+  // Apologies for the compact code style, it's to make this tiny.
+  
+  static const unsigned long LENBASE[29] =  {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258};
+  static const unsigned long LENEXTRA[29] = {0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,  5,  5,  5,  0};
+  static const unsigned long DISTBASE[30] =  {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
+  static const unsigned long DISTEXTRA[30] = {0,0,0,0,1,1,2, 2, 3, 3, 4, 4, 5, 5,  6,  6,  7,  7,  8,  8,   9,   9,  10,  10,  11,  11,  12,   12,   13,   13};
+  static const unsigned long CLCL[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; //code length code lengths
+  struct Zlib //nested functions for zlib decompression
+  {
+    static unsigned long readBitFromStream(size_t& bitp, const unsigned char* bits) { unsigned long result = (bits[bitp >> 3] >> (bitp & 0x7)) & 1; bitp++; return result;}
+    static unsigned long readBitsFromStream(size_t& bitp, const unsigned char* bits, size_t nbits)
+    {
+      unsigned long result = 0;
+      for(size_t i = 0; i < nbits; i++) result += (readBitFromStream(bitp, bits)) << i;
+      return result;
+    }
+    struct HuffmanTree
+    {
+      int makeFromLengths(const std::vector<unsigned long>& bitlen, unsigned long maxbitlen)
+      { //make tree given the lengths
+        unsigned long numcodes = (unsigned long)(bitlen.size()), treepos = 0, nodefilled = 0;
+        std::vector<unsigned long> tree1d(numcodes), blcount(maxbitlen + 1, 0), nextcode(maxbitlen + 1, 0);
+        for(unsigned long bits = 0; bits < numcodes; bits++) blcount[bitlen[bits]]++; //count number of instances of each code length
+        for(unsigned long bits = 1; bits <= maxbitlen; bits++) nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1;
+        for(unsigned long n = 0; n < numcodes; n++) if(bitlen[n] != 0) tree1d[n] = nextcode[bitlen[n]]++; //generate all the codes
+        tree2d.clear(); tree2d.resize(numcodes * 2, 32767); //32767 here means the tree2d isn't filled there yet
+        for(unsigned long n = 0; n < numcodes; n++) //the codes
+        for(unsigned long i = 0; i < bitlen[n]; i++) //the bits for this code
+        {
+          unsigned long bit = (tree1d[n] >> (bitlen[n] - i - 1)) & 1;
+          if(treepos > numcodes - 2) return 55;
+          if(tree2d[2 * treepos + bit] == 32767) //not yet filled in
+          {
+            if(i + 1 == bitlen[n]) { tree2d[2 * treepos + bit] = n; treepos = 0; } //last bit
+            else { tree2d[2 * treepos + bit] = ++nodefilled + numcodes; treepos = nodefilled; } //addresses are encoded as values > numcodes
+          }
+          else treepos = tree2d[2 * treepos + bit] - numcodes; //subtract numcodes from address to get address value
+        }
+        return 0;
+      }
+      int decode(bool& decoded, unsigned long& result, size_t& treepos, unsigned long bit) const
+      { //Decodes a symbol from the tree
+        unsigned long numcodes = (unsigned long)tree2d.size() / 2;
+        if(treepos >= numcodes) return 11; //error: you appeared outside the codetree
+        result = tree2d[2 * treepos + bit];
+        decoded = (result < numcodes);
+        treepos = decoded ? 0 : result - numcodes;
+        return 0;
+      }
+      std::vector<unsigned long> tree2d; //2D representation of a huffman tree: The one dimension is "0" or "1", the other contains all nodes and leaves of the tree.
+    };
+    struct Inflator
+    {
+      int error;
+      void inflate(std::vector<unsigned char>& out, const std::vector<unsigned char>& in, size_t inpos = 0)
+      {
+        size_t bp = 0, pos = 0; //bit pointer and byte pointer
+        error = 0;
+        unsigned long BFINAL = 0;
+        while(!BFINAL && !error)
+        {
+          if(bp >> 3 >= in.size()) { error = 52; return; } //error, bit pointer will jump past memory
+          BFINAL = readBitFromStream(bp, &in[inpos]);
+          unsigned long BTYPE = readBitFromStream(bp, &in[inpos]); BTYPE += 2 * readBitFromStream(bp, &in[inpos]);
+          if(BTYPE == 3) { error = 20; return; } //error: invalid BTYPE
+          else if(BTYPE == 0) inflateNoCompression(out, &in[inpos], bp, pos, in.size());
+          else inflateHuffmanBlock(out, &in[inpos], bp, pos, in.size(), BTYPE);
+        }
+        if(!error) out.resize(pos); //Only now we know the true size of out, resize it to that
+      }
+      void generateFixedTrees(HuffmanTree& tree, HuffmanTree& treeD) //get the tree of a deflated block with fixed tree
+      {
+        std::vector<unsigned long> bitlen(288, 8), bitlenD(32, 5);;
+        for(size_t i = 144; i <= 255; i++) bitlen[i] = 9;
+        for(size_t i = 256; i <= 279; i++) bitlen[i] = 7;
+        tree.makeFromLengths(bitlen, 15);
+        treeD.makeFromLengths(bitlenD, 15);
+      }
+      HuffmanTree codetree, codetreeD, codelengthcodetree; //the code tree for Huffman codes, dist codes, and code length codes
+      unsigned long huffmanDecodeSymbol(const unsigned char* in, size_t& bp, const HuffmanTree& codetree, size_t inlength)
+      { //decode a single symbol from given list of bits with given code tree. return value is the symbol
+        bool decoded; unsigned long ct;
+        for(size_t treepos = 0;;)
+        {
+          if((bp & 0x07) == 0 && (bp >> 3) > inlength) { error = 10; return 0; } //error: end reached without endcode
+          error = codetree.decode(decoded, ct, treepos, readBitFromStream(bp, in)); if(error) return 0; //stop, an error happened
+          if(decoded) return ct;
+        }
+      }
+      void getTreeInflateDynamic(HuffmanTree& tree, HuffmanTree& treeD, const unsigned char* in, size_t& bp, size_t inlength)
+      { //get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree
+        std::vector<unsigned long> bitlen(288, 0), bitlenD(32, 0);
+        if(bp >> 3 >= inlength - 2) { error = 49; return; } //the bit pointer is or will go past the memory
+        size_t HLIT =  readBitsFromStream(bp, in, 5) + 257; //number of literal/length codes + 257
+        size_t HDIST = readBitsFromStream(bp, in, 5) + 1; //number of dist codes + 1
+        size_t HCLEN = readBitsFromStream(bp, in, 4) + 4; //number of code length codes + 4
+        std::vector<unsigned long> codelengthcode(19); //lengths of tree to decode the lengths of the dynamic tree
+        for(size_t i = 0; i < 19; i++) codelengthcode[CLCL[i]] = (i < HCLEN) ? readBitsFromStream(bp, in, 3) : 0;
+        error = codelengthcodetree.makeFromLengths(codelengthcode, 7); if(error) return;
+        size_t i = 0, replength;
+        while(i < HLIT + HDIST)
+        {
+          unsigned long code = huffmanDecodeSymbol(in, bp, codelengthcodetree, inlength); if(error) return;
+          if(code <= 15)  { if(i < HLIT) bitlen[i++] = code; else bitlenD[i++ - HLIT] = code; } //a length code
+          else if(code == 16) //repeat previous
+          {
+            if(bp >> 3 >= inlength) { error = 50; return; } //error, bit pointer jumps past memory
+            replength = 3 + readBitsFromStream(bp, in, 2);
+            unsigned long value; //set value to the previous code
+            if((i - 1) < HLIT) value = bitlen[i - 1];
+            else value = bitlenD[i - HLIT - 1];
+            for(size_t n = 0; n < replength; n++) //repeat this value in the next lengths
+            {
+              if(i >= HLIT + HDIST) { error = 13; return; } //error: i is larger than the amount of codes
+              if(i < HLIT) bitlen[i++] = value; else bitlenD[i++ - HLIT] = value;
+            }
+          }
+          else if(code == 17) //repeat "0" 3-10 times
+          {
+            if(bp >> 3 >= inlength) { error = 50; return; } //error, bit pointer jumps past memory
+            replength = 3 + readBitsFromStream(bp, in, 3);
+            for(size_t n = 0; n < replength; n++) //repeat this value in the next lengths
+            {
+              if(i >= HLIT + HDIST) { error = 14; return; } //error: i is larger than the amount of codes
+              if(i < HLIT) bitlen[i++] = 0; else bitlenD[i++ - HLIT] = 0;
+            }
+          }
+          else if(code == 18) //repeat "0" 11-138 times
+          {
+            if(bp >> 3 >= inlength) { error = 50; return; } //error, bit pointer jumps past memory
+            replength = 11 + readBitsFromStream(bp, in, 7);
+            for(size_t n = 0; n < replength; n++) //repeat this value in the next lengths
+            {
+              if(i >= HLIT + HDIST) { error = 15; return; } //error: i is larger than the amount of codes
+              if(i < HLIT) bitlen[i++] = 0; else bitlenD[i++ - HLIT] = 0;
+            }
+          }
+          else { error = 16; return; } //error: somehow an unexisting code appeared. This can never happen.
+        }
+        if(bitlen[256] == 0) { error = 64; return; } //the length of the end code 256 must be larger than 0
+        error = tree.makeFromLengths(bitlen, 15); if(error) return; //now we've finally got HLIT and HDIST, so generate the code trees, and the function is done
+        error = treeD.makeFromLengths(bitlenD, 15); if(error) return;
+      }
+      void inflateHuffmanBlock(std::vector<unsigned char>& out, const unsigned char* in, size_t& bp, size_t& pos, size_t inlength, unsigned long btype) 
+      {
+        if(btype == 1) { generateFixedTrees(codetree, codetreeD); }
+        else if(btype == 2) { getTreeInflateDynamic(codetree, codetreeD, in, bp, inlength); if(error) return; }
+        for(;;)
+        {
+          unsigned long code = huffmanDecodeSymbol(in, bp, codetree, inlength); if(error) return;
+          if(code == 256) return; //end code
+          else if(code <= 255) //literal symbol
+          {
+            if(pos >= out.size()) out.resize((pos + 1) * 2); //reserve more room
+            out[pos++] = (unsigned char)(code);
+          }
+          else if(code >= 257 && code <= 285) //length code
+          {
+            size_t length = LENBASE[code - 257], numextrabits = LENEXTRA[code - 257];
+            if((bp >> 3) >= inlength) { error = 51; return; } //error, bit pointer will jump past memory
+            length += readBitsFromStream(bp, in, numextrabits);
+            unsigned long codeD = huffmanDecodeSymbol(in, bp, codetreeD, inlength); if(error) return;
+            if(codeD > 29) { error = 18; return; } //error: invalid dist code (30-31 are never used)
+            unsigned long dist = DISTBASE[codeD], numextrabitsD = DISTEXTRA[codeD];
+            if((bp >> 3) >= inlength) { error = 51; return; } //error, bit pointer will jump past memory
+            dist += readBitsFromStream(bp, in, numextrabitsD);
+            size_t start = pos, back = start - dist; //backwards
+            if(pos + length >= out.size()) out.resize((pos + length) * 2); //reserve more room
+            for(size_t i = 0; i < length; i++) { out[pos++] = out[back++]; if(back >= start) back = start - dist; }
+          }
+        }
+      }
+      void inflateNoCompression(std::vector<unsigned char>& out, const unsigned char* in, size_t& bp, size_t& pos, size_t inlength)
+      {
+        while((bp & 0x7) != 0) bp++; //go to first boundary of byte
+        size_t p = bp / 8;
+        if(p >= inlength - 4) { error = 52; return; } //error, bit pointer will jump past memory
+        unsigned long LEN = in[p] + 256 * in[p + 1], NLEN = in[p + 2] + 256 * in[p + 3]; p += 4;
+        if(LEN + NLEN != 65535) { error = 21; return; } //error: NLEN is not one's complement of LEN
+        if(pos + LEN >= out.size()) out.resize(pos + LEN);
+        if(p + LEN > inlength) { error = 23; return; } //error: reading outside of in buffer
+        for(unsigned long n = 0; n < LEN; n++) out[pos++] = in[p++]; //read LEN bytes of literal data
+        bp = p * 8;
+      }
+    };
+    int decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in) //returns error value
+    {
+      Inflator inflator;
+      if(in.size() < 2) { return 53; } //error, size of zlib data too small
+      if((in[0] * 256 + in[1]) % 31 != 0) { return 24; } //error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way
+      unsigned long CM = in[0] & 15, CINFO = (in[0] >> 4) & 15, FDICT = (in[1] >> 5) & 1;
+      if(CM != 8 || CINFO > 7) { return 25; } //error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec
+      if(FDICT != 0) { return 26; } //error: the specification of PNG says about the zlib stream: "The additional flags shall not specify a preset dictionary."
+      inflator.inflate(out, in, 2);
+      return inflator.error; //note: adler32 checksum was skipped and ignored
+    }
+  };
+  struct PNG //nested functions for PNG decoding
+  {
+    struct Info
+    {
+      unsigned long width, height, colorType, bitDepth, compressionMethod, filterMethod, interlaceMethod, key_r, key_g, key_b;
+      bool key_defined; //is a transparent color key given?
+      std::vector<unsigned char> palette;
+    } info;
+    int error;
+    void decode(std::vector<unsigned char>& out, const unsigned char* in, size_t size, bool convert_to_rgba32)
+    {
+      error = 0;
+      if(size == 0 || in == 0) { error = 48; return; } //the given data is empty
+      readPngHeader(&in[0], size); if(error) return;
+      size_t pos = 33; //first byte of the first chunk after the header
+      std::vector<unsigned char> idat; //the data from idat chunks
+      bool IEND = false;
+      info.key_defined = false;
+      while(!IEND) //loop through the chunks, ignoring unknown chunks and stopping at IEND chunk. IDAT data is put at the start of the in buffer
+      {
+        if(pos + 8 >= size) { error = 30; return; } //error: size of the in buffer too small to contain next chunk
+        size_t chunkLength = read32bitInt(&in[pos]); pos += 4;
+        if(chunkLength > 2147483647) { error = 63; return; }
+        if(pos + chunkLength >= size) { error = 35; return; } //error: size of the in buffer too small to contain next chunk
+        if(in[pos + 0] == 'I' && in[pos + 1] == 'D' && in[pos + 2] == 'A' && in[pos + 3] == 'T') //IDAT chunk, containing compressed image data
+        {
+          idat.insert(idat.end(), &in[pos + 4], &in[pos + 4 + chunkLength]);
+          pos += (4 + chunkLength);
+        }
+        else if(in[pos + 0] == 'I' && in[pos + 1] == 'E' && in[pos + 2] == 'N' && in[pos + 3] == 'D')  { pos += 4; IEND = true; }
+        else if(in[pos + 0] == 'P' && in[pos + 1] == 'L' && in[pos + 2] == 'T' && in[pos + 3] == 'E') //palette chunk (PLTE)
+        {
+          pos += 4; //go after the 4 letters
+          info.palette.resize(4 * (chunkLength / 3));
+          if(info.palette.size() > (4 * 256)) { error = 38; return; } //error: palette too big
+          for(size_t i = 0; i < info.palette.size(); i += 4)
+          {
+            for(size_t j = 0; j < 3; j++) info.palette[i + j] = in[pos++]; //RGB
+            info.palette[i + 3] = 255; //alpha
+          }
+        }
+        else if(in[pos + 0] == 't' && in[pos + 1] == 'R' && in[pos + 2] == 'N' && in[pos + 3] == 'S') //palette transparency chunk (tRNS)
+        {
+          pos += 4; //go after the 4 letters
+          if(info.colorType == 3)
+          {
+            if(4 * chunkLength > info.palette.size()) { error = 39; return; } //error: more alpha values given than there are palette entries
+            for(size_t i = 0; i < chunkLength; i++) info.palette[4 * i + 3] = in[pos++];
+          }
+          else if(info.colorType == 0)
+          {
+            if(chunkLength != 2) { error = 40; return; } //error: this chunk must be 2 bytes for greyscale image
+            info.key_defined = 1; info.key_r = info.key_g = info.key_b = 256 * in[pos] + in[pos + 1]; pos += 2;
+          }
+          else if(info.colorType == 2)
+          {
+            if(chunkLength != 6) { error = 41; return; } //error: this chunk must be 6 bytes for RGB image
+            info.key_defined = 1;
+            info.key_r = 256 * in[pos] + in[pos + 1]; pos += 2;
+            info.key_g = 256 * in[pos] + in[pos + 1]; pos += 2;
+            info.key_b = 256 * in[pos] + in[pos + 1]; pos += 2;
+          }
+          else { error = 42; return; } //error: tRNS chunk not allowed for other color models
+        }
+        else //it's not an implemented chunk type, so ignore it: skip over the data
+        {
+          if(!(in[pos + 0] & 32)) { error = 69; return; } //error: unknown critical chunk (5th bit of first byte of chunk type is 0)
+          pos += (chunkLength + 4); //skip 4 letters and uninterpreted data of unimplemented chunk
+        }
+        pos += 4; //step over CRC (which is ignored)
+      }
+      unsigned long bpp = getBpp(info);
+      std::vector<unsigned char> scanlines(((info.width * (info.height * bpp + 7)) / 8) + info.height); //now the out buffer will be filled
+      Zlib zlib; //decompress with the Zlib decompressor
+      error = zlib.decompress(scanlines, idat); if(error) return; //stop if the zlib decompressor returned an error
+      size_t bytewidth = (bpp + 7) / 8, outlength = (info.height * info.width * bpp + 7) / 8;
+      out.resize(outlength); //time to fill the out buffer
+      unsigned char* out_ = outlength ? &out[0] : 0; //use a regular pointer to the std::vector for faster code if compiled without optimization
+      if(info.interlaceMethod == 0) //no interlace, just filter
+      {
+        size_t linestart = 0, linelength = (info.width * bpp + 7) / 8; //length in bytes of a scanline, excluding the filtertype byte
+        if(bpp >= 8) //byte per byte
+        for(unsigned long y = 0; y < info.height; y++)
+        {
+          unsigned long filterType = scanlines[linestart];
+          const unsigned char* prevline = (y == 0) ? 0 : &out_[(y - 1) * info.width * bytewidth];
+          unFilterScanline(&out_[linestart - y], &scanlines[linestart + 1], prevline, bytewidth, filterType,  linelength); if(error) return;
+          linestart += (1 + linelength); //go to start of next scanline
+        }
+        else //less than 8 bits per pixel, so fill it up bit per bit
+        {
+          std::vector<unsigned char> templine((info.width * bpp + 7) >> 3); //only used if bpp < 8
+          for(size_t y = 0, obp = 0; y < info.height; y++)
+          {
+            unsigned long filterType = scanlines[linestart];
+            const unsigned char* prevline = (y == 0) ? 0 : &out_[(y - 1) * info.width * bytewidth];
+            unFilterScanline(&templine[0], &scanlines[linestart + 1], prevline, bytewidth, filterType, linelength); if(error) return;
+            for(size_t bp = 0; bp < info.width * bpp;) setBitOfReversedStream(obp, out_, readBitFromReversedStream(bp, &templine[0]));
+            linestart += (1 + linelength); //go to start of next scanline
+          }
+        }
+      }
+      else //interlaceMethod is 1 (Adam7)
+      {
+        size_t passw[7] = { (info.width + 7) / 8, (info.width + 3) / 8, (info.width + 3) / 4, (info.width + 1) / 4, (info.width + 1) / 2, (info.width + 0) / 2, (info.width + 0) / 1 };
+        size_t passh[7] = { (info.height + 7) / 8, (info.height + 7) / 8, (info.height + 3) / 8, (info.height + 3) / 4, (info.height + 1) / 4, (info.height + 1) / 2, (info.height + 0) / 2 };
+        size_t passstart[7] = {0};
+        size_t pattern[28] = {0,4,0,2,0,1,0,0,0,4,0,2,0,1,8,8,4,4,2,2,1,8,8,8,4,4,2,2}; //values for the adam7 passes
+        for(int i = 0; i < 6; i++) passstart[i + 1] = passstart[i] + passh[i] * ((passw[i] ? 1 : 0) + (passw[i] * bpp + 7) / 8);
+        std::vector<unsigned char> scanlineo((info.width * bpp + 7) / 8), scanlinen((info.width * bpp + 7) / 8); //"old" and "new" scanline
+        for(int i = 0; i < 7; i++)
+          adam7Pass(&out_[0], &scanlinen[0], &scanlineo[0], &scanlines[passstart[i]], info.width, pattern[i], pattern[i + 7], pattern[i + 14], pattern[i + 21], passw[i], passh[i], bpp);
+      }
+      if(convert_to_rgba32 && (info.colorType != 6 || info.bitDepth != 8)) //conversion needed
+      {
+        std::vector<unsigned char> data = out;
+        error = convert(out, &data[0], info, info.width, info.height);
+      }
+    }
+    //Validates the first 29 bytes of a PNG stream (the 8-byte signature followed by the IHDR chunk) and copies the IHDR
+    //fields into `info`. On the first problem found, `error` is set to the matching code and parsing stops.
+    void readPngHeader(const unsigned char* in, size_t inlength)
+    {
+      const unsigned char pngSignature[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
+      const unsigned char ihdrName[4] = { 'I', 'H', 'D', 'R' };
+
+      //error 27: the data is shorter than the header
+      if(inlength < 29)
+      {
+        error = 27;
+        return;
+      }
+      //error 28: no PNG signature
+      for(size_t k = 0; k < 8; k++)
+      {
+        if(in[k] != pngSignature[k]) { error = 28; return; }
+      }
+      //error 29: the first chunk is not IHDR (the chunk name is read from offset 12)
+      for(size_t k = 0; k < 4; k++)
+      {
+        if(in[12 + k] != ihdrName[k]) { error = 29; return; }
+      }
+
+      info.width = read32bitInt(&in[16]);
+      info.height = read32bitInt(&in[20]);
+      info.bitDepth = in[24];
+      info.colorType = in[25];
+
+      //error 32: only compression method 0 is allowed in the specification
+      info.compressionMethod = in[26];
+      if(in[26] != 0) { error = 32; return; }
+
+      //error 33: only filter method 0 is allowed in the specification
+      info.filterMethod = in[27];
+      if(in[27] != 0) { error = 33; return; }
+
+      //error 34: only interlace methods 0 and 1 exist in the specification
+      info.interlaceMethod = in[28];
+      if(in[28] > 1) { error = 34; return; }
+
+      //the final verdict is whether the color type / bit depth pair is a legal one
+      error = checkColorValidity(info.colorType, info.bitDepth);
+    }
+    //Reverses the per-scanline filter identified by filterType.
+    //  recon      output: receives `length` reconstructed bytes
+    //  scanline   input: `length` filtered bytes (the filter type byte itself is not part of it)
+    //  precon     reconstructed previous scanline, or 0 when there is none (then treated as all zeros)
+    //  bytewidth  distance in bytes between a byte and the corresponding byte of the pixel to its left
+    //  filterType 0..4; any other value sets error to 36 and leaves recon untouched
+    //Bytes are reconstructed left to right because most filters refer to already reconstructed bytes of `recon`.
+    void unFilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon, size_t bytewidth, unsigned long filterType, size_t length)
+    {
+      switch(filterType)
+      {
+        //type 0: bytes are stored unmodified
+        case 0: for(size_t i = 0; i < length; i++) recon[i] = scanline[i]; break;
+        //type 1: each byte is stored relative to the byte one pixel to the left (the first pixel has no left neighbour)
+        case 1:
+          for(size_t i =         0; i < bytewidth; i++) recon[i] = scanline[i];
+          for(size_t i = bytewidth; i <    length; i++) recon[i] = scanline[i] + recon[i - bytewidth];
+          break;
+        //type 2: each byte is stored relative to the byte directly above it
+        case 2:
+          if(precon) for(size_t i = 0; i < length; i++) recon[i] = scanline[i] + precon[i];
+          else       for(size_t i = 0; i < length; i++) recon[i] = scanline[i];
+          break;
+        //type 3: each byte is stored relative to the average of the left and the upper byte (missing neighbours count as 0)
+        case 3:
+          if(precon)
+          {
+            for(size_t i =         0; i < bytewidth; i++) recon[i] = scanline[i] + precon[i] / 2;
+            for(size_t i = bytewidth; i <    length; i++) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) / 2);
+          }
+          else
+          {
+            for(size_t i =         0; i < bytewidth; i++) recon[i] = scanline[i];
+            for(size_t i = bytewidth; i <    length; i++) recon[i] = scanline[i] + recon[i - bytewidth] / 2;
+          }
+          break;
+        //type 4: each byte is stored relative to paethPredictor(left, up, upper left) (missing neighbours count as 0)
+        case 4:
+          if(precon)
+          {
+            for(size_t i =         0; i < bytewidth; i++) recon[i] = scanline[i] + paethPredictor(0, precon[i], 0);
+            for(size_t i = bytewidth; i <    length; i++) recon[i] = scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]);
+          }
+          else
+          {
+            for(size_t i =         0; i < bytewidth; i++) recon[i] = scanline[i];
+            for(size_t i = bytewidth; i <    length; i++) recon[i] = scanline[i] + paethPredictor(recon[i - bytewidth], 0, 0);
+          }
+          break;
+        default: error = 36; return; //error: unexisting filter type given
+      }
+    }
+    //Unfilters one Adam7 pass and scatters its pixels to their final position in the output image.
+    //This can only be done once the full image data is decompressed; `out` must already have its final size.
+    //  out               destination image, w pixels per row
+    //  linen, lineo      two scratch scanlines ("new" and "old") that are swapped after every row
+    //  in                start of this pass in the decompressed data (each row: 1 filter type byte + pixel bytes)
+    //  passleft, passtop position of the first pixel of the pass in the full image
+    //  spacex, spacey    distance between two consecutive pixels / rows of the pass in the full image
+    //  passw, passh      size of the reduced image of this pass
+    //  bpp               bits per pixel
+    //Sets `error` (through unFilterScanline) and stops when a row has an invalid filter type.
+    void adam7Pass(unsigned char* out, unsigned char* linen, unsigned char* lineo, const unsigned char* in, unsigned long w, size_t passleft, size_t passtop, size_t spacex, size_t spacey, size_t passw, size_t passh, unsigned long bpp)
+    {
+      if(passw == 0) return;
+      const size_t bytewidth = (bpp + 7) / 8;
+      const size_t passlinebytes = (bpp * passw + 7) / 8; //pixel bytes in one row of this pass
+      const size_t linelength = 1 + passlinebytes; //row stride in `in`, including the filter type byte
+      for(unsigned long y = 0; y < passh; y++)
+      {
+        unsigned char filterType = in[y * linelength], *prevline = (y == 0) ? 0 : lineo;
+        //Only unfilter the bytes that belong to this row of the pass. Using the byte length of a full image row here
+        //read beyond the row, and for the last rows of the last non-empty pass potentially beyond the end of `in`.
+        //Every filter only depends on bytes at the same or an earlier offset, so the bytes used below are unchanged.
+        unFilterScanline(linen, &in[y * linelength + 1], prevline, bytewidth, filterType, passlinebytes); if(error) return;
+        if(bpp >= 8) for(size_t i = 0; i < passw; i++) for(size_t b = 0; b < bytewidth; b++) //b = current byte of this pixel
+          out[bytewidth * w * (passtop + spacey * y) + bytewidth * (passleft + spacex * i) + b] = linen[bytewidth * i + b];
+        else for(size_t i = 0; i < passw; i++)
+        {
+          //obp: bit position of the pixel in the output image, bp: bit position of the pixel in the pass row
+          size_t obp = bpp * w * (passtop + spacey * y) + bpp * (passleft + spacex * i), bp = i * bpp;
+          for(size_t b = 0; b < bpp; b++) setBitOfReversedStream(obp, out, readBitFromReversedStream(bp, &linen[0]));
+        }
+        unsigned char* temp = linen; linen = lineo; lineo = temp; //swap the two buffer pointers "line old" and "line new"
+      }
+    }
+    //Returns the bit at bit position bitp of `bits` (bit 0 is the most significant bit of the first byte) and advances bitp by one.
+    static unsigned long readBitFromReversedStream(size_t& bitp, const unsigned char* bits)
+    {
+      const size_t byteIndex = bitp >> 3;
+      const size_t shift = 7 - (bitp & 0x7);
+      bitp++;
+      return (bits[byteIndex] >> shift) & 1;
+    }
+    //Reads nbits consecutive bits starting at bit position bitp and returns them as a number whose most significant
+    //bit is the first bit read. bitp is advanced by nbits; nbits == 0 reads nothing and returns 0.
+    static unsigned long readBitsFromReversedStream(size_t& bitp, const unsigned char* bits, unsigned long nbits)
+    {
+      unsigned long value = 0;
+      size_t shift = nbits;
+      while(shift-- > 0) value += (readBitFromReversedStream(bitp, bits) << shift);
+      return value;
+    }
+    //ORs `bit` into bit position bitp of `bits` (bit 0 is the most significant bit of the first byte) and advances bitp by one.
+    //A bit that is already 1 in the destination is never cleared.
+    void setBitOfReversedStream(size_t& bitp, unsigned char* bits, unsigned long bit)
+    {
+      const size_t shift = 7 - (bitp & 0x7);
+      bits[bitp >> 3] |= (bit << shift);
+      bitp++;
+    }
+    //Reads a 32-bit big-endian unsigned integer from the 4 bytes at `buffer`.
+    unsigned long read32bitInt(const unsigned char* buffer)
+    {
+      //Widen each byte before shifting: shifting the int-promoted byte left by 24 overflows a signed int when the
+      //byte is >= 128, and the negative result is sign-extended on platforms where unsigned long is 64 bits wide.
+      return ((unsigned long)buffer[0] << 24) | ((unsigned long)buffer[1] << 16) | ((unsigned long)buffer[2] << 8) | (unsigned long)buffer[3];
+    }
+    //Checks whether bit depth `bd` is allowed for `colorType`.
+    //Returns a LodePNG error code: 0 when the pair is valid, 37 when the bit depth is not allowed for the color type,
+    //31 when the color type itself does not exist.
+    int checkColorValidity(unsigned long colorType, unsigned long bd)
+    {
+      bool depthAllowed;
+      switch(colorType)
+      {
+        case 0:
+          depthAllowed = (bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16);
+          break;
+        case 3:
+          depthAllowed = (bd == 1 || bd == 2 || bd == 4 || bd == 8);
+          break;
+        case 2:
+        case 4:
+        case 6:
+          depthAllowed = (bd == 8 || bd == 16);
+          break;
+        default:
+          return 31; //unexisting color type
+      }
+      return depthAllowed ? 0 : 37;
+    }
+    //Returns the number of bits per pixel: the bit depth multiplied by 3 for color type 2, by (colorType - 2) for
+    //color types 4 and above, and by 1 for every other color type.
+    unsigned long getBpp(const Info& info)
+    {
+      unsigned long channels = 1;
+      if(info.colorType == 2) channels = 3;
+      else if(info.colorType >= 4) channels = info.colorType - 2;
+      return channels * info.bitDepth;
+    }
+    //Converts w * h pixels from the color type / bit depth described by infoIn to 8-bit RGBA (4 bytes per pixel).
+    //  out     receives the converted pixels, it is resized to w * h * 4 bytes
+    //  in      the unfiltered source pixels; must not point into `out`
+    //  infoIn  color type, bit depth, palette and color key of the source pixels
+    //Return value = LodePNG error code: 0 on success, 46 / 47 when a pixel refers to a palette entry that does not exist.
+    //Of 16-bit channels only the most significant byte is kept. A pixel that equals the color key gets alpha 0.
+    //A color type / bit depth pair that matches none of the branches leaves the pixels unconverted and returns 0.
+    int convert(std::vector<unsigned char>& out, const unsigned char* in, Info& infoIn, unsigned long w, unsigned long h)
+    {
+      size_t numpixels = w * h, bp = 0; //bp = bit position in `in`, only used for bit depths below 8
+      out.resize(numpixels * 4);
+      unsigned char* out_ = out.empty() ? 0 : &out[0]; //faster if compiled without optimization
+      if(infoIn.bitDepth == 8 && infoIn.colorType == 0) //greyscale
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        out_[4 * i + 0] = out_[4 * i + 1] = out_[4 * i + 2] = in[i];
+        out_[4 * i + 3] = (infoIn.key_defined && in[i] == infoIn.key_r) ? 0 : 255;
+      }
+      else if(infoIn.bitDepth == 8 && infoIn.colorType == 2) //RGB color
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        for(size_t c = 0; c < 3; c++) out_[4 * i + c] = in[3 * i + c];
+        out_[4 * i + 3] = (infoIn.key_defined == 1 && in[3 * i + 0] == infoIn.key_r && in[3 * i + 1] == infoIn.key_g && in[3 * i + 2] == infoIn.key_b) ? 0 : 255;
+      }
+      else if(infoIn.bitDepth == 8 && infoIn.colorType == 3) //indexed color (palette)
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        if(4U * in[i] >= infoIn.palette.size()) return 46;
+        for(size_t c = 0; c < 4; c++) out_[4 * i + c] = infoIn.palette[4 * in[i] + c]; //get rgb colors from the palette
+      }
+      else if(infoIn.bitDepth == 8 && infoIn.colorType == 4) //greyscale with alpha
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        out_[4 * i + 0] = out_[4 * i + 1] = out_[4 * i + 2] = in[2 * i + 0];
+        out_[4 * i + 3] = in[2 * i + 1];
+      }
+      else if(infoIn.bitDepth == 8 && infoIn.colorType == 6) for(size_t i = 0; i < numpixels; i++) for(size_t c = 0; c < 4; c++) out_[4 * i + c] = in[4 * i + c]; //RGB with alpha
+      else if(infoIn.bitDepth == 16 && infoIn.colorType == 0) //greyscale
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        out_[4 * i + 0] = out_[4 * i + 1] = out_[4 * i + 2] = in[2 * i]; //most significant byte
+        //pixel i occupies bytes 2 * i and 2 * i + 1: compare the key with the sample of this pixel, not with bytes i and i + 1
+        out_[4 * i + 3] = (infoIn.key_defined && 256U * in[2 * i] + in[2 * i + 1] == infoIn.key_r) ? 0 : 255;
+      }
+      else if(infoIn.bitDepth == 16 && infoIn.colorType == 2) //RGB color
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        for(size_t c = 0; c < 3; c++) out_[4 * i + c] = in[6 * i + 2 * c];
+        out_[4 * i + 3] = (infoIn.key_defined && 256U*in[6*i+0]+in[6*i+1] == infoIn.key_r && 256U*in[6*i+2]+in[6*i+3] == infoIn.key_g && 256U*in[6*i+4]+in[6*i+5] == infoIn.key_b) ? 0 : 255;
+      }
+      else if(infoIn.bitDepth == 16 && infoIn.colorType == 4) //greyscale with alpha
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        out_[4 * i + 0] = out_[4 * i + 1] = out_[4 * i + 2] = in[4 * i]; //most significant byte
+        out_[4 * i + 3] = in[4 * i + 2];
+      }
+      else if(infoIn.bitDepth == 16 && infoIn.colorType == 6) for(size_t i = 0; i < numpixels; i++) for(size_t c = 0; c < 4; c++) out_[4 * i + c] = in[8 * i + 2 * c]; //RGB with alpha
+      else if(infoIn.bitDepth < 8 && infoIn.colorType == 0) //greyscale
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        unsigned long sample = readBitsFromReversedStream(bp, in, infoIn.bitDepth); //grey level as stored in the file
+        unsigned long value = (sample * 255) / ((1 << infoIn.bitDepth) - 1); //scale value from 0 to 255
+        out_[4 * i + 0] = out_[4 * i + 1] = out_[4 * i + 2] = (unsigned char)(value);
+        //the color key is compared with the unscaled sample, like in the 8 and 16 bit branches
+        out_[4 * i + 3] = (infoIn.key_defined && sample == infoIn.key_r) ? 0 : 255;
+      }
+      else if(infoIn.bitDepth < 8 && infoIn.colorType == 3) //palette
+      for(size_t i = 0; i < numpixels; i++)
+      {
+        unsigned long value = readBitsFromReversedStream(bp, in, infoIn.bitDepth);
+        if(4 * value >= infoIn.palette.size()) return 47;
+        for(size_t c = 0; c < 4; c++) out_[4 * i + c] = infoIn.palette[4 * value + c]; //get rgb colors from the palette
+      }
+      return 0;
+    }
+    //Paeth predictor, used by PNG filter type 4: returns whichever of a, b, c is closest to a + b - c.
+    //Ties are resolved in the order a, b, c.
+    unsigned char paethPredictor(short a, short b, short c)
+    {
+      const short estimate = a + b - c;
+      const short distA = estimate > a ? (estimate - a) : (a - estimate);
+      const short distB = estimate > b ? (estimate - b) : (b - estimate);
+      const short distC = estimate > c ? (estimate - c) : (c - estimate);
+
+      if(distA <= distB && distA <= distC) return (unsigned char)a;
+      if(distB <= distC) return (unsigned char)b;
+      return (unsigned char)c;
+    }
+  };
+  PNG decoder = { }; decoder.decode(out_image, in_png, in_size, convert_to_rgba32);
+  image_width = decoder.info.width; image_height = decoder.info.height;
+  return decoder.error;
+}
\ No newline at end of file
diff --git a/Utilities/PNGHelper.h b/Utilities/PNGHelper.h
new file mode 100644
index 0000000..531a709
--- /dev/null
+++ b/Utilities/PNGHelper.h
@@ -0,0 +1,14 @@
+#pragma once
+#include "stdafx.h"
+
+//Collection of static helpers to write and read PNG images.
+//Only the declarations are visible here; the notes below are based on the signatures and need to be confirmed against the .cpp file.
+class PNGHelper
+{
+private:
+	//Decodes the in_size bytes at in_png into out_image and reports the image size through image_width / image_height.
+	//convert_to_rgba32 defaults to true. Returns an int status code.
+	//NOTE(review): presumably 0 means success and the output is 32-bit RGBA when convert_to_rgba32 is set - confirm in the implementation.
+	static int DecodePNG(vector<unsigned char>& out_image, unsigned long& image_width, unsigned long& image_height, const unsigned char* in_png, size_t in_size, bool convert_to_rgba32 = true);
+
+public:
+	//Writes an xSize by ySize image stored in `buffer` to `stream`; bitsPerPixel defaults to 32.
+	//NOTE(review): the meaning of the returned bool (presumably success) and the expected pixel layout are not visible here.
+	static bool WritePNG(std::stringstream &stream, uint32_t* buffer, uint32_t xSize, uint32_t ySize, uint32_t bitsPerPixel = 32);
+	//Same as above, but takes a file name instead of a stream.
+	static bool WritePNG(string filename, uint32_t* buffer, uint32_t xSize, uint32_t ySize, uint32_t bitsPerPixel = 32);
+	//Reads the PNG file `filename`; pngData, pngWidth and pngHeight are output parameters.
+	//NOTE(review): presumably returns true on success - confirm.
+	static bool ReadPNG(string filename, vector<uint8_t> &pngData, uint32_t &pngWidth, uint32_t &pngHeight);
+	//Same as above, but reads the PNG data from `input` (taken by value) instead of from a file.
+	static bool ReadPNG(vector<uint8_t> input, vector<uint8_t> &output, uint32_t &pngWidth, uint32_t &pngHeight);
+};
\ No newline at end of file
diff --git a/Utilities/PlatformUtilities.cpp b/Utilities/PlatformUtilities.cpp
new file mode 100644
index 0000000..d129c07
--- /dev/null
+++ b/Utilities/PlatformUtilities.cpp
@@ -0,0 +1,45 @@
+#include "stdafx.h"
+#include "PlatformUtilities.h"
+
+#if !defined(LIBRETRO) && defined(_WIN32)
+#include <Windows.h>
+#endif
+
+//True while the 1ms timer resolution requested by EnableHighResolutionTimer() is in effect, so that the request is only made (and undone) once
+bool PlatformUtilities::_highResTimerEnabled = false;
+
+//Asks Windows to keep the system and the display awake. Does nothing in LIBRETRO builds or on other platforms.
+void PlatformUtilities::DisableScreensaver()
+{
+	//Prevent screensaver/etc from starting while using the emulator
+	//DirectInput devices apparently do not always count as user input
+	#if !defined(LIBRETRO) && defined(_WIN32)
+	//ES_CONTINUOUS keeps the request in effect until the next SetThreadExecutionState call with ES_CONTINUOUS
+	SetThreadExecutionState(ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED | ES_CONTINUOUS);
+	#endif
+}
+
+//Withdraws the request made by DisableScreensaver(). Does nothing in LIBRETRO builds or on other platforms.
+void PlatformUtilities::EnableScreensaver()
+{
+	#if !defined(LIBRETRO) && defined(_WIN32)
+	//ES_CONTINUOUS on its own clears the system/display requirements that were set earlier
+	SetThreadExecutionState(ES_CONTINUOUS);
+	#endif
+}
+
+//Raises the Windows timer resolution to 1ms, unless this was already done.
+//Does nothing in LIBRETRO builds or on other platforms.
+void PlatformUtilities::EnableHighResolutionTimer()
+{
+#if defined(_WIN32) && !defined(LIBRETRO)
+	if(_highResTimerEnabled) {
+		//Already requested: skipping keeps timeBeginPeriod/timeEndPeriod calls paired
+		return;
+	}
+
+	//Request a 1ms timer resolution on Windows while a game is running
+	timeBeginPeriod(1);
+	_highResTimerEnabled = true;
+#endif
+}
+
+//Undoes EnableHighResolutionTimer(), if the 1ms timer resolution is currently requested.
+//Does nothing in LIBRETRO builds or on other platforms.
+void PlatformUtilities::RestoreTimerResolution()
+{
+#if defined(_WIN32) && !defined(LIBRETRO)
+	if(!_highResTimerEnabled) {
+		//Nothing was requested, so there is nothing to undo
+		return;
+	}
+
+	timeEndPeriod(1);
+	_highResTimerEnabled = false;
+#endif
+}
\ No newline at end of file
diff --git a/Utilities/PlatformUtilities.h b/Utilities/PlatformUtilities.h
new file mode 100644
index 0000000..e5a0fd1
--- /dev/null
+++ b/Utilities/PlatformUtilities.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "stdafx.h"
+
+//Static wrappers around platform specific settings.
+//The implementations only act on Windows builds that do not define LIBRETRO; elsewhere they are no-ops.
+class PlatformUtilities
+{
+private:
+	//True while the 1ms timer resolution requested by EnableHighResolutionTimer() is in effect
+	static bool _highResTimerEnabled;
+
+public:
+	//Keeps the system and the display awake (prevents the screensaver from starting)
+	static void DisableScreensaver();
+	//Withdraws the request made by DisableScreensaver()
+	static void EnableScreensaver();
+
+	//Requests a 1ms timer resolution; calling it again while the request is active has no effect
+	static void EnableHighResolutionTimer();
+	//Ends the request made by EnableHighResolutionTimer(); has no effect when no request is active
+	static void RestoreTimerResolution();
+};
\ No newline at end of file
diff --git a/Utilities/RawCodec.h b/Utilities/RawCodec.h
new file mode 100644
index 0000000..ea32899
--- /dev/null
+++ b/Utilities/RawCodec.h
@@ -0,0 +1,51 @@
+#pragma once
+#include "stdafx.h"
+#include <cstring>
+#include "BaseCodec.h"
+
+//Codec that performs no compression: every frame is converted from 4 bytes per pixel to 3 bytes per pixel
+//and its row order is reversed (BMP/DIB layout).
+class RawCodec : public BaseCodec
+{
+private:
+	int _width = 0;
+	int _height = 0;
+	uint32_t _bufferSize = 0;	//Size in bytes of one converted frame (width * height * 3)
+	uint8_t* _buffer = nullptr;	//Output buffer, owned by this object
+
+public:
+	RawCodec() = default;
+
+	//_buffer is owned through a raw pointer: a copy would free the same buffer twice
+	RawCodec(const RawCodec&) = delete;
+	RawCodec& operator=(const RawCodec&) = delete;
+
+	virtual ~RawCodec()
+	{
+		delete[] _buffer;
+	}
+
+	//Stores the frame size and allocates the (zero-filled) output buffer. compressionLevel is ignored.
+	//May be called again to change the frame size; the previous buffer is released. Always returns true.
+	virtual bool SetupCompress(int width, int height, uint32_t compressionLevel) override
+	{
+		_height = height;
+		_width = width;
+
+		//Release the buffer of a previous setup, it used to be leaked
+		delete[] _buffer;
+		_buffer = nullptr;
+
+		_bufferSize = width * height * 3;
+		//The allocation is rounded up to an even number of bytes
+		uint32_t paddedSize = (_bufferSize + 1) & ~1;
+		_buffer = new uint8_t[paddedSize];
+		memset(_buffer, 0, paddedSize);
+
+		return true;
+	}
+
+	//Converts frameData (_width * _height pixels, 4 bytes each) into the internal buffer.
+	//*compressedData is set to that buffer, which stays owned by the codec: it is overwritten by the
+	//next call and must not be freed by the caller. isKeyFrame is ignored.
+	//Returns the number of bytes of the converted frame.
+	virtual int CompressFrame(bool isKeyFrame, uint8_t *frameData, uint8_t** compressedData) override
+	{
+		*compressedData = _buffer;
+
+		//Convert raw frame to BMP/DIB format (row order is reversed)
+		uint8_t* buffer = _buffer;
+		//Start at the last row of the source frame
+		frameData += (_height - 1) * _width * 4;
+		for(int y = 0; y < _height; y++) {
+			for(int x = 0; x < _width; x++) {
+				//Keep the first 3 bytes of every pixel, drop the 4th
+				buffer[0] = frameData[0];
+				buffer[1] = frameData[1];
+				buffer[2] = frameData[2];
+				frameData += 4;
+				buffer += 3;
+			}
+			//Go back over the row that was just copied, plus one more row to reach the start of the previous row
+			frameData -= _width * 2 * 4;
+		}
+		return _bufferSize;
+	}
+
+	//The FourCC of this codec consists of 4 zero bytes
+	virtual const char* GetFourCC() override
+	{
+		return "\0\0\0\0";
+	}
+};
\ No newline at end of file
diff --git a/Utilities/SZReader.cpp b/Utilities/SZReader.cpp
new file mode 100644
index 0000000..71daade
--- /dev/null
+++ b/Utilities/SZReader.cpp
@@ -0,0 +1,90 @@
+#include "stdafx.h"
+#include <algorithm>
+#include <cstring>
+#include "SZReader.h"
+#include "UTF8Util.h"
+#include "../SevenZip/7zMemBuffer.h"
+
+//Creates a reader without an archive.
+SZReader::SZReader()
+{
+	//The destructor calls SzArEx_Free() on _archive even when no archive was ever loaded:
+	//put the structure in its empty state so that this call never operates on uninitialized pointers.
+	SzArEx_Init(&_archive);
+}
+
+//Releases the memory held by the archive structure.
+SZReader::~SZReader()
+{
+	//Called unconditionally: there is no _initialized check here
+	SzArEx_Free(&_archive, &_allocImp);
+}
+
+//Opens the 7z archive stored in the `size` bytes at `buffer`, after releasing a previously opened archive.
+//Returns true when the archive could be opened.
+//NOTE(review): _initialized is only cleared here, never set - presumably the caller sets it from the return value; confirm.
+bool SZReader::InternalLoadArchive(void* buffer, size_t size)
+{
+	//Drop the archive that is currently open, if any
+	if(_initialized) {
+		SzArEx_Free(&_archive, &_allocImp);
+		_initialized = false;
+	}
+
+	//Set up the in-memory input stream and an empty archive structure
+	MemBufferInit(&_memBufferStream, &_lookStream, buffer, size);
+	CrcGenerateTable();
+	SzArEx_Init(&_archive);
+
+	ISzAlloc mainAllocator{ SzAlloc, SzFree };
+	ISzAlloc tempAllocator{ SzAllocTemp, SzFreeTemp };
+	const auto openResult = SzArEx_Open(&_archive, &_lookStream.s, &mainAllocator, &tempAllocator);
+	return openResult == SZ_OK;
+}
+
+//Extracts the first non-directory entry whose name equals `filename` into `output`.
+//Returns true when the entry was found and extracted; `output` is left untouched otherwise.
+//Always returns false when no archive is loaded.
+bool SZReader::ExtractFile(string filename, vector<uint8_t> &output)
+{
+	bool result = false;
+	if(_initialized) {
+		//Sized per entry: a fixed size buffer can be overrun by an archive that contains a very long entry name
+		vector<char16_t> utf16Filename;
+
+		uint32_t blockIndex = 0xFFFFFFFF;
+		uint8_t *outBuffer = 0;
+		size_t outBufferSize = 0;
+
+		for(uint32_t i = 0; i < _archive.NumFiles; i++) {
+			size_t offset = 0;
+			size_t outSizeProcessed = 0;
+			unsigned isDir = SzArEx_IsDir(&_archive, i);
+			if(isDir) {
+				continue;
+			}
+
+			//With a null destination the call only returns the required size, in UTF-16 units, terminator included
+			size_t nameLength = SzArEx_GetFileNameUtf16(&_archive, i, nullptr);
+			//One extra zeroed unit guarantees that the string is terminated
+			utf16Filename.assign(nameLength + 1, u'\0');
+			SzArEx_GetFileNameUtf16(&_archive, i, (uint16_t*)utf16Filename.data());
+			string entryName = utf8::utf8::encode(std::u16string(utf16Filename.data()));
+			if(filename == entryName) {
+				WRes res = SzArEx_Extract(&_archive, &_lookStream.s, i, &blockIndex, &outBuffer, &outBufferSize, &offset, &outSizeProcessed, &_allocImp, &_allocTempImp);
+				if(res == SZ_OK) {
+					//The entry occupies outSizeProcessed bytes starting at `offset` in the extracted block
+					output = vector<uint8_t>(outBuffer+offset, outBuffer+offset+outSizeProcessed);
+					result = true;
+				}
+				IAlloc_Free(&_allocImp, outBuffer);
+				break;
+			}
+		}
+	}
+
+	return result;
+}
+
+//Returns the UTF-8 names of all entries of the loaded archive that are not directories, in archive order.
+//Returns an empty list when no archive is loaded.
+vector<string> SZReader::InternalGetFileList()
+{
+	vector<string> filenames;
+	if(!_initialized) {
+		return filenames;
+	}
+
+	//Sized per entry: a fixed size buffer can be overrun by an archive that contains a very long entry name
+	vector<char16_t> utf16Filename;
+	for(uint32_t i = 0; i < _archive.NumFiles; i++) {
+		unsigned isDir = SzArEx_IsDir(&_archive, i);
+		if(isDir) {
+			continue;
+		}
+
+		//With a null destination the call only returns the required size, in UTF-16 units, terminator included
+		size_t nameLength = SzArEx_GetFileNameUtf16(&_archive, i, nullptr);
+		//One extra zeroed unit guarantees that the string is terminated
+		utf16Filename.assign(nameLength + 1, u'\0');
+		SzArEx_GetFileNameUtf16(&_archive, i, (uint16_t*)utf16Filename.data());
+		string filename = utf8::utf8::encode(std::u16string(utf16Filename.data()));
+		filenames.push_back(filename);
+	}
+
+	return filenames;
+}
\ No newline at end of file
diff --git a/Utilities/SZReader.h b/Utilities/SZReader.h
new file mode 100644
index 0000000..9c3634f
--- /dev/null
+++ b/Utilities/SZReader.h
@@ -0,0 +1,28 @@
+#pragma once
+#include "stdafx.h"
+#include "ArchiveReader.h"
+#include "../SevenZip/7z.h"
+#include "../SevenZip/7zAlloc.h"
+#include "../SevenZip/7zCrc.h"
+#include "../SevenZip/7zTypes.h"
+#include "../SevenZip/7zMemBuffer.h"
+
+//ArchiveReader implementation for 7z archives held in memory, built on the 7-Zip C code in ../SevenZip.
+class SZReader : public ArchiveReader
+{
+private:
+	//Input stream over the memory buffer of the archive, set up by MemBufferInit() when an archive is loaded
+	CMemBufferInStream _memBufferStream;
+	//Look-ahead stream set up together with _memBufferStream; its `s` member is what the 7z functions read from
+	CLookToRead _lookStream;
+	//Archive structure filled by SzArEx_Open() and released by SzArEx_Free()
+	CSzArEx _archive;
+	//Allocator used to free the archive and to extract entries
+	ISzAlloc _allocImp{ SzAlloc, SzFree };
+	//Allocator for temporary data, used when extracting entries
+	ISzAlloc _allocTempImp{ SzAllocTemp, SzFreeTemp };
+
+protected:
+	//Opens the archive stored in the `size` bytes at `buffer`; returns true on success
+	bool InternalLoadArchive(void* buffer, size_t size);
+	//Returns the names of all entries that are not directories
+	vector<string> InternalGetFileList();
+
+public:
+	SZReader();
+	virtual ~SZReader();
+
+	//Extracts the entry named `filename` into `output`; returns true when it was found and extracted
+	bool ExtractFile(string filename, vector<uint8_t> &output);
+};
\ No newline at end of file
diff --git a/Utilities/Scale2x/scale2x.cpp b/Utilities/Scale2x/scale2x.cpp
new file mode 100644
index 0000000..43f31b1
--- /dev/null
+++ b/Utilities/Scale2x/scale2x.cpp
@@ -0,0 +1,484 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * This file contains a C and MMX implementation of the Scale2x effect.
+ *
+ * You can find an high level description of the effect at :
+ *
+ * http://www.scale2x.it/
+ */
+
+#include "../stdafx.h"
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "scale2x.h"
+
+#include <assert.h>
+
+/***************************************************************************/
+/* Scale2x C implementation */
+
+/**
+ * Define the macro USE_SCALE_RANDOMWRITE to enable
+ * an optimized version which writes memory in random order.
+ * This version is a little faster if you write in system memory.
+ * But it's a lot slower if you write in video memory.
+ * So, enable it only if you are sure to never write directly in video memory.
+ */
+/* #define USE_SCALE_RANDOMWRITE */
+
+/*
+ * Expands the `count` 8 bit pixels of row src1 into 2 * count pixels at dst, using the
+ * neighbouring rows src0 and src2. A pixel is only replaced by the src0 pixel of its column
+ * when src0 and src2 differ in that column and the left and right neighbours in src1 differ.
+ * At both ends of the row the pixel itself stands in for the missing neighbour.
+ * count must be at least 2.
+ */
+static inline void scale2x_8_def_border(scale2x_uint8* dst, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+	unsigned x = 0; /* column of the source pixel being expanded */
+	unsigned inner; /* columns left that have both a left and a right neighbour */
+
+	assert(count >= 2);
+
+	/* leftmost column */
+	if (src0[x] != src2[x] && src1[x] != src1[x + 1]) {
+		dst[0] = src1[x] == src0[x] ? src0[x] : src1[x];
+		dst[1] = src1[x + 1] == src0[x] ? src0[x] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+	dst += 2;
+	++x;
+
+	/* columns in between */
+	for (inner = count - 2; inner != 0; --inner) {
+		if (src0[x] != src2[x] && src1[x - 1] != src1[x + 1]) {
+			dst[0] = src1[x - 1] == src0[x] ? src0[x] : src1[x];
+			dst[1] = src1[x + 1] == src0[x] ? src0[x] : src1[x];
+		} else {
+			dst[0] = src1[x];
+			dst[1] = src1[x];
+		}
+		dst += 2;
+		++x;
+	}
+
+	/* rightmost column */
+	if (src0[x] != src2[x] && src1[x - 1] != src1[x]) {
+		dst[0] = src1[x - 1] == src0[x] ? src0[x] : src1[x];
+		dst[1] = src1[x] == src0[x] ? src0[x] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+}
+
+/*
+ * Expands the `count` 8 bit pixels of row src1 into 2 * count pixels at dst, using the
+ * neighbouring rows src0 and src2. This is the variant used for the middle destination rows:
+ * a pixel is replaced by its left or right neighbour when that neighbour continues into
+ * src0 or src2 and the diagonal pixel on the other row differs from the current pixel.
+ * The outer half of the first and of the last pixel is always a plain copy.
+ * count must be at least 2.
+ */
+static inline void scale2x_8_def_center(scale2x_uint8* dst, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+	unsigned x = 0; /* column of the source pixel being expanded */
+	unsigned inner; /* columns left that have both a left and a right neighbour */
+
+	assert(count >= 2);
+
+	/* leftmost column */
+	if (src0[x] != src2[x] && src1[x] != src1[x + 1]) {
+		dst[0] = src1[x];
+		dst[1] = (src1[x + 1] == src0[x] && src1[x] != src2[x + 1]) || (src1[x + 1] == src2[x] && src1[x] != src0[x + 1]) ? src1[x + 1] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+	dst += 2;
+	++x;
+
+	/* columns in between */
+	for (inner = count - 2; inner != 0; --inner) {
+		if (src0[x] != src2[x] && src1[x - 1] != src1[x + 1]) {
+			dst[0] = (src1[x - 1] == src0[x] && src1[x] != src2[x - 1]) || (src1[x - 1] == src2[x] && src1[x] != src0[x - 1]) ? src1[x - 1] : src1[x];
+			dst[1] = (src1[x + 1] == src0[x] && src1[x] != src2[x + 1]) || (src1[x + 1] == src2[x] && src1[x] != src0[x + 1]) ? src1[x + 1] : src1[x];
+		} else {
+			dst[0] = src1[x];
+			dst[1] = src1[x];
+		}
+		dst += 2;
+		++x;
+	}
+
+	/* rightmost column */
+	if (src0[x] != src2[x] && src1[x - 1] != src1[x]) {
+		dst[0] = (src1[x - 1] == src0[x] && src1[x] != src2[x - 1]) || (src1[x - 1] == src2[x] && src1[x] != src0[x - 1]) ? src1[x - 1] : src1[x];
+		dst[1] = src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+}
+
+/*
+ * Expands the `count` 16 bit pixels of row src1 into 2 * count pixels at dst, using the
+ * neighbouring rows src0 and src2. A pixel is only replaced by the src0 pixel of its column
+ * when src0 and src2 differ in that column and the left and right neighbours in src1 differ.
+ * At both ends of the row the pixel itself stands in for the missing neighbour.
+ * count must be at least 2.
+ */
+static inline void scale2x_16_def_border(scale2x_uint16* dst, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+	unsigned x = 0; /* column of the source pixel being expanded */
+	unsigned inner; /* columns left that have both a left and a right neighbour */
+
+	assert(count >= 2);
+
+	/* leftmost column */
+	if (src0[x] != src2[x] && src1[x] != src1[x + 1]) {
+		dst[0] = src1[x] == src0[x] ? src0[x] : src1[x];
+		dst[1] = src1[x + 1] == src0[x] ? src0[x] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+	dst += 2;
+	++x;
+
+	/* columns in between */
+	for (inner = count - 2; inner != 0; --inner) {
+		if (src0[x] != src2[x] && src1[x - 1] != src1[x + 1]) {
+			dst[0] = src1[x - 1] == src0[x] ? src0[x] : src1[x];
+			dst[1] = src1[x + 1] == src0[x] ? src0[x] : src1[x];
+		} else {
+			dst[0] = src1[x];
+			dst[1] = src1[x];
+		}
+		dst += 2;
+		++x;
+	}
+
+	/* rightmost column */
+	if (src0[x] != src2[x] && src1[x - 1] != src1[x]) {
+		dst[0] = src1[x - 1] == src0[x] ? src0[x] : src1[x];
+		dst[1] = src1[x] == src0[x] ? src0[x] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+}
+
+/*
+ * Expands the `count` 16 bit pixels of row src1 into 2 * count pixels at dst, using the
+ * neighbouring rows src0 and src2. This is the variant used for the middle destination rows:
+ * a pixel is replaced by its left or right neighbour when that neighbour continues into
+ * src0 or src2 and the diagonal pixel on the other row differs from the current pixel.
+ * The outer half of the first and of the last pixel is always a plain copy.
+ * count must be at least 2.
+ */
+static inline void scale2x_16_def_center(scale2x_uint16* dst, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+	unsigned x = 0; /* column of the source pixel being expanded */
+	unsigned inner; /* columns left that have both a left and a right neighbour */
+
+	assert(count >= 2);
+
+	/* leftmost column */
+	if (src0[x] != src2[x] && src1[x] != src1[x + 1]) {
+		dst[0] = src1[x];
+		dst[1] = (src1[x + 1] == src0[x] && src1[x] != src2[x + 1]) || (src1[x + 1] == src2[x] && src1[x] != src0[x + 1]) ? src1[x + 1] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+	dst += 2;
+	++x;
+
+	/* columns in between */
+	for (inner = count - 2; inner != 0; --inner) {
+		if (src0[x] != src2[x] && src1[x - 1] != src1[x + 1]) {
+			dst[0] = (src1[x - 1] == src0[x] && src1[x] != src2[x - 1]) || (src1[x - 1] == src2[x] && src1[x] != src0[x - 1]) ? src1[x - 1] : src1[x];
+			dst[1] = (src1[x + 1] == src0[x] && src1[x] != src2[x + 1]) || (src1[x + 1] == src2[x] && src1[x] != src0[x + 1]) ? src1[x + 1] : src1[x];
+		} else {
+			dst[0] = src1[x];
+			dst[1] = src1[x];
+		}
+		dst += 2;
+		++x;
+	}
+
+	/* rightmost column */
+	if (src0[x] != src2[x] && src1[x - 1] != src1[x]) {
+		dst[0] = (src1[x - 1] == src0[x] && src1[x] != src2[x - 1]) || (src1[x - 1] == src2[x] && src1[x] != src0[x - 1]) ? src1[x - 1] : src1[x];
+		dst[1] = src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+}
+
+/*
+ * Expands the `count` 32 bit pixels of row src1 into 2 * count pixels at dst, using the
+ * neighbouring rows src0 and src2. A pixel is only replaced by the src0 pixel of its column
+ * when src0 and src2 differ in that column and the left and right neighbours in src1 differ.
+ * At both ends of the row the pixel itself stands in for the missing neighbour.
+ * count must be at least 2.
+ */
+static inline void scale2x_32_def_border(scale2x_uint32* dst, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+	unsigned x = 0; /* column of the source pixel being expanded */
+	unsigned inner; /* columns left that have both a left and a right neighbour */
+
+	assert(count >= 2);
+
+	/* leftmost column */
+	if (src0[x] != src2[x] && src1[x] != src1[x + 1]) {
+		dst[0] = src1[x] == src0[x] ? src0[x] : src1[x];
+		dst[1] = src1[x + 1] == src0[x] ? src0[x] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+	dst += 2;
+	++x;
+
+	/* columns in between */
+	for (inner = count - 2; inner != 0; --inner) {
+		if (src0[x] != src2[x] && src1[x - 1] != src1[x + 1]) {
+			dst[0] = src1[x - 1] == src0[x] ? src0[x] : src1[x];
+			dst[1] = src1[x + 1] == src0[x] ? src0[x] : src1[x];
+		} else {
+			dst[0] = src1[x];
+			dst[1] = src1[x];
+		}
+		dst += 2;
+		++x;
+	}
+
+	/* rightmost column */
+	if (src0[x] != src2[x] && src1[x - 1] != src1[x]) {
+		dst[0] = src1[x - 1] == src0[x] ? src0[x] : src1[x];
+		dst[1] = src1[x] == src0[x] ? src0[x] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+}
+
+/*
+ * Expands the `count` 32 bit pixels of row src1 into 2 * count pixels at dst, using the
+ * neighbouring rows src0 and src2. This is the variant used for the middle destination rows:
+ * a pixel is replaced by its left or right neighbour when that neighbour continues into
+ * src0 or src2 and the diagonal pixel on the other row differs from the current pixel.
+ * The outer half of the first and of the last pixel is always a plain copy.
+ * count must be at least 2.
+ */
+static inline void scale2x_32_def_center(scale2x_uint32* dst, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+	unsigned x = 0; /* column of the source pixel being expanded */
+	unsigned inner; /* columns left that have both a left and a right neighbour */
+
+	assert(count >= 2);
+
+	/* leftmost column */
+	if (src0[x] != src2[x] && src1[x] != src1[x + 1]) {
+		dst[0] = src1[x];
+		dst[1] = (src1[x + 1] == src0[x] && src1[x] != src2[x + 1]) || (src1[x + 1] == src2[x] && src1[x] != src0[x + 1]) ? src1[x + 1] : src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+	dst += 2;
+	++x;
+
+	/* columns in between */
+	for (inner = count - 2; inner != 0; --inner) {
+		if (src0[x] != src2[x] && src1[x - 1] != src1[x + 1]) {
+			dst[0] = (src1[x - 1] == src0[x] && src1[x] != src2[x - 1]) || (src1[x - 1] == src2[x] && src1[x] != src0[x - 1]) ? src1[x - 1] : src1[x];
+			dst[1] = (src1[x + 1] == src0[x] && src1[x] != src2[x + 1]) || (src1[x + 1] == src2[x] && src1[x] != src0[x + 1]) ? src1[x + 1] : src1[x];
+		} else {
+			dst[0] = src1[x];
+			dst[1] = src1[x];
+		}
+		dst += 2;
+		++x;
+	}
+
+	/* rightmost column */
+	if (src0[x] != src2[x] && src1[x - 1] != src1[x]) {
+		dst[0] = (src1[x - 1] == src0[x] && src1[x] != src2[x - 1]) || (src1[x - 1] == src2[x] && src1[x] != src0[x - 1]) ? src1[x - 1] : src1[x];
+		dst[1] = src1[x];
+	} else {
+		dst[0] = src1[x];
+		dst[1] = src1[x];
+	}
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 8 bits.
+ * The function is implemented in C.
+ * The pixels over the left and right borders are assumed of the same color of
+ * the pixels on the border.
+ * Note that the implementation is optimized to write data sequentially to
+ * maximize the bandwidth on video memory.
+ * The second destination row is computed by the same helper as the first one,
+ * with the previous and next source rows swapped.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_8_def_whole() is not defined in the visible part of this file; */
+	/* presumably this branch does not compile when USE_SCALE_RANDOMWRITE is defined - confirm. */
+	scale2x_8_def_whole(dst0, dst1, src0, src1, src2, count);
+#else
+	scale2x_8_def_border(dst0, src0, src1, src2, count);
+	scale2x_8_def_border(dst1, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 16 bits.
+ * This function operates like scale2x_8_def() but for 16 bits pixels.
+ * The second destination row is computed by the same helper as the first one,
+ * with the previous and next source rows swapped.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_16_def_whole() is not defined in the visible part of this file; */
+	/* presumably this branch does not compile when USE_SCALE_RANDOMWRITE is defined - confirm. */
+	scale2x_16_def_whole(dst0, dst1, src0, src1, src2, count);
+#else
+	scale2x_16_def_border(dst0, src0, src1, src2, count);
+	scale2x_16_def_border(dst1, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 32 bits.
+ * This function operates like scale2x_8_def() but for 32 bits pixels.
+ * The second destination row is computed by the same helper as the first one,
+ * with the previous and next source rows swapped.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_32_def_whole() is not defined in the visible part of this file; */
+	/* presumably this branch does not compile when USE_SCALE_RANDOMWRITE is defined - confirm. */
+	scale2x_32_def_whole(dst0, dst1, src0, src1, src2, count);
+#else
+	scale2x_32_def_border(dst0, src0, src1, src2, count);
+	scale2x_32_def_border(dst1, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2x3 a row of pixels of 8 bits.
+ * Produces three destination rows of double length: dst0 and dst2 are the two
+ * rows that scale2x_8_def() would produce, dst1 is an extra middle row.
+ * \note Like scale2x_8_def();
+ */
+void scale2x3_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, scale2x_uint8* dst2, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_8_def_whole() is not defined in the visible part of this file - confirm this branch builds. */
+	scale2x_8_def_whole(dst0, dst2, src0, src1, src2, count);
+	scale2x_8_def_center(dst1, src0, src1, src2, count);
+#else
+	/* the destination rows are written one after the other, from top to bottom */
+	scale2x_8_def_border(dst0, src0, src1, src2, count);
+	scale2x_8_def_center(dst1, src0, src1, src2, count);
+	scale2x_8_def_border(dst2, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2x3 a row of pixels of 16 bits.
+ * Produces three destination rows of double length: dst0 and dst2 are the two
+ * rows that scale2x_16_def() would produce, dst1 is an extra middle row.
+ * \note Like scale2x_16_def();
+ */
+void scale2x3_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, scale2x_uint16* dst2, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_16_def_whole() is not defined in the visible part of this file - confirm this branch builds. */
+	scale2x_16_def_whole(dst0, dst2, src0, src1, src2, count);
+	scale2x_16_def_center(dst1, src0, src1, src2, count);
+#else
+	/* the destination rows are written one after the other, from top to bottom */
+	scale2x_16_def_border(dst0, src0, src1, src2, count);
+	scale2x_16_def_center(dst1, src0, src1, src2, count);
+	scale2x_16_def_border(dst2, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2x3 a row of pixels of 32 bits.
+ * Produces three destination rows of double length: dst0 and dst2 are the two
+ * rows that scale2x_32_def() would produce, dst1 is an extra middle row.
+ * \note Like scale2x_32_def();
+ */
+void scale2x3_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, scale2x_uint32* dst2, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_32_def_whole() is not defined in the visible part of this file - confirm this branch builds. */
+	scale2x_32_def_whole(dst0, dst2, src0, src1, src2, count);
+	scale2x_32_def_center(dst1, src0, src1, src2, count);
+#else
+	/* the destination rows are written one after the other, from top to bottom */
+	scale2x_32_def_border(dst0, src0, src1, src2, count);
+	scale2x_32_def_center(dst1, src0, src1, src2, count);
+	scale2x_32_def_border(dst2, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2x4 a row of pixels of 8 bits.
+ * Produces four destination rows of double length: dst0 and dst3 are the two
+ * rows that scale2x_8_def() would produce, dst1 and dst2 are two identical
+ * middle rows.
+ * \note Like scale2x_8_def();
+ */
+void scale2x4_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, scale2x_uint8* dst2, scale2x_uint8* dst3, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_8_def_whole() is not defined in the visible part of this file - confirm this branch builds. */
+	scale2x_8_def_whole(dst0, dst3, src0, src1, src2, count);
+	scale2x_8_def_center(dst1, src0, src1, src2, count);
+	scale2x_8_def_center(dst2, src0, src1, src2, count);
+#else
+	/* the destination rows are written one after the other, from top to bottom */
+	scale2x_8_def_border(dst0, src0, src1, src2, count);
+	scale2x_8_def_center(dst1, src0, src1, src2, count);
+	scale2x_8_def_center(dst2, src0, src1, src2, count);
+	scale2x_8_def_border(dst3, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2x4 a row of pixels of 16 bits.
+ * Produces four destination rows of double length: dst0 and dst3 are the two
+ * rows that scale2x_16_def() would produce, dst1 and dst2 are two identical
+ * middle rows.
+ * \note Like scale2x_16_def();
+ */
+void scale2x4_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, scale2x_uint16* dst2, scale2x_uint16* dst3, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_16_def_whole() is not defined in the visible part of this file - confirm this branch builds. */
+	scale2x_16_def_whole(dst0, dst3, src0, src1, src2, count);
+	scale2x_16_def_center(dst1, src0, src1, src2, count);
+	scale2x_16_def_center(dst2, src0, src1, src2, count);
+#else
+	/* the destination rows are written one after the other, from top to bottom */
+	scale2x_16_def_border(dst0, src0, src1, src2, count);
+	scale2x_16_def_center(dst1, src0, src1, src2, count);
+	scale2x_16_def_center(dst2, src0, src1, src2, count);
+	scale2x_16_def_border(dst3, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 2x4 a row of pixels of 32 bits.
+ * Produces four destination rows of double length: dst0 and dst3 are the two
+ * rows that scale2x_32_def() would produce, dst1 and dst2 are two identical
+ * middle rows.
+ * \note Like scale2x_32_def();
+ */
+void scale2x4_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, scale2x_uint32* dst2, scale2x_uint32* dst3, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	/* NOTE(review): scale2x_32_def_whole() is not defined in the visible part of this file - confirm this branch builds. */
+	scale2x_32_def_whole(dst0, dst3, src0, src1, src2, count);
+	scale2x_32_def_center(dst1, src0, src1, src2, count);
+	scale2x_32_def_center(dst2, src0, src1, src2, count);
+#else
+	/* the destination rows are written one after the other, from top to bottom */
+	scale2x_32_def_border(dst0, src0, src1, src2, count);
+	scale2x_32_def_center(dst1, src0, src1, src2, count);
+	scale2x_32_def_center(dst2, src0, src1, src2, count);
+	scale2x_32_def_border(dst3, src2, src1, src0, count);
+#endif
+}
diff --git a/Utilities/Scale2x/scale2x.h b/Utilities/Scale2x/scale2x.h
new file mode 100644
index 0000000..e1431c4
--- /dev/null
+++ b/Utilities/Scale2x/scale2x.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __SCALE2X_H /* NOTE(review): identifiers containing a double underscore are reserved to the implementation */
+#define __SCALE2X_H
+/* Pixel element types; the bit widths in the names are not enforced by these typedefs. */
+typedef unsigned char scale2x_uint8;
+typedef unsigned short scale2x_uint16;
+typedef unsigned scale2x_uint32;
+/* Scale2x row entry points: two destination rows (dst0, dst1) and three source rows (src0..src2). */
+void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+/* Scale2x3 row entry points: same source rows, three destination rows (dst0..dst2). */
+void scale2x3_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, scale2x_uint8* dst2, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x3_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, scale2x_uint16* dst2, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x3_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, scale2x_uint32* dst2, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+/* Scale2x4 row entry points: same source rows, four destination rows (dst0..dst3). */
+void scale2x4_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, scale2x_uint8* dst2, scale2x_uint8* dst3, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x4_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, scale2x_uint16* dst2, scale2x_uint16* dst3, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x4_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, scale2x_uint32* dst2, scale2x_uint32* dst3, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+
+#endif /* __SCALE2X_H */
+
diff --git a/Utilities/Scale2x/scale3x.cpp b/Utilities/Scale2x/scale3x.cpp
new file mode 100644
index 0000000..8bfe328
--- /dev/null
+++ b/Utilities/Scale2x/scale3x.cpp
@@ -0,0 +1,423 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * This file contains a C implementation of the Scale3x effect.
+ *
+ * You can find a high-level description of the effect at:
+ *
+ * http://www.scale2x.it/
+ */
+
+#include "../stdafx.h"
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "scale3x.h"
+
+#include <assert.h>
+
+/***************************************************************************/
+/* Scale3x C implementation */
+
+/**
+ * Define the macro USE_SCALE_RANDOMWRITE to select an optimized version that
+ * writes memory in random order: a little faster when writing to system memory,
+ * a lot slower when writing to video memory, so enable it only if you are sure
+ * you never write directly to video memory. NOTE: this file does not define the
+ * scale3x_*_def_whole() routines the macro selects, so defining it breaks the build.
+ */
+/* #define USE_SCALE_RANDOMWRITE */
+
+static inline void scale3x_8_def_border(scale3x_uint8* dst, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count)
+{
+	assert(count >= 2); /* the first and last pixels are each handled outside the loop */
+	/* Writes 3 * count pixels to dst; colors come from src1 or src0, src2 is only compared against src0. */
+	/* first pixel: no left neighbour, so src1[0] and src0[0] stand in for src1[-1] and src0[-1] */
+	if (src0[0] != src2[0] && src1[0] != src1[1]) {
+		dst[0] = src1[0];
+		dst[1] = (src1[0] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[0]) ? src0[0] : src1[0];
+		dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+	} else { /* otherwise the source pixel is replicated three times */
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+	++src0;
+	++src1;
+	++src2;
+	dst += 3; /* three output pixels per source pixel */
+
+	/* central pixels: both horizontal neighbours are available */
+	count -= 2;
+	while (count) {
+		if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+			dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+			dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+			dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+		} else {
+			dst[0] = src1[0];
+			dst[1] = src1[0];
+			dst[2] = src1[0];
+		}
+
+		++src0;
+		++src1;
+		++src2;
+		dst += 3;
+		--count;
+	}
+
+	/* last pixel: no right neighbour, so src1[0] and src0[0] stand in for src1[1] and src0[1] */
+	if (src0[0] != src2[0] && src1[-1] != src1[0]) {
+		dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+		dst[1] = (src1[-1] == src0[0] && src1[0] != src0[0]) || (src1[0] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+		dst[2] = src1[0];
+	} else {
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+}
+
+static inline void scale3x_8_def_center(scale3x_uint8* dst, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count)
+{
+	assert(count >= 2); /* the first and last pixels are each handled outside the loop */
+	/* Writes 3 * count pixels to dst; dst[1] of every triple is the unchanged source pixel src1[0]. */
+	/* first pixel: no left neighbour, so column 0 of each row stands in for column -1 */
+	if (src0[0] != src2[0] && src1[0] != src1[1]) {
+		dst[0] = (src1[0] == src0[0] && src1[0] != src2[0]) || (src1[0] == src2[0] && src1[0] != src0[0]) ? src1[0] : src1[0]; /* both arms yield src1[0] */
+		dst[1] = src1[0];
+		dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+	} else { /* otherwise the source pixel is replicated three times */
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+	++src0;
+	++src1;
+	++src2;
+	dst += 3; /* three output pixels per source pixel */
+
+	/* central pixels: both horizontal neighbours are available */
+	count -= 2;
+	while (count) {
+		if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+			dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+			dst[1] = src1[0];
+			dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+		} else {
+			dst[0] = src1[0];
+			dst[1] = src1[0];
+			dst[2] = src1[0];
+		}
+
+		++src0;
+		++src1;
+		++src2;
+		dst += 3;
+		--count;
+	}
+
+	/* last pixel: no right neighbour, so column 0 of each row stands in for column +1 */
+	if (src0[0] != src2[0] && src1[-1] != src1[0]) {
+		dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+		dst[1] = src1[0];
+		dst[2] = (src1[0] == src0[0] && src1[0] != src2[0]) || (src1[0] == src2[0] && src1[0] != src0[0]) ? src1[0] : src1[0]; /* both arms yield src1[0] */
+	} else {
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+}
+
+static inline void scale3x_16_def_border(scale3x_uint16* dst, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count)
+{
+	assert(count >= 2); /* the first and last pixels are each handled outside the loop */
+	/* Writes 3 * count pixels to dst; colors come from src1 or src0, src2 is only compared against src0. */
+	/* first pixel: no left neighbour, so src1[0] and src0[0] stand in for src1[-1] and src0[-1] */
+	if (src0[0] != src2[0] && src1[0] != src1[1]) {
+		dst[0] = src1[0];
+		dst[1] = (src1[0] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[0]) ? src0[0] : src1[0];
+		dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+	} else { /* otherwise the source pixel is replicated three times */
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+	++src0;
+	++src1;
+	++src2;
+	dst += 3; /* three output pixels per source pixel */
+
+	/* central pixels: both horizontal neighbours are available */
+	count -= 2;
+	while (count) {
+		if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+			dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+			dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+			dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+		} else {
+			dst[0] = src1[0];
+			dst[1] = src1[0];
+			dst[2] = src1[0];
+		}
+
+		++src0;
+		++src1;
+		++src2;
+		dst += 3;
+		--count;
+	}
+
+	/* last pixel: no right neighbour, so src1[0] and src0[0] stand in for src1[1] and src0[1] */
+	if (src0[0] != src2[0] && src1[-1] != src1[0]) {
+		dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+		dst[1] = (src1[-1] == src0[0] && src1[0] != src0[0]) || (src1[0] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+		dst[2] = src1[0];
+	} else {
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+}
+
+static inline void scale3x_16_def_center(scale3x_uint16* dst, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count)
+{
+	assert(count >= 2); /* the first and last pixels are each handled outside the loop */
+	/* Writes 3 * count pixels to dst; dst[1] of every triple is the unchanged source pixel src1[0]. */
+	/* first pixel: no left neighbour, so column 0 of each row stands in for column -1 */
+	if (src0[0] != src2[0] && src1[0] != src1[1]) {
+		dst[0] = (src1[0] == src0[0] && src1[0] != src2[0]) || (src1[0] == src2[0] && src1[0] != src0[0]) ? src1[0] : src1[0]; /* both arms yield src1[0] */
+		dst[1] = src1[0];
+		dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+	} else { /* otherwise the source pixel is replicated three times */
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+	++src0;
+	++src1;
+	++src2;
+	dst += 3; /* three output pixels per source pixel */
+
+	/* central pixels: both horizontal neighbours are available */
+	count -= 2;
+	while (count) {
+		if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+			dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+			dst[1] = src1[0];
+			dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+		} else {
+			dst[0] = src1[0];
+			dst[1] = src1[0];
+			dst[2] = src1[0];
+		}
+
+		++src0;
+		++src1;
+		++src2;
+		dst += 3;
+		--count;
+	}
+
+	/* last pixel: no right neighbour, so column 0 of each row stands in for column +1 */
+	if (src0[0] != src2[0] && src1[-1] != src1[0]) {
+		dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+		dst[1] = src1[0];
+		dst[2] = (src1[0] == src0[0] && src1[0] != src2[0]) || (src1[0] == src2[0] && src1[0] != src0[0]) ? src1[0] : src1[0]; /* both arms yield src1[0] */
+	} else {
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+}
+
+static inline void scale3x_32_def_border(scale3x_uint32* dst, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count)
+{
+	assert(count >= 2); /* the first and last pixels are each handled outside the loop */
+	/* Writes 3 * count pixels to dst; colors come from src1 or src0, src2 is only compared against src0. */
+	/* first pixel: no left neighbour, so src1[0] and src0[0] stand in for src1[-1] and src0[-1] */
+	if (src0[0] != src2[0] && src1[0] != src1[1]) {
+		dst[0] = src1[0];
+		dst[1] = (src1[0] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[0]) ? src0[0] : src1[0];
+		dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+	} else { /* otherwise the source pixel is replicated three times */
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+	++src0;
+	++src1;
+	++src2;
+	dst += 3; /* three output pixels per source pixel */
+
+	/* central pixels: both horizontal neighbours are available */
+	count -= 2;
+	while (count) {
+		if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+			dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+			dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+			dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+		} else {
+			dst[0] = src1[0];
+			dst[1] = src1[0];
+			dst[2] = src1[0];
+		}
+
+		++src0;
+		++src1;
+		++src2;
+		dst += 3;
+		--count;
+	}
+
+	/* last pixel: no right neighbour, so src1[0] and src0[0] stand in for src1[1] and src0[1] */
+	if (src0[0] != src2[0] && src1[-1] != src1[0]) {
+		dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+		dst[1] = (src1[-1] == src0[0] && src1[0] != src0[0]) || (src1[0] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+		dst[2] = src1[0];
+	} else {
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+}
+
+static inline void scale3x_32_def_center(scale3x_uint32* dst, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count)
+{
+	assert(count >= 2); /* the first and last pixels are each handled outside the loop */
+	/* Writes 3 * count pixels to dst; dst[1] of every triple is the unchanged source pixel src1[0]. */
+	/* first pixel: no left neighbour, so column 0 of each row stands in for column -1 */
+	if (src0[0] != src2[0] && src1[0] != src1[1]) {
+		dst[0] = (src1[0] == src0[0] && src1[0] != src2[0]) || (src1[0] == src2[0] && src1[0] != src0[0]) ? src1[0] : src1[0]; /* both arms yield src1[0] */
+		dst[1] = src1[0];
+		dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+	} else { /* otherwise the source pixel is replicated three times */
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+	++src0;
+	++src1;
+	++src2;
+	dst += 3; /* three output pixels per source pixel */
+
+	/* central pixels: both horizontal neighbours are available */
+	count -= 2;
+	while (count) {
+		if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+			dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+			dst[1] = src1[0];
+			dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+		} else {
+			dst[0] = src1[0];
+			dst[1] = src1[0];
+			dst[2] = src1[0];
+		}
+
+		++src0;
+		++src1;
+		++src2;
+		dst += 3;
+		--count;
+	}
+
+	/* last pixel: no right neighbour, so column 0 of each row stands in for column +1 */
+	if (src0[0] != src2[0] && src1[-1] != src1[0]) {
+		dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+		dst[1] = src1[0];
+		dst[2] = (src1[0] == src0[0] && src1[0] != src2[0]) || (src1[0] == src2[0] && src1[0] != src0[0]) ? src1[0] : src1[0]; /* both arms yield src1[0] */
+	} else {
+		dst[0] = src1[0];
+		dst[1] = src1[0];
+		dst[2] = src1[0];
+	}
+}
+
+/**
+ * Scale by a factor of 3 a row of pixels of 8 bits.
+ * The function is implemented in C.
+ * The pixels over the left and right borders are assumed of the same color of
+ * the pixels on the border.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, triple length in pixels.
+ * \param dst1 Second destination row, triple length in pixels.
+ * \param dst2 Third destination row, triple length in pixels.
+ */
+void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	scale3x_8_def_whole(dst0, dst1, dst2, src0, src1, src2, count);
+#else
+	scale3x_8_def_border(dst0, src0, src1, src2, count);
+	scale3x_8_def_center(dst1, src0, src1, src2, count);
+	scale3x_8_def_border(dst2, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 3 a row of pixels of 16 bits.
+ * This function operates like scale3x_8_def() but for 16 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, triple length in pixels.
+ * \param dst1 Second destination row, triple length in pixels.
+ * \param dst2 Third destination row, triple length in pixels.
+ */
+void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	scale3x_16_def_whole(dst0, dst1, dst2, src0, src1, src2, count);
+#else
+	scale3x_16_def_border(dst0, src0, src1, src2, count);
+	scale3x_16_def_center(dst1, src0, src1, src2, count);
+	scale3x_16_def_border(dst2, src2, src1, src0, count);
+#endif
+}
+
+/**
+ * Scale by a factor of 3 a row of pixels of 32 bits.
+ * This function operates like scale3x_8_def() but for 32 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, triple length in pixels.
+ * \param dst1 Second destination row, triple length in pixels.
+ * \param dst2 Third destination row, triple length in pixels.
+ */
+void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count)
+{
+#ifdef USE_SCALE_RANDOMWRITE
+	scale3x_32_def_whole(dst0, dst1, dst2, src0, src1, src2, count);
+#else
+	scale3x_32_def_border(dst0, src0, src1, src2, count);
+	scale3x_32_def_center(dst1, src0, src1, src2, count);
+	scale3x_32_def_border(dst2, src2, src1, src0, count);
+#endif
+}
+
diff --git a/Utilities/Scale2x/scale3x.h b/Utilities/Scale2x/scale3x.h
new file mode 100644
index 0000000..895752a
--- /dev/null
+++ b/Utilities/Scale2x/scale3x.h
@@ -0,0 +1,29 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __SCALE3X_H /* NOTE(review): identifiers containing a double underscore are reserved to the implementation */
+#define __SCALE3X_H
+/* Pixel element types; the bit widths in the names are not enforced by these typedefs. */
+typedef unsigned char scale3x_uint8;
+typedef unsigned short scale3x_uint16;
+typedef unsigned scale3x_uint32;
+/* Scale3x row entry points: three destination rows (dst0..dst2) and three source rows (src0..src2). */
+void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count);
+void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count);
+void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count);
+
+#endif /* __SCALE3X_H */
+
diff --git a/Utilities/Scale2x/scalebit.cpp b/Utilities/Scale2x/scalebit.cpp
new file mode 100644
index 0000000..e556dc3
--- /dev/null
+++ b/Utilities/Scale2x/scalebit.cpp
@@ -0,0 +1,473 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applied to a generic bitmap.
+ *
+ * You can find a high-level description of the effect at:
+ *
+ * http://www.scale2x.it/
+ */
+
+#include "../stdafx.h"
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "scale2x.h"
+#include "scale3x.h"
+
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+
+#if !(defined(__MACH__) || defined(__FreeBSD__))
+#include <malloc.h>
+#endif
+
+#define SSDST(bits, num) (scale2x_uint##bits *)dst##num /* casts the enclosing function's dstN to a scale2x_uintBITS pointer */
+#define SSSRC(bits, num) (const scale2x_uint##bits *)src##num /* casts the enclosing function's srcN to a const scale2x_uintBITS pointer */
+
+/**
+ * Apply the Scale2x effect on a group of rows, dispatching on bytes per pixel (1, 2 or 4). Used internally.
+ */
+static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+	switch (pixel) { /* no default: any other pixel size matches no case and nothing is written */
+		case 1 : scale2x_8_def(SSDST(8,0), SSDST(8,1), SSSRC(8,0), SSSRC(8,1), SSSRC(8,2), pixel_per_row); break;
+		case 2 : scale2x_16_def(SSDST(16,0), SSDST(16,1), SSSRC(16,0), SSSRC(16,1), SSSRC(16,2), pixel_per_row); break;
+		case 4 : scale2x_32_def(SSDST(32,0), SSDST(32,1), SSSRC(32,0), SSSRC(32,1), SSSRC(32,2), pixel_per_row); break;
+	}
+}
+
+/**
+ * Apply the Scale2x3 effect on a group of rows, dispatching on bytes per pixel (1, 2 or 4). Used internally.
+ */
+static inline void stage_scale2x3(void* dst0, void* dst1, void* dst2, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+	switch (pixel) { /* no default: any other pixel size matches no case and nothing is written */
+		case 1 : scale2x3_8_def(SSDST(8,0), SSDST(8,1), SSDST(8,2), SSSRC(8,0), SSSRC(8,1), SSSRC(8,2), pixel_per_row); break;
+		case 2 : scale2x3_16_def(SSDST(16,0), SSDST(16,1), SSDST(16,2), SSSRC(16,0), SSSRC(16,1), SSSRC(16,2), pixel_per_row); break;
+		case 4 : scale2x3_32_def(SSDST(32,0), SSDST(32,1), SSDST(32,2), SSSRC(32,0), SSSRC(32,1), SSSRC(32,2), pixel_per_row); break;
+	}
+}
+
+/**
+ * Apply the Scale2x4 effect on a group of rows, dispatching on bytes per pixel (1, 2 or 4). Used internally.
+ */
+static inline void stage_scale2x4(void* dst0, void* dst1, void* dst2, void* dst3, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+	switch (pixel) { /* no default: any other pixel size matches no case and nothing is written */
+		case 1 : scale2x4_8_def(SSDST(8,0), SSDST(8,1), SSDST(8,2), SSDST(8,3), SSSRC(8,0), SSSRC(8,1), SSSRC(8,2), pixel_per_row); break;
+		case 2 : scale2x4_16_def(SSDST(16,0), SSDST(16,1), SSDST(16,2), SSDST(16,3), SSSRC(16,0), SSSRC(16,1), SSSRC(16,2), pixel_per_row); break;
+		case 4 : scale2x4_32_def(SSDST(32,0), SSDST(32,1), SSDST(32,2), SSDST(32,3), SSSRC(32,0), SSSRC(32,1), SSSRC(32,2), pixel_per_row); break;
+	}
+}
+
+/**
+ * Apply the Scale3x effect on a group of rows, dispatching on bytes per pixel (1, 2 or 4). Used internally.
+ */
+static inline void stage_scale3x(void* dst0, void* dst1, void* dst2, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+	switch (pixel) { /* no default; the casts name scale2x_uintN, which are typedefs of the same built-in types as scale3x_uintN */
+		case 1 : scale3x_8_def(SSDST(8,0), SSDST(8,1), SSDST(8,2), SSSRC(8,0), SSSRC(8,1), SSSRC(8,2), pixel_per_row); break;
+		case 2 : scale3x_16_def(SSDST(16,0), SSDST(16,1), SSDST(16,2), SSSRC(16,0), SSSRC(16,1), SSSRC(16,2), pixel_per_row); break;
+		case 4 : scale3x_32_def(SSDST(32,0), SSDST(32,1), SSDST(32,2), SSSRC(32,0), SSSRC(32,1), SSSRC(32,2), pixel_per_row); break;
+	}
+}
+
+/**
+ * Apply the Scale4x effect on a group of rows: two Scale2x stages over overlapping row triples, each 2 * pixel_per_row pixels wide. Used internally.
+ */
+static inline void stage_scale4x(void* dst0, void* dst1, void* dst2, void* dst3, const void* src0, const void* src1, const void* src2, const void* src3, unsigned pixel, unsigned pixel_per_row)
+{
+	stage_scale2x(dst0, dst1, src0, src1, src2, pixel, 2 * pixel_per_row); /* rows src0..src2 -> dst0, dst1 */
+	stage_scale2x(dst2, dst3, src1, src2, src3, pixel, 2 * pixel_per_row); /* rows src1..src3 -> dst2, dst3 */
+}
+
+#define SCDST(i) (dst+(i)*dst_slice) /* i-th row from the current dst; relies on locals named dst and dst_slice */
+#define SCSRC(i) (src+(i)*src_slice) /* i-th row from the current src; relies on locals named src and src_slice */
+#define SCMID(i) (mid[(i)]) /* i-th entry of the local mid pointer array; also used as an assignment target */
+
+/**
+ * Apply the Scale2x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 2x2 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap (1, 2 or 4).
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap; at least 2 (asserted).
+ */
+static void scale2x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* dst = (unsigned char*)void_dst;
+	const unsigned char* src = (const unsigned char*)void_src;
+	unsigned count;
+
+	assert(height >= 2); /* the first and last source rows are handled outside the loop */
+
+	count = height;
+	/* first row: there is no row above, so row 0 is passed as its own previous row */
+	stage_scale2x(SCDST(0), SCDST(1), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+
+	dst = SCDST(2);
+
+	count -= 2;
+	while (count) {
+		stage_scale2x(SCDST(0), SCDST(1), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+		dst = SCDST(2); /* two destination rows were written */
+		src = SCSRC(1); /* slide the three-row source window down by one */
+
+		--count;
+	}
+	/* last row: there is no row below, so the last row is passed as its own next row */
+	stage_scale2x(SCDST(0), SCDST(1), SCSRC(0), SCSRC(1), SCSRC(1), pixel, width);
+}
+
+/**
+ * Apply the Scale2x3 effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 2x3 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap (1, 2 or 4).
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap; at least 2 (asserted).
+ */
+static void scale2x3(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* dst = (unsigned char*)void_dst;
+	const unsigned char* src = (const unsigned char*)void_src;
+	unsigned count;
+
+	assert(height >= 2); /* the first and last source rows are handled outside the loop */
+
+	count = height;
+	/* first row: there is no row above, so row 0 is passed as its own previous row */
+	stage_scale2x3(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+
+	dst = SCDST(3);
+
+	count -= 2;
+	while (count) {
+		stage_scale2x3(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+		dst = SCDST(3); /* three destination rows were written */
+		src = SCSRC(1); /* slide the three-row source window down by one */
+
+		--count;
+	}
+	/* last row: there is no row below, so the last row is passed as its own next row */
+	stage_scale2x3(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(1), SCSRC(1), pixel, width);
+}
+
+/**
+ * Apply the Scale2x4 effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 2x4 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap (1, 2 or 4).
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap; at least 2 (asserted).
+ */
+static void scale2x4(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* dst = (unsigned char*)void_dst;
+	const unsigned char* src = (const unsigned char*)void_src;
+	unsigned count;
+
+	assert(height >= 2); /* the first and last source rows are handled outside the loop */
+
+	count = height;
+	/* first row: there is no row above, so row 0 is passed as its own previous row */
+	stage_scale2x4(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+
+	dst = SCDST(4);
+
+	count -= 2;
+	while (count) {
+		stage_scale2x4(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+		dst = SCDST(4); /* four destination rows were written */
+		src = SCSRC(1); /* slide the three-row source window down by one */
+
+		--count;
+	}
+	/* last row: there is no row below, so the last row is passed as its own next row */
+	stage_scale2x4(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCSRC(0), SCSRC(1), SCSRC(1), pixel, width);
+}
+
+/**
+ * Apply the Scale3x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 3x3 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap (1, 2 or 4).
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap; at least 2 (asserted).
+ */
+static void scale3x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* dst = (unsigned char*)void_dst;
+	const unsigned char* src = (const unsigned char*)void_src;
+	unsigned count;
+
+	assert(height >= 2); /* the first and last source rows are handled outside the loop */
+
+	count = height;
+	/* first row: there is no row above, so row 0 is passed as its own previous row */
+	stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+
+	dst = SCDST(3);
+
+	count -= 2;
+	while (count) {
+		stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+		dst = SCDST(3); /* three destination rows were written */
+		src = SCSRC(1); /* slide the three-row source window down by one */
+
+		--count;
+	}
+	/* last row: there is no row below, so the last row is passed as its own next row */
+	stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(1), SCSRC(1), pixel, width);
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 4x4 times the size of the source bitmap.
+ * \note This function also requires a small buffer bitmap used internally to store
+ * intermediate results. This bitmap must have at least a horizontal size in bytes of 2*width*pixel,
+ * and a vertical size of 6 rows. The memory of this buffer must not be allocated
+ * in video memory because it's also read and not only written. Generally
+ * a heap (malloc) or a stack (alloca) buffer is the best choice.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_mid Pointer at the first pixel of the buffer bitmap.
+ * \param mid_slice Size in bytes of a buffer bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap (1, 2 or 4).
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap; at least 4 (asserted).
+ */
+static void scale4x_buf(void* void_dst, unsigned dst_slice, void* void_mid, unsigned mid_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* dst = (unsigned char*)void_dst;
+	const unsigned char* src = (const unsigned char*)void_src;
+	unsigned count;
+	unsigned char* mid[6];
+
+	assert(height >= 4); /* source rows 0..3 are all addressed before the loop */
+
+	count = height;
+
+	/* set the 6 buffer pointers */
+	mid[0] = (unsigned char*)void_mid;
+	mid[1] = mid[0] + mid_slice;
+	mid[2] = mid[1] + mid_slice;
+	mid[3] = mid[2] + mid_slice;
+	mid[4] = mid[3] + mid_slice;
+	mid[5] = mid[4] + mid_slice;
+	/* prime the buffer: mid[4..5], mid[0..1] and mid[2..3] receive the 2x rows of source rows 0, 1 and 2 */
+	stage_scale2x(SCMID(-2+6), SCMID(-1+6), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+	stage_scale2x(SCMID(0), SCMID(1), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+	stage_scale2x(SCMID(2), SCMID(3), SCSRC(1), SCSRC(2), SCSRC(3), pixel, width);
+	stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(-2+6), SCMID(-2+6), SCMID(-1+6), SCMID(0), pixel, width); /* top edge: mid[4] is passed as its own previous row */
+
+	dst = SCDST(4);
+
+	stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(-1+6), SCMID(0), SCMID(1), SCMID(2), pixel, width);
+
+	dst = SCDST(4);
+
+	count -= 4; /* source rows 0..2 were scaled above; the last row is scaled after the loop */
+	while (count) {
+		unsigned char* tmp;
+
+		stage_scale2x(SCMID(4), SCMID(5), SCSRC(2), SCSRC(3), SCSRC(4), pixel, width); /* 2x rows of the source row at SCSRC(3) */
+		stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(1), SCMID(2), SCMID(3), SCMID(4), pixel, width);
+
+		dst = SCDST(4);
+		src = SCSRC(1);
+
+		tmp = SCMID(0); /* rotate the six buffer pointers by two positions */
+		SCMID(0) = SCMID(2);
+		SCMID(2) = SCMID(4);
+		SCMID(4) = tmp;
+		tmp = SCMID(1);
+		SCMID(1) = SCMID(3);
+		SCMID(3) = SCMID(5);
+		SCMID(5) = tmp;
+
+		--count;
+	}
+
+	stage_scale2x(SCMID(4), SCMID(5), SCSRC(2), SCSRC(3), SCSRC(3), pixel, width); /* last source row, passed as its own next row */
+	stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(1), SCMID(2), SCMID(3), SCMID(4), pixel, width);
+
+	dst = SCDST(4);
+
+	stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(3), SCMID(4), SCMID(5), SCMID(5), pixel, width); /* bottom edge: mid[5] is passed as its own next row */
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 4x4 times the size of the source bitmap.
+ * \note This function operates like ::scale4x_buf() but the intermediate buffer is
+ * automatically allocated in the stack.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale4x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	/* one intermediate row is twice as wide as a source row, rounded up to 8 bytes */
+	unsigned row_bytes = (2 * pixel * width + 0x7) & ~0x7;
+	/* backing store for the 6 intermediate rows handed to scale4x_buf() */
+	void* rows;
+
+#if HAVE_ALLOCA
+	rows = alloca(6 * row_bytes); /* taken from the stack, nothing to free afterwards */
+
+	assert(rows != 0); /* alloca is not expected to fail */
+#else
+	rows = malloc(6 * row_bytes); /* heap fallback when alloca is not available */
+
+	if (rows == 0) {
+		/* out of memory: return without touching the destination bitmap */
+		return;
+	}
+#endif
+
+	scale4x_buf(void_dst, dst_slice, rows, row_bytes, void_src, src_slice, pixel, width, height);
+
+#if !HAVE_ALLOCA
+	free(rows);
+#endif
+}
+
+/**
+ * Check if the scale implementation is applicable at the given arguments.
+ * \param scale Scale factor. 2 (or 202), 203 (for 2x3), 204 (for 2x4), 3 (or 303) or 4 (or 404).
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ * \return
+ *   - -1 on precondition violated.
+ *   - 0 on success.
+ */
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned min_height; /* smallest bitmap height accepted for the selected scale */
+
+	if (!(pixel == 1 || pixel == 2 || pixel == 4)) /* only 1, 2 and 4 bytes per pixel are accepted */
+		return -1;
+
+	switch (scale) {
+	case 2 :
+	case 202 :
+	case 203 :
+	case 204 :
+	case 3 :
+	case 303 :
+		min_height = 2;
+		break;
+	case 4 :
+	case 404 :
+		min_height = 4;
+		break;
+	default:
+		return -1; /* unknown scale factor */
+	}
+
+	if (height < min_height || width < 2)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * Apply the Scale effect on a bitmap.
+ * This function is simply a common interface for ::scale2x(), ::scale3x() and ::scale4x().
+ * \param scale Scale factor. 2 (or 202), 203 (for 2x3), 204 (for 2x4), 3 (or 303) or 4 (or 404).
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	switch (scale) { /* no default case: an unrecognised scale value does nothing */
+	case 202 :
+	case 2 :
+		scale2x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+		break;
+	case 203 :
+		scale2x3(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+		break;
+	case 204 :
+		scale2x4(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+		break;
+	case 303 :
+	case 3 :
+		scale3x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+		break;
+	case 404 :
+	case 4 :
+		scale4x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+		break;
+	}
+}
+
diff --git a/Utilities/Scale2x/scalebit.h b/Utilities/Scale2x/scalebit.h
new file mode 100644
index 0000000..29dd5f5
--- /dev/null
+++ b/Utilities/Scale2x/scalebit.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applied to a generic bitmap.
+ *
+ * You can find a high-level description of the effect at :
+ *
+ * http://www.scale2x.it/
+ */
+
+#ifndef __SCALEBIT_H
+#define __SCALEBIT_H
+
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height); /* returns 0 when the arguments are acceptable, -1 otherwise */
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height); /* writes the scaled copy of the source bitmap to void_dst */
+
+#endif
+
diff --git a/Utilities/SimpleLock.cpp b/Utilities/SimpleLock.cpp
new file mode 100644
index 0000000..b3f6946
--- /dev/null
+++ b/Utilities/SimpleLock.cpp
@@ -0,0 +1,70 @@
+#include "stdafx.h"
+#include <assert.h>
+#include "SimpleLock.h"
+
+thread_local std::thread::id SimpleLock::_threadID = std::this_thread::get_id(); //One instance per thread, initialised with that thread's own id
+
+SimpleLock::SimpleLock() //Starts unlocked: flag cleared, nesting count zero, no holder
+{
+	_lock.clear();
+	_lockCount = 0;
+	_holderThreadID = std::thread::id(); //A default-constructed id does not identify any thread
+}
+
+SimpleLock::~SimpleLock() //Empty body: there is nothing to release
+{
+}
+
+LockHandler SimpleLock::AcquireSafe() //Acquires the lock and returns a handler whose destructor releases it again
+{
+	return LockHandler(this);
+}
+
+void SimpleLock::Acquire()
+{
+	if(_lockCount > 0 && _holderThreadID == _threadID) {
+		_lockCount++; //Nested call from the thread that already holds the lock: only the depth grows
+	} else {
+		//Spin until the flag is obtained, then register the caller as the holder
+		while(_lock.test_and_set()) { }
+		_holderThreadID = _threadID;
+		_lockCount = 1;
+	}
+}
+
+bool SimpleLock::IsFree() //True when the nesting count is zero, i.e. no thread holds the lock
+{
+	return _lockCount == 0; //NOTE(review): plain read of a non-atomic field that other threads write - confirm callers tolerate a stale answer
+}
+
+void SimpleLock::WaitForRelease() //Blocks until the lock can be taken; the lock is not held on return
+{
+	//Wait until we are able to grab a lock, and then release it again
+	Acquire();
+	Release();
+}
+
+void SimpleLock::Release()
+{
+	if(_lockCount == 0 || _holderThreadID != _threadID) {
+		assert(false); //Releasing a lock the calling thread does not hold is a programming error
+		return;
+	}
+	//Leave one nesting level; leaving the outermost one hands the flag back
+	if(--_lockCount == 0) {
+		_holderThreadID = std::thread::id();
+		_lock.clear();
+	}
+}
+
+
+LockHandler::LockHandler(SimpleLock *lock) //Remembers the lock and acquires it immediately
+{
+	_lock = lock;
+	_lock->Acquire(); //lock is dereferenced unconditionally, so it must not be null
+}
+
+LockHandler::~LockHandler() //Releases the lock taken by the constructor
+{
+	_lock->Release();
+}
\ No newline at end of file
diff --git a/Utilities/SimpleLock.h b/Utilities/SimpleLock.h
new file mode 100644
index 0000000..630a3ab
--- /dev/null
+++ b/Utilities/SimpleLock.h
@@ -0,0 +1,36 @@
+#pragma once 
+#include "stdafx.h"
+#include <thread>
+
+class SimpleLock;
+
+class LockHandler //Scope guard: acquires a SimpleLock on construction and releases it on destruction
+{
+private:
+	SimpleLock *_lock; //Lock to release in the destructor (not owned)
+public:
+	LockHandler(SimpleLock *lock);
+	~LockHandler(); //NOTE(review): copying is not disabled, so a copied handler would release the lock a second time - confirm no caller copies it
+};
+
+class SimpleLock //Spin lock built on an atomic_flag that the holding thread may acquire repeatedly
+{
+private:
+	thread_local static std::thread::id _threadID; //Id of the calling thread (one instance per thread)
+
+	std::thread::id _holderThreadID; //Thread currently holding the lock, or a default id when free
+	uint32_t _lockCount; //Nesting depth of the holder; 0 means free
+	atomic_flag _lock; //Flag the Acquire() loop spins on
+
+public:
+	SimpleLock();
+	~SimpleLock();
+
+	LockHandler AcquireSafe(); //Acquire() wrapped in a handler that releases on destruction
+
+	void Acquire(); //Spins until the lock is held by the caller (nested calls only bump the depth)
+	bool IsFree(); //True when no thread holds the lock
+	void WaitForRelease(); //Acquire() immediately followed by Release()
+	void Release(); //Undoes one Acquire(); asserts when the caller is not the holder
+};
+
diff --git a/Utilities/Socket.cpp b/Utilities/Socket.cpp
new file mode 100644
index 0000000..65c7236
--- /dev/null
+++ b/Utilities/Socket.cpp
@@ -0,0 +1,367 @@
+#include "stdafx.h"
+#include <cstring>
+#include <thread>
+#include "Socket.h"
+
+#ifndef LIBRETRO
+#include "UPnPPortMapper.h"
+using namespace std;
+
+#ifdef _WIN32
+	#pragma comment(lib,"ws2_32.lib") //Winsock Library
+	#define WIN32_LEAN_AND_MEAN
+	#include <winsock2.h>
+	#include <Ws2tcpip.h>
+	#include <Windows.h>
+#else
+	#include <sys/types.h>
+	#include <sys/socket.h>
+	#include <sys/ioctl.h>
+	#include <netinet/in.h>
+	#include <arpa/inet.h>
+	#include <errno.h>
+	#include <netinet/tcp.h>
+	#include <netdb.h>
+	#include <unistd.h>
+
+	#define INVALID_SOCKET (uintptr_t)-1
+	#define SOCKET_ERROR -1
+	#define WSAGetLastError() errno
+	#define SOCKADDR_IN sockaddr_in
+	#define SOCKADDR sockaddr
+	#define TIMEVAL timeval
+	#define SD_SEND SHUT_WR
+	#define closesocket close
+	#define WSAEWOULDBLOCK EWOULDBLOCK
+	#define ioctlsocket ioctl
+#endif
+
+#define BUFFER_SIZE 200000
+
+Socket::Socket() //Creates a new IPv4 TCP socket; failures are reported through the connection error flag
+{
+	_sendBuffer = new char[BUFFER_SIZE];
+	_bufferPosition = 0;
+
+	#ifdef _WIN32	
+		WSADATA wsaDat;
+		if(WSAStartup(MAKEWORD(2, 2), &wsaDat) != 0) {
+			std::cout << "WSAStartup failed." << std::endl;
+			SetConnectionErrorFlag();
+			return; //No socket is created when Winsock could not be started
+		}
+		_cleanupWSA = true; //Makes the destructor call WSACleanup
+	#endif
+
+	_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if(_socket == INVALID_SOCKET) {
+		std::cout << "Socket creation failed." << std::endl;
+		SetConnectionErrorFlag();
+	} else {
+		SetSocketOptions();
+	}
+}
+
+Socket::Socket(uintptr_t socket) //Wraps an existing socket handle (used by Accept()); INVALID_SOCKET sets the error flag
+{
+	_socket = socket;
+
+	if(socket == INVALID_SOCKET) {
+		SetConnectionErrorFlag();
+	} else {
+		SetSocketOptions();
+	}
+
+	_sendBuffer = new char[BUFFER_SIZE];
+	_bufferPosition = 0;
+}
+
+Socket::~Socket() //Removes the UPnP mapping made by Bind(), closes the socket and frees the send buffer
+{
+	if(_UPnPPort != -1) {
+		UPnPPortMapper::RemoveNATPortMapping(_UPnPPort, IPProtocol::TCP);
+	}
+
+	if(_socket != INVALID_SOCKET) {
+		Close();
+	}
+
+	#ifdef _WIN32
+		if(_cleanupWSA) {
+			WSACleanup(); //Balances the WSAStartup call made by the default constructor
+		}
+	#endif
+
+	delete[] _sendBuffer;
+}
+
+void Socket::SetSocketOptions() //Makes the socket non-blocking, requests 256k buffers and disables Nagle (results are not checked)
+{
+	//Non-blocking mode
+	u_long iMode = 1;
+	ioctlsocket(_socket, FIONBIO, &iMode);
+
+	//Set send/recv buffers to 256k
+	int bufferSize = 0x40000;
+	setsockopt(_socket, SOL_SOCKET, SO_RCVBUF, (char*)&bufferSize, sizeof(bufferSize));
+	setsockopt(_socket, SOL_SOCKET, SO_SNDBUF, (char*)&bufferSize, sizeof(bufferSize));
+
+	//Disable nagle's algorithm to improve latency (the option is an int-sized boolean: a u_long is 8 bytes on LP64 systems)
+	int value = 1;
+	setsockopt(_socket, IPPROTO_TCP, TCP_NODELAY, (char*)&value, sizeof(value));
+}
+
+void Socket::SetConnectionErrorFlag() //Marks the connection as failed; nothing in this file clears the flag again
+{
+	_connectionError = true;
+}
+
+void Socket::Close() //Shuts down and closes the socket, forgets the handle and flags the connection as unusable
+{
+	std::cout << "Socket closed." << std::endl;
+	shutdown(_socket, SD_SEND);
+	closesocket(_socket);
+	_socket = INVALID_SOCKET; //Forget the handle so the destructor (or a later Close) cannot close the same descriptor twice
+	SetConnectionErrorFlag();
+}
+bool Socket::ConnectionError() //True once any operation has set the connection error flag (including Close())
+{
+	return _connectionError;
+}
+
+void Socket::Bind(uint16_t port) //Binds the socket to port on all local IPv4 addresses and tries to forward that port via UPnP
+{
+	SOCKADDR_IN serverInf = {}; //Zero the whole structure, including the padding bytes that are not assigned below
+	serverInf.sin_family = AF_INET;
+	serverInf.sin_addr.s_addr = INADDR_ANY;
+	serverInf.sin_port = htons(port);
+
+	if(UPnPPortMapper::AddNATPortMapping(port, port, IPProtocol::TCP)) {
+		_UPnPPort = port; //Remembered so the destructor can remove the mapping
+	}
+
+	if(::bind(_socket, (SOCKADDR*)(&serverInf), sizeof(serverInf)) == SOCKET_ERROR) {
+		std::cout << "Unable to bind socket." << std::endl;
+		SetConnectionErrorFlag();
+	}
+}
+
+bool Socket::Connect(const char* hostname, uint16_t port)
+{
+	// Resolve IP address for hostname
+	bool result = false;
+	addrinfo hint;
+	memset((void*)&hint, 0, sizeof(hint));
+	hint.ai_family = AF_INET;
+	hint.ai_protocol = IPPROTO_TCP;
+	hint.ai_socktype = SOCK_STREAM;
+	addrinfo *addrInfo;
+
+	if(getaddrinfo(hostname, std::to_string(port).c_str(), &hint, &addrInfo) != 0) {
+		std::cout << "Failed to resolve hostname." << std::endl;
+		SetConnectionErrorFlag();
+	} else {
+		//Set socket in non-blocking mode
+		u_long iMode = 1;
+		ioctlsocket(_socket, FIONBIO, &iMode);
+
+		// Attempt to connect to server
+		connect(_socket, addrInfo->ai_addr, (int)addrInfo->ai_addrlen);
+
+		fd_set writeSockets;
+		#ifdef _WIN32
+			writeSockets.fd_count = 1;
+			writeSockets.fd_array[0] = _socket;
+		#else		
+			FD_ZERO(&writeSockets);
+    		FD_SET(_socket, &writeSockets);
+		#endif		
+
+		//Timeout after 3 seconds
+		TIMEVAL timeout;
+		timeout.tv_sec = 3;
+		timeout.tv_usec = 0;
+
+		// check if the socket is ready
+		int returnVal = select((int)_socket+1, nullptr, &writeSockets, nullptr, &timeout);
+		if(returnVal > 0) {
+			result = true;
+		} else {
+			//Could not connect
+			if(returnVal == SOCKET_ERROR) {
+				//int nError = WSAGetLastError();				
+				//std::cout << "select failed: nError " << std::to_string(nError) << " returnVal" << std::to_string(returnVal) << std::endl;			
+			}
+			SetConnectionErrorFlag();
+		}
+		
+		freeaddrinfo(addrInfo);
+	}
+
+	return result;
+}
+
+void Socket::Listen(int backlog) //Starts listening for incoming connections; a failure sets the connection error flag
+{
+	if(listen(_socket, backlog) == SOCKET_ERROR) {
+		std::cout << "listen failed." << std::endl;
+		SetConnectionErrorFlag();
+	}
+}
+
+shared_ptr<Socket> Socket::Accept() //Always returns a Socket; when accept() yields INVALID_SOCKET the new object has its error flag set
+{
+	uintptr_t socket = accept(_socket, nullptr, nullptr);
+	return shared_ptr<Socket>(new Socket(socket));
+}
+
+bool WouldBlock(int nError) //True when the error code is WSAEWOULDBLOCK (EWOULDBLOCK outside Windows) or EAGAIN
+{
+	return nError == WSAEWOULDBLOCK || nError == EAGAIN;
+}
+
+int Socket::Send(char *buf, int len, int flags)
+{
+	//Sends len bytes from buf. Returns the value of the last send() call,
+	//or 0 when the socket was closed after too many "would block" retries.
+	int retryCount = 15;
+	int returnVal;
+	bool keepSending;
+	do {
+		//Loop until everything has been sent (shouldn't loop at all in the vast majority of cases)
+		keepSending = false;
+		returnVal = send(_socket, buf, len, flags);
+		if(returnVal > 0) {
+			//Sent partial data, adjust pointer & length and send the remainder (if any)
+			buf += returnVal;
+			len -= returnVal;
+			keepSending = len > 0;
+		} else if(returnVal == SOCKET_ERROR) {
+			int nError = WSAGetLastError();
+			if(WouldBlock(nError)) {
+				if(--retryCount == 0) {
+					//Connection seems dead, close it.
+					std::cout << "Unable to send data, closing socket." << std::endl;
+					Close();
+					return 0;
+				}
+				std::this_thread::sleep_for(std::chrono::duration<int, std::milli>(10));
+				keepSending = len > 0;
+			} else if(nError != 0) {
+				SetConnectionErrorFlag(); //Any other error is fatal: the remaining data is not sent
+			}
+		}
+	} while(keepSending);
+
+	return returnVal;
+}
+
+void Socket::BufferedSend(char *buf, int len)
+{
+	if(_bufferPosition+len >= BUFFER_SIZE) {
+		std::cout << "prevented buffer overflow"; //Data that does not fit is dropped; only this message is printed
+		return;
+	}
+	memcpy(_sendBuffer+_bufferPosition, buf, len); //Queue the bytes behind what is already pending
+	_bufferPosition += len;
+}
+
+void Socket::SendBuffer() //Sends everything queued by BufferedSend() and empties the queue (the result of Send() is ignored)
+{
+	Send(_sendBuffer, _bufferPosition, 0);
+	_bufferPosition = 0;
+}
+
+int Socket::Recv(char *buf, int len, int flags) //Returns the raw recv() result; 0 means the peer closed the connection (the socket is then closed)
+{
+	int returnVal = recv(_socket, buf, len, flags);
+	
+	if(returnVal == SOCKET_ERROR) {
+		int nError = WSAGetLastError();
+		if(nError && !WouldBlock(nError)) { //A "would block" result is not treated as an error
+			std::cout << "recv failed: nError " << std::to_string(nError) << " returnVal" << std::to_string(returnVal) << std::endl;			
+			SetConnectionErrorFlag();
+		}
+	} else if(returnVal == 0) {
+		//Socket closed
+		std::cout << "Socket closed by peer." << std::endl;
+		Close();
+	}
+
+	return returnVal;
+}
+
+#else
+
+//Libretro port does not need sockets.
+
+Socket::Socket() //Stub: does nothing
+{
+}
+
+Socket::Socket(uintptr_t socket) //Stub: the handle is ignored
+{
+}
+
+Socket::~Socket()
+{
+}
+
+void Socket::SetSocketOptions()
+{
+}
+
+void Socket::SetConnectionErrorFlag()
+{
+}
+
+void Socket::Close()
+{
+}
+
+bool Socket::ConnectionError() //Stub: always reports an error
+{
+	return true;
+}
+
+void Socket::Bind(uint16_t port)
+{
+}
+
+bool Socket::Connect(const char* hostname, uint16_t port) //Stub: never connects
+{
+	return false;
+}
+
+void Socket::Listen(int backlog)
+{
+}
+
+shared_ptr<Socket> Socket::Accept() //Stub: returns a new stub Socket
+{
+	return shared_ptr<Socket>(new Socket(0));
+}
+
+bool WouldBlock(int nError)
+{
+	return false;
+}
+
+int Socket::Send(char *buf, int len, int flags) //Stub: sends nothing and returns 0
+{
+	return 0;
+}
+
+void Socket::BufferedSend(char *buf, int len)
+{
+}
+
+void Socket::SendBuffer()
+{
+}
+
+int Socket::Recv(char *buf, int len, int flags) //Stub: receives nothing and returns 0
+{
+	return 0;
+}
+#endif
diff --git a/Utilities/Socket.h b/Utilities/Socket.h
new file mode 100644
index 0000000..acecb10
--- /dev/null
+++ b/Utilities/Socket.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "stdafx.h"
+
+class Socket //Wrapper around a TCP socket handle with a buffered-send helper (the data members are omitted in LIBRETRO builds)
+{
+private:
+#ifndef LIBRETRO
+	#ifdef _WIN32
+	bool _cleanupWSA = false; //Set once WSAStartup succeeded, so the destructor calls WSACleanup
+	#endif
+	
+	uintptr_t _socket = ~0; //All bits set until a real handle is assigned
+	bool _connectionError = false;
+	char* _sendBuffer; //Heap buffer filled by BufferedSend() and flushed by SendBuffer()
+	int _bufferPosition; //Number of bytes currently queued in _sendBuffer
+	int32_t _UPnPPort = -1; //Port forwarded through UPnP by Bind(), or -1 when none
+#endif
+
+public:
+	Socket();
+	Socket(uintptr_t socket);
+	~Socket(); //NOTE(review): copy operations are not disabled although the destructor closes the handle and frees the buffer - confirm instances are never copied
+
+	void SetSocketOptions();
+	void SetConnectionErrorFlag();
+
+	void Close();
+	bool ConnectionError();
+
+	void Bind(uint16_t port);
+	bool Connect(const char* hostname, uint16_t port);
+	void Listen(int backlog);
+	shared_ptr<Socket> Accept();
+
+	int Send(char *buf, int len, int flags);
+	void BufferedSend(char *buf, int len);
+	void SendBuffer();
+	int Recv(char *buf, int len, int flags);
+};
diff --git a/Utilities/StringUtilities.h b/Utilities/StringUtilities.h
new file mode 100644
index 0000000..22658be
--- /dev/null
+++ b/Utilities/StringUtilities.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "stdafx.h"
+
+class StringUtilities
+{
+public:
+	//Splits input at every occurrence of delimiter. Empty fields are kept, so the
+	//result always holds (number of delimiters + 1) entries; "" yields one empty string.
+	static vector<string> Split(const string &input, char delimiter)
+	{
+		vector<string> result;
+		size_t fieldStart = 0;
+		for(size_t pos = input.find(delimiter); pos != string::npos; pos = input.find(delimiter, fieldStart)) {
+			result.push_back(input.substr(fieldStart, pos - fieldStart));
+			fieldStart = pos + 1; //The next field begins right after the delimiter
+		}
+		result.push_back(input.substr(fieldStart)); //Whatever follows the last delimiter
+		return result;
+	}
+};
diff --git a/Utilities/Timer.cpp b/Utilities/Timer.cpp
new file mode 100644
index 0000000..9a155e1
--- /dev/null
+++ b/Utilities/Timer.cpp
@@ -0,0 +1,97 @@
+#include "stdafx.h"
+#include "Timer.h"
+
+#ifndef LIBRETRO
+
+#include <thread>
+
+#ifdef _WIN32
+#include <Windows.h>
+
+Timer::Timer() //Reads the performance counter frequency and starts timing
+{
+	LARGE_INTEGER li;
+	if(!QueryPerformanceFrequency(&li)) {
+		throw; //NOTE(review): a bare throw with no exception being handled calls std::terminate - confirm this is the intended failure mode
+	}
+
+	_frequency = double(li.QuadPart) / 1000.0; //Counter ticks per millisecond
+
+	QueryPerformanceCounter(&li);
+	_start = li.QuadPart;
+}
+
+void Timer::Reset() //Restarts the measurement from the current performance counter value
+{
+	LARGE_INTEGER li;
+	QueryPerformanceCounter(&li);
+	_start = li.QuadPart;
+}
+
+double Timer::GetElapsedMS() //Milliseconds elapsed since construction or the last Reset()
+{
+	LARGE_INTEGER li;
+	QueryPerformanceCounter(&li);
+	return double(li.QuadPart - _start) / _frequency; //_frequency holds ticks per millisecond
+}
+
+#else 
+#include <time.h>
+
+Timer::Timer() //Starts timing immediately
+{
+	Reset();
+}
+
+void Timer::Reset() //Restarts the measurement from the current monotonic clock value (stored in nanoseconds)
+{
+	timespec start;
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	_start = (uint64_t)start.tv_sec * 1000000000 + start.tv_nsec; //Widen before multiplying: tv_sec can be a 32-bit type, which would overflow
+}
+
+double Timer::GetElapsedMS() //Milliseconds elapsed since construction or the last Reset()
+{
+	timespec end;
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	uint64_t currentTime = (uint64_t)end.tv_sec * 1000000000 + end.tv_nsec; //Widen before multiplying: tv_sec can be a 32-bit type, which would overflow
+
+	return (double)(currentTime - _start) / 1000000.0; //Nanoseconds to milliseconds
+}
+
+#endif
+
+void Timer::WaitUntil(double targetMillisecond)
+{
+	//Sleeps (in whole milliseconds) only for a positive target that is still more than 1ms away
+	double remaining = targetMillisecond > 0 ? targetMillisecond - GetElapsedMS() : 0;
+
+	if(remaining > 1) {
+		std::this_thread::sleep_for(std::chrono::duration<int, std::milli>((int)remaining));
+	}
+}
+
+#else 
+
+//This is not used by Libretro port, remove its dependencies
+
+Timer::Timer() //Stub: does nothing
+{
+}
+
+void Timer::Reset()
+{
+}
+
+double Timer::GetElapsedMS() //Stub: always reports 0 elapsed milliseconds
+{
+	return 0.0;
+}
+
+void Timer::WaitUntil(double targetMillisecond) //Stub: returns immediately
+{
+}
+
+#endif
\ No newline at end of file
diff --git a/Utilities/Timer.h b/Utilities/Timer.h
new file mode 100644
index 0000000..19888fc
--- /dev/null
+++ b/Utilities/Timer.h
@@ -0,0 +1,18 @@
+#pragma once
+#include "stdafx.h"
+
+class Timer //Stopwatch reporting the milliseconds elapsed since construction or the last Reset()
+{
+	private:
+#ifndef LIBRETRO
+		#ifdef _WIN32
+		double _frequency = 0.0; //Performance counter ticks per millisecond; guarded by _WIN32 to match the test used in Timer.cpp
+		#endif
+		uint64_t _start; //Clock value captured by the constructor / Reset()
+#endif
+	public:
+		Timer();
+		void Reset();
+		double GetElapsedMS();
+		void WaitUntil(double targetMillisecond); //Sleeps until roughly targetMillisecond ms have elapsed
+};
\ No newline at end of file
diff --git a/Utilities/UPnPPortMapper.cpp b/Utilities/UPnPPortMapper.cpp
new file mode 100644
index 0000000..2b5bc8e
--- /dev/null
+++ b/Utilities/UPnPPortMapper.cpp
@@ -0,0 +1,156 @@
+#include "stdafx.h"
+#include "UPnPPortMapper.h"
+
+#ifdef _WIN32
+#include <winsock2.h>
+#include <natupnp.h>
+#include <ws2tcpip.h>
+
+bool UPnPPortMapper::AddNATPortMapping(uint16_t internalPort, uint16_t externalPort, IPProtocol protocol) //Forwards externalPort to internalPort for every local IP; true when at least one mapping was added
+{
+	bool result = false;
+
+	CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+
+	IUPnPNAT *nat = nullptr;
+	HRESULT hResult = CoCreateInstance(__uuidof(UPnPNAT), nullptr, CLSCTX_ALL, __uuidof(IUPnPNAT), (void**)&nat);
+
+	BSTR proto = SysAllocString((protocol == IPProtocol::TCP) ? L"TCP" : L"UDP");
+
+	if(SUCCEEDED(hResult) && nat) {
+		IStaticPortMappingCollection *spmc = nullptr;
+		hResult = nat->get_StaticPortMappingCollection(&spmc);
+		if(SUCCEEDED(hResult) && spmc) {
+			IStaticPortMapping *spm = nullptr;
+			hResult = spmc->get_Item(externalPort, proto, &spm);
+			if(spm != nullptr) {
+				//An identical mapping already exists, remove it
+				if(RemoveNATPortMapping(externalPort, protocol)) {
+					std::cout << "Removed existing UPnP mapping." << std::endl;
+					spm->Release();
+					spm = nullptr;
+				}
+			}
+
+			if(!SUCCEEDED(hResult) || spm == nullptr) {
+				std::cout << "Attempting to automatically forward port via UPnP..." << std::endl;
+				vector<wstring> localIPs = GetLocalIPs();
+				BSTR desc = SysAllocString(L"Mesen NetPlay");
+
+				for(size_t i = 0, len = localIPs.size(); i < len; i++) {
+					spm = nullptr; //Start every attempt with a clean out-parameter so a stale pointer is never released twice
+					BSTR clientStr = SysAllocString(localIPs[i].c_str());
+					hResult = spmc->Add(externalPort, proto, internalPort, clientStr, true, desc, &spm);
+					SysFreeString(clientStr);
+
+					if(SUCCEEDED(hResult) && spm) {
+						//Successfully added a new port mapping
+						std::cout << std::dec << "Forwarded port " << externalPort << " to IP " << utf8::utf8::encode(localIPs[i]) << std::endl;
+						result = true;
+					} else {
+						std::cout << "Unable to add UPnP port mapping. IP: " << utf8::utf8::encode(localIPs[i]) << " HRESULT: 0x" << std::hex << hResult << std::endl;
+					}
+
+					if(spm) {
+						spm->Release();
+					}
+				}
+				SysFreeString(desc); //Freed once, after the last use (it is shared by every iteration of the loop)
+			} else {
+				std::cout << "Unable to add UPnP port mapping." << std::endl;
+				spm->Release(); //The existing mapping could not be removed: drop the reference obtained from get_Item
+			}
+			spmc->Release();
+		}
+		nat->Release();
+	}
+
+	SysFreeString(proto);
+
+	CoUninitialize();
+
+	return result;
+}
+
+bool UPnPPortMapper::RemoveNATPortMapping(uint16_t externalPort, IPProtocol protocol) //Asks the UPnP NAT service to remove the static mapping for externalPort/protocol
+{
+	IUPnPNAT *nat = nullptr;
+	IStaticPortMappingCollection *spmc;
+	bool result = false;
+
+	CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+
+	HRESULT hResult = ::CoCreateInstance(__uuidof(UPnPNAT), nullptr, CLSCTX_ALL, __uuidof(IUPnPNAT), (void**)&nat);
+
+	BSTR proto = SysAllocString((protocol == IPProtocol::TCP) ? L"TCP" : L"UDP");
+
+	if(SUCCEEDED(hResult) && nat) {
+		hResult = nat->get_StaticPortMappingCollection(&spmc);
+		if(SUCCEEDED(hResult) && spmc) {
+			spmc->Remove(externalPort, proto); //NOTE(review): the HRESULT of Remove is ignored, so true is returned even when the removal failed - confirm this best-effort behaviour is intended
+			spmc->Release();
+			result = true;
+		}
+		nat->Release();
+	}
+
+	SysFreeString(proto);
+
+	CoUninitialize();
+
+	return result;
+}
+
+vector<wstring> UPnPPortMapper::GetLocalIPs() //Returns the distinct IPv4 addresses that the computer's own name resolves to, as text
+{
+	vector<wstring> localIPs;
+	ADDRINFOW *result = nullptr;
+	ADDRINFOW *current = nullptr;
+	ADDRINFOW hints;
+
+	ZeroMemory(&hints, sizeof(hints));
+	hints.ai_family = AF_INET;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_protocol = IPPROTO_TCP;
+
+	wchar_t hostName[255];
+	DWORD hostSize = 255;
+	GetComputerName(hostName, &hostSize); //NOTE(review): the result is not checked; hostName stays uninitialised if the call fails
+
+	if(GetAddrInfoW(hostName, nullptr, &hints, &result) == 0) {
+		current = result;
+		while(current != nullptr) {
+			wchar_t ipAddr[255];
+			DWORD ipSize = 255;
+
+			if(WSAAddressToString(current->ai_addr, (DWORD)current->ai_addrlen, nullptr, ipAddr, &ipSize) == 0) {
+				if(std::find(localIPs.begin(), localIPs.end(), ipAddr) == localIPs.end()) { //Skip addresses already in the list
+					localIPs.push_back(ipAddr);
+				}
+			}
+			current = current->ai_next;
+		}
+		FreeAddrInfoW(result);
+	}
+
+	return localIPs;
+}
+
+#else
+	
+bool UPnPPortMapper::AddNATPortMapping(uint16_t internalPort, uint16_t externalPort, IPProtocol protocol) //Stub for non-Windows builds: no mapping is added
+{
+	return false;
+}
+
+bool UPnPPortMapper::RemoveNATPortMapping(uint16_t externalPort, IPProtocol protocol) //Stub for non-Windows builds: nothing is removed
+{
+	return false;
+}
+
+vector<wstring> UPnPPortMapper::GetLocalIPs() //Stub for non-Windows builds: always returns an empty list
+{
+	return vector<wstring>();
+}	
+	
+#endif
\ No newline at end of file
diff --git a/Utilities/UPnPPortMapper.h b/Utilities/UPnPPortMapper.h
new file mode 100644
index 0000000..82f1301
--- /dev/null
+++ b/Utilities/UPnPPortMapper.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "stdafx.h"
+
+using std::wstring;
+
+enum class IPProtocol //Transport protocol of a port mapping
+{
+	TCP = 0,
+	UDP = 1
+};
+
+class UPnPPortMapper //Static helpers that add/remove UPnP NAT port mappings (only the Windows build has a real implementation)
+{
+private:
+	static vector<wstring> GetLocalIPs(); //Local IPv4 addresses as text
+
+public:
+	static bool AddNATPortMapping(uint16_t internalPort, uint16_t externalPort, IPProtocol protocol); //True when at least one mapping was added
+	static bool RemoveNATPortMapping(uint16_t externalPort, IPProtocol protocol);
+};
\ No newline at end of file
diff --git a/Utilities/UTF8Util.cpp b/Utilities/UTF8Util.cpp
new file mode 100644
index 0000000..0a99fea
--- /dev/null
+++ b/Utilities/UTF8Util.cpp
@@ -0,0 +1,31 @@
+#include "stdafx.h"
+#include "UTF8Util.h"
+#include <codecvt>
+#include <locale>
+
+namespace utf8 
+{
+	std::wstring utf8::decode(const std::string &str) //UTF-8 bytes to a wide string
+	{
+		std::wstring_convert<std::codecvt_utf8<wchar_t>> conv; //NOTE(review): with a 16-bit wchar_t this facet presumably covers UCS-2 only (no surrogate pairs) - confirm
+		return conv.from_bytes(str);
+	}
+
+	std::string utf8::encode(const std::wstring &wstr) //Wide string to UTF-8 bytes
+	{
+		std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
+    	return conv.to_bytes(wstr);
+	}
+
+	std::string utf8::encode(const std::u16string &wstr) //UTF-16 string to UTF-8 bytes
+	{
+		#ifdef _MSC_VER
+			std::wstring_convert<std::codecvt_utf8_utf16<int16_t>, int16_t> conv; //MSVC path converts through int16_t instead of char16_t
+			auto p = reinterpret_cast<const int16_t *>(wstr.data());
+			return conv.to_bytes(p, p + wstr.size());
+		#else 
+			std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> conv;
+  			return conv.to_bytes(wstr);
+		#endif
+	}	
+}
\ No newline at end of file
diff --git a/Utilities/UTF8Util.h b/Utilities/UTF8Util.h
new file mode 100644
index 0000000..c60c966
--- /dev/null
+++ b/Utilities/UTF8Util.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <fstream>
+
+namespace utf8 {
+	class utf8 //Conversions between UTF-8 encoded std::string and wide / UTF-16 strings
+	{
+	public:
+		static std::wstring decode(const std::string &str);
+		static std::string encode(const std::wstring &wstr);
+		static std::string encode(const std::u16string &wstr);
+	};
+	//On Windows (non-Libretro) the stream classes below take a UTF-8 path and open it through its wide-character form
+#if defined(_WIN32) && !defined(LIBRETRO)
+	class ifstream : public std::ifstream
+	{
+	public:
+		ifstream(const std::string& _Str, ios_base::openmode _Mode = ios_base::in, int _Prot = (int)ios_base::_Openprot) : std::ifstream(utf8::decode(_Str), _Mode, _Prot) { }
+		ifstream() : std::ifstream() { }
+		void open(const std::string& _Str, ios_base::openmode _Mode = ios_base::in, int _Prot = (int)ios_base::_Openprot)	{ std::ifstream::open(utf8::decode(_Str), _Mode, _Prot); }
+	};
+	//The default mode is "out", as for std::ofstream: defaulting to "in" opened the file in in|out mode, which cannot create a missing file
+	class ofstream : public std::ofstream
+	{
+	public:
+		ofstream(const std::string& _Str, ios_base::openmode _Mode = ios_base::out, int _Prot = (int)ios_base::_Openprot) : std::ofstream(utf8::decode(_Str), _Mode, _Prot) { }
+		ofstream() : std::ofstream() { }
+		void open(const std::string& _Str, ios_base::openmode _Mode = ios_base::out, int _Prot = (int)ios_base::_Openprot) { std::ofstream::open(utf8::decode(_Str), _Mode, _Prot); }
+	};
+#else
+	using std::ifstream;
+	using std::ofstream;
+#endif
+}
\ No newline at end of file
diff --git a/Utilities/UpsPatcher.cpp b/Utilities/UpsPatcher.cpp
new file mode 100644
index 0000000..1c3a72b
--- /dev/null
+++ b/Utilities/UpsPatcher.cpp
@@ -0,0 +1,100 @@
+#include "stdafx.h"
+#include <assert.h>
+#include <cstring>
+#include "UpsPatcher.h"
+#include "CRC32.h"
+
+int64_t UpsPatcher::ReadBase128Number(std::istream &file)
+{
+	//Decodes one variable-length number (7 data bits per byte, bit 7 marks the last byte). Returns -1 on a read error or an over-long number.
+	int64_t result = 0;
+	int shift = 0;
+	uint8_t buffer = 0;
+	while(shift < 56) { //At most 8 bytes: keeps every shift below inside the range of int64_t
+		file.read((char*)&buffer, 1);
+		if(!file) {
+			return -1;
+		}
+		result += (int64_t)(buffer & 0x7F) << shift; //Shift in 64 bits: an int shift loses bits (and is undefined) for larger numbers
+		shift += 7;
+		if(buffer & 0x80) {
+			return result;
+		}
+		result += (int64_t)1 << shift;
+	}
+	return -1;
+}
+
+bool UpsPatcher::PatchBuffer(string upsFilepath, vector<uint8_t> &input, vector<uint8_t> &output) //Opens the patch file in binary mode and applies it through the stream overload
+{
+	ifstream patchStream(upsFilepath, std::ios::in | std::ios::binary);
+	if(!patchStream) {
+		return false; //The patch file could not be opened
+	}
+	return PatchBuffer(patchStream, input, output);
+}
+
+bool UpsPatcher::PatchBuffer(std::istream &upsFile, vector<uint8_t> &input, vector<uint8_t> &output) //Applies the UPS patch to input and stores the result in output; true only when both CRC32 checks match (output may be modified even on failure)
+{
+	upsFile.seekg(0, std::ios::end);
+	size_t fileSize = (size_t)upsFile.tellg();
+	upsFile.seekg(0, std::ios::beg);
+
+	char header[4] = {};
+	upsFile.read((char*)&header, 4);
+	if(fileSize < 16 || memcmp((char*)&header, "UPS1", 4) != 0) {
+		//Invalid UPS file (wrong magic, or too short to hold the magic plus the 12 trailing bytes reserved below)
+		return false;
+	}
+
+	int64_t inputFileSize = ReadBase128Number(upsFile);
+	int64_t outputFileSize = ReadBase128Number(upsFile);
+	if(inputFileSize == -1 || outputFileSize == -1) {
+		//Invalid file
+		return false;
+	}
+
+	output.resize((size_t)outputFileSize);
+	size_t copySize = input.size() < output.size() ? input.size() : output.size(); //Never copy past the end of output when the input is the larger buffer
+	std::copy(input.begin(), input.begin() + copySize, output.begin());
+	uint32_t pos = 0;
+	while((size_t)upsFile.tellg() < fileSize - 12) {
+		int32_t offset = (int32_t)ReadBase128Number(upsFile);
+		if(offset == -1) {
+			//Invalid file
+			return false;
+		}
+
+		pos += offset;
+
+		while(true) {
+			uint8_t xorValue = 0;
+			upsFile.read((char*)&xorValue, 1);
+			if((size_t)upsFile.tellg() > fileSize - 12) {
+				//Invalid file
+				return false;
+			}
+
+			if(pos < output.size()) { output[pos] ^= xorValue; } //Positions beyond the output are skipped instead of written out of bounds
+			pos++;
+
+			if(!xorValue) {
+				break;
+			}
+		}
+	}
+
+	uint8_t inputChecksum[4];
+	uint8_t outputChecksum[4];
+	upsFile.read((char*)inputChecksum, 4);
+	upsFile.read((char*)outputChecksum, 4);
+	uint32_t patchInputCrc = inputChecksum[0] | (inputChecksum[1] << 8) | (inputChecksum[2] << 16) | (inputChecksum[3] << 24);
+	uint32_t patchOutputCrc = outputChecksum[0] | (outputChecksum[1] << 8) | (outputChecksum[2] << 16) | (outputChecksum[3] << 24);
+	uint32_t inputCrc = CRC32::GetCRC(input.data(), input.size());
+	uint32_t outputCrc = CRC32::GetCRC(output.data(), output.size());
+
+	if(patchInputCrc != inputCrc || patchOutputCrc != outputCrc) {
+		return false;
+	}
+	return true;
+}
diff --git a/Utilities/UpsPatcher.h b/Utilities/UpsPatcher.h
new file mode 100644
index 0000000..bf75b1e
--- /dev/null
+++ b/Utilities/UpsPatcher.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "stdafx.h"
+
+class UpsPatcher //Applies UPS ("UPS1") patches to an in-memory buffer
+{
+private:
+	static int64_t ReadBase128Number(std::istream &file); //Reads one variable-length number; -1 signals a read failure
+
+public:
+	static bool PatchBuffer(std::istream &upsFile, vector<uint8_t> &input, vector<uint8_t> &output); //Patch read from an open stream
+	static bool PatchBuffer(string upsFilepath, vector<uint8_t> &input, vector<uint8_t> &output); //Patch read from a file path
+};
\ No newline at end of file
diff --git a/Utilities/Utilities.vcxproj b/Utilities/Utilities.vcxproj
new file mode 100644
index 0000000..8a133d7
--- /dev/null
+++ b/Utilities/Utilities.vcxproj
@@ -0,0 +1,627 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|Win32">
+      <Configuration>Libretro</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|x64">
+      <Configuration>Libretro</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|Win32">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|x64">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|Win32">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|x64">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{B5330148-E8C7-46BA-B54E-69BE59EA337D}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>Utilities</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <CallingConvention>Cdecl</CallingConvention>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <ShowIncludes>false</ShowIncludes>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <CallingConvention>Cdecl</CallingConvention>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>LIBRETRO;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>LIBRETRO;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="ArchiveReader.h" />
+    <ClInclude Include="AviWriter.h" />
+    <ClInclude Include="Base64.h" />
+    <ClInclude Include="blip_buf.h" />
+    <ClInclude Include="BpsPatcher.h" />
+    <ClInclude Include="CamstudioCodec.h" />
+    <ClInclude Include="CRC32.h" />
+    <ClInclude Include="FastString.h" />
+    <ClInclude Include="FolderUtilities.h" />
+    <ClInclude Include="HexUtilities.h" />
+    <ClInclude Include="HQX\common.h" />
+    <ClInclude Include="HQX\hqx.h" />
+    <ClInclude Include="IpsPatcher.h" />
+    <ClInclude Include="KreedSaiEagle\SaiEagle.h" />
+    <ClInclude Include="LowPassFilter.h" />
+    <ClInclude Include="md5.h" />
+    <ClInclude Include="miniz.h" />
+    <ClInclude Include="AutoResetEvent.h" />
+    <ClInclude Include="nes_ntsc.h" />
+    <ClInclude Include="nes_ntsc_config.h" />
+    <ClInclude Include="nes_ntsc_impl.h" />
+    <ClInclude Include="BaseCodec.h" />
+    <ClInclude Include="orfanidis_eq.h" />
+    <ClInclude Include="PlatformUtilities.h" />
+    <ClInclude Include="PNGHelper.h" />
+    <ClInclude Include="RawCodec.h" />
+    <ClInclude Include="Scale2x\scale2x.h" />
+    <ClInclude Include="Scale2x\scale3x.h" />
+    <ClInclude Include="Scale2x\scalebit.h" />
+    <ClInclude Include="sha1.h" />
+    <ClInclude Include="stb_vorbis.h" />
+    <ClInclude Include="StringUtilities.h" />
+    <ClInclude Include="SZReader.h" />
+    <ClInclude Include="UPnPPortMapper.h" />
+    <ClInclude Include="SimpleLock.h" />
+    <ClInclude Include="Socket.h" />
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+    <ClInclude Include="Timer.h" />
+    <ClInclude Include="UpsPatcher.h" />
+    <ClInclude Include="UTF8Util.h" />
+    <ClInclude Include="VirtualFile.h" />
+    <ClInclude Include="xBRZ\config.h" />
+    <ClInclude Include="xBRZ\xbrz.h" />
+    <ClInclude Include="ZipReader.h" />
+    <ClInclude Include="ZipWriter.h" />
+    <ClInclude Include="ZmbvCodec.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="ArchiveReader.cpp" />
+    <ClCompile Include="AviWriter.cpp" />
+    <ClCompile Include="blip_buf.cpp" />
+    <ClCompile Include="BpsPatcher.cpp" />
+    <ClCompile Include="CamstudioCodec.cpp" />
+    <ClCompile Include="CRC32.cpp" />
+    <ClCompile Include="FolderUtilities.cpp" />
+    <ClCompile Include="HexUtilities.cpp" />
+    <ClCompile Include="HQX\hq2x.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="HQX\hq3x.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="HQX\hq4x.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="HQX\init.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="IpsPatcher.cpp" />
+    <ClCompile Include="KreedSaiEagle\2xSai.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="KreedSaiEagle\Super2xSai.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="KreedSaiEagle\SuperEagle.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="md5.cpp" />
+    <ClCompile Include="miniz.cpp" />
+    <ClCompile Include="nes_ntsc.cpp" />
+    <ClCompile Include="PlatformUtilities.cpp" />
+    <ClCompile Include="PNGHelper.cpp" />
+    <ClCompile Include="AutoResetEvent.cpp" />
+    <ClCompile Include="Scale2x\scale2x.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="Scale2x\scale3x.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="Scale2x\scalebit.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="sha1.cpp" />
+    <ClCompile Include="SimpleLock.cpp" />
+    <ClCompile Include="Socket.cpp" />
+    <ClCompile Include="stb_vorbis.cpp" />
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="SZReader.cpp" />
+    <ClCompile Include="Timer.cpp" />
+    <ClCompile Include="UPnPPortMapper.cpp" />
+    <ClCompile Include="UpsPatcher.cpp" />
+    <ClCompile Include="UTF8Util.cpp" />
+    <ClCompile Include="VirtualFile.cpp" />
+    <ClCompile Include="xBRZ\xbrz.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">NotUsing</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">NotUsing</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="ZipReader.cpp" />
+    <ClCompile Include="ZipWriter.cpp" />
+    <ClCompile Include="ZmbvCodec.cpp" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/Utilities/Utilities.vcxproj.filters b/Utilities/Utilities.vcxproj.filters
new file mode 100644
index 0000000..3b173eb
--- /dev/null
+++ b/Utilities/Utilities.vcxproj.filters
@@ -0,0 +1,296 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="xBRZ">
+      <UniqueIdentifier>{34df7dd9-5f1b-4aec-9212-1b70f1fada59}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="HQX">
+      <UniqueIdentifier>{c29925fd-7698-4db8-a328-73ef7f8993a9}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Scale2x">
+      <UniqueIdentifier>{87329bd1-28ac-4ced-a4c2-b51777018d16}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="KreedSaiEagle">
+      <UniqueIdentifier>{8e159744-fb91-4e16-aa82-8d8703ba2762}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Avi">
+      <UniqueIdentifier>{8b0e23bf-1bd9-4cc1-8046-784fd01e8688}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Patches">
+      <UniqueIdentifier>{6d519bc1-7c40-448a-95d2-9ad94cd20644}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="targetver.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Timer.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="FolderUtilities.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Socket.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="SimpleLock.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="UPnPPortMapper.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="CRC32.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="miniz.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="UTF8Util.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="PNGHelper.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="AutoResetEvent.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="md5.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="ZipWriter.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="ZipReader.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="LowPassFilter.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="blip_buf.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="nes_ntsc.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="nes_ntsc_config.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="nes_ntsc_impl.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="xBRZ\config.h">
+      <Filter>xBRZ</Filter>
+    </ClInclude>
+    <ClInclude Include="xBRZ\xbrz.h">
+      <Filter>xBRZ</Filter>
+    </ClInclude>
+    <ClInclude Include="HQX\hqx.h">
+      <Filter>HQX</Filter>
+    </ClInclude>
+    <ClInclude Include="HQX\common.h">
+      <Filter>HQX</Filter>
+    </ClInclude>
+    <ClInclude Include="Scale2x\scale2x.h">
+      <Filter>Scale2x</Filter>
+    </ClInclude>
+    <ClInclude Include="Scale2x\scale3x.h">
+      <Filter>Scale2x</Filter>
+    </ClInclude>
+    <ClInclude Include="Scale2x\scalebit.h">
+      <Filter>Scale2x</Filter>
+    </ClInclude>
+    <ClInclude Include="KreedSaiEagle\SaiEagle.h">
+      <Filter>KreedSaiEagle</Filter>
+    </ClInclude>
+    <ClInclude Include="SZReader.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="PlatformUtilities.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="HexUtilities.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="ZmbvCodec.h">
+      <Filter>Avi</Filter>
+    </ClInclude>
+    <ClInclude Include="AviWriter.h">
+      <Filter>Avi</Filter>
+    </ClInclude>
+    <ClInclude Include="BaseCodec.h">
+      <Filter>Avi</Filter>
+    </ClInclude>
+    <ClInclude Include="RawCodec.h">
+      <Filter>Avi</Filter>
+    </ClInclude>
+    <ClInclude Include="CamstudioCodec.h">
+      <Filter>Avi</Filter>
+    </ClInclude>
+    <ClInclude Include="StringUtilities.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="IpsPatcher.h">
+      <Filter>Patches</Filter>
+    </ClInclude>
+    <ClInclude Include="UpsPatcher.h">
+      <Filter>Patches</Filter>
+    </ClInclude>
+    <ClInclude Include="BpsPatcher.h">
+      <Filter>Patches</Filter>
+    </ClInclude>
+    <ClInclude Include="orfanidis_eq.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="sha1.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="ArchiveReader.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="stb_vorbis.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Base64.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="VirtualFile.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="FastString.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="stdafx.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="FolderUtilities.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="miniz.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="SimpleLock.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Socket.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="UPnPPortMapper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="UTF8Util.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Timer.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="CRC32.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="PNGHelper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="AutoResetEvent.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="md5.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="ZipWriter.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="ZipReader.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="blip_buf.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="nes_ntsc.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="xBRZ\xbrz.cpp">
+      <Filter>xBRZ</Filter>
+    </ClCompile>
+    <ClCompile Include="HQX\hq2x.cpp">
+      <Filter>HQX</Filter>
+    </ClCompile>
+    <ClCompile Include="HQX\hq3x.cpp">
+      <Filter>HQX</Filter>
+    </ClCompile>
+    <ClCompile Include="HQX\hq4x.cpp">
+      <Filter>HQX</Filter>
+    </ClCompile>
+    <ClCompile Include="HQX\init.cpp">
+      <Filter>HQX</Filter>
+    </ClCompile>
+    <ClCompile Include="Scale2x\scale2x.cpp">
+      <Filter>Scale2x</Filter>
+    </ClCompile>
+    <ClCompile Include="Scale2x\scale3x.cpp">
+      <Filter>Scale2x</Filter>
+    </ClCompile>
+    <ClCompile Include="Scale2x\scalebit.cpp">
+      <Filter>Scale2x</Filter>
+    </ClCompile>
+    <ClCompile Include="KreedSaiEagle\SuperEagle.cpp">
+      <Filter>KreedSaiEagle</Filter>
+    </ClCompile>
+    <ClCompile Include="KreedSaiEagle\Super2xSai.cpp">
+      <Filter>KreedSaiEagle</Filter>
+    </ClCompile>
+    <ClCompile Include="KreedSaiEagle\2xSai.cpp">
+      <Filter>KreedSaiEagle</Filter>
+    </ClCompile>
+    <ClCompile Include="SZReader.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="ArchiveReader.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="PlatformUtilities.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="HexUtilities.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="ZmbvCodec.cpp">
+      <Filter>Avi</Filter>
+    </ClCompile>
+    <ClCompile Include="AviWriter.cpp">
+      <Filter>Avi</Filter>
+    </ClCompile>
+    <ClCompile Include="CamstudioCodec.cpp">
+      <Filter>Avi</Filter>
+    </ClCompile>
+    <ClCompile Include="UpsPatcher.cpp">
+      <Filter>Patches</Filter>
+    </ClCompile>
+    <ClCompile Include="IpsPatcher.cpp">
+      <Filter>Patches</Filter>
+    </ClCompile>
+    <ClCompile Include="BpsPatcher.cpp">
+      <Filter>Patches</Filter>
+    </ClCompile>
+    <ClCompile Include="sha1.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="stb_vorbis.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="VirtualFile.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/Utilities/VirtualFile.cpp b/Utilities/VirtualFile.cpp
new file mode 100644
index 0000000..59b7676
--- /dev/null
+++ b/Utilities/VirtualFile.cpp
@@ -0,0 +1,168 @@
+#include "stdafx.h"
+#include <algorithm>
+#include <iterator>
+#include "VirtualFile.h"
+#include "../Utilities/sha1.h"
+#include "../Utilities/ArchiveReader.h"
+#include "../Utilities/StringUtilities.h"
+#include "../Utilities/FolderUtilities.h"
+#include "../Utilities/BpsPatcher.h"
+#include "../Utilities/IpsPatcher.h"
+#include "../Utilities/UpsPatcher.h"
+
+//TODO (from the original author) - extensions used to pick ROM files when an archive entry is addressed by index
+const std::initializer_list<string> VirtualFile::RomExtensions = { ".nes", ".fds", ".nsf", ".nsfe", ".unf", ".unif" };
+
+VirtualFile::VirtualFile() //Creates an empty reference: every member keeps the default value declared in VirtualFile.h
+{
+}
+
+//References the entry named "innerFile" inside the archive located at "archivePath" (nothing is read yet)
+VirtualFile::VirtualFile(const string & archivePath, const string innerFile)
+	: _path(archivePath), _innerFile(innerFile)
+{
+}
+
+VirtualFile::VirtualFile(const string & file)
+{
+	//"file" is either a plain path, or "path\x1innerFile[\x1innerFileIndex]" for an entry stored inside an archive
+	vector<string> tokens = StringUtilities::Split(file, '\x1');
+	if(!tokens.empty()) _path = tokens[0]; //never index an empty token list
+	if(tokens.size() > 1) {
+		_innerFile = tokens[1];
+		if(tokens.size() > 2) {
+			//A non-numeric index is ignored on purpose: _innerFileIndex then keeps its default value
+			try { _innerFileIndex = std::stoi(tokens[2]); } catch(const std::exception&) {}
+		}
+	}
+}
+
+VirtualFile::VirtualFile(const void *buffer, size_t bufferSize, string fileName)
+{
+	//Wraps an in-memory buffer: "fileName" is kept as the path and the bytes are copied into _data
+	_path = fileName;
+	const uint8_t *bytes = (const uint8_t*)buffer;
+	if(bytes && bufferSize > 0) _data.assign(bytes, bytes + bufferSize); //a null/empty buffer yields empty data instead of a memcpy on an invalid pointer
+}
+
+//Reads the content from "input" right away; "filePath" is only recorded as the file's path
+VirtualFile::VirtualFile(std::istream & input, string filePath) : _path(filePath)
+{
+	FromStream(input, _data);
+}
+
+//Serializes the reference as "path" or "path\x1innerFile" (the inner file index is not part of the string)
+VirtualFile::operator std::string() const
+{
+	if(_innerFile.empty()) {
+		return _path;
+	}
+	//An inner file without the path of its archive cannot be represented
+	if(_path.empty()) throw std::runtime_error("Cannot convert to string");
+	return _path + "\x1" + _innerFile;
+}
+
+void VirtualFile::FromStream(std::istream & input, vector<uint8_t>& output)
+{
+	//Reads the whole stream, from its beginning, into "output"; when the size cannot be queried "output" ends up empty
+	input.seekg(0, std::ios::end);
+	std::streamoff fileSize = input.tellg(); //tellg() yields -1 on failure: cast to uint32_t it would request a ~4GB buffer
+	input.seekg(0, std::ios::beg);
+	output.resize(fileSize > 0 ? (size_t)fileSize : 0, 0);
+	if(fileSize > 0) input.read((char*)output.data(), fileSize);
+}
+
+void VirtualFile::LoadFile()
+{
+	//Fills _data on first use; does nothing once data is present (loaded earlier or supplied by a constructor)
+	if(_data.size() != 0) {
+		return;
+	}
+	if(_innerFile.empty()) {
+		//Plain file: read it from disk in binary mode
+		ifstream input(_path, std::ios::in | std::ios::binary);
+		if(input.good()) {
+			FromStream(input, _data);
+		}
+		return;
+	}
+	//Archive entry: picked by index among the archive's ROM files when an index is set, by name otherwise
+	shared_ptr<ArchiveReader> reader = ArchiveReader::GetReader(_path);
+	if(reader && _innerFileIndex < 0) {
+		reader->ExtractFile(_innerFile, _data);
+	} else if(reader) {
+		vector<string> filelist = reader->GetFileList(VirtualFile::RomExtensions);
+		if((int32_t)filelist.size() > _innerFileIndex) reader->ExtractFile(filelist[_innerFileIndex], _data);
+	}
+}
+
+bool VirtualFile::IsValid()
+{
+	LoadFile(); //make sure the content has been read (does nothing once data is present)
+	return !_data.empty(); //a file that could not be read, or that is empty, is reported as invalid
+}
+
+string VirtualFile::GetFilePath() //Path recorded at construction: the file itself, or the archive holding the inner file
+{
+	return _path;
+}
+
+string VirtualFile::GetFolderPath() //Folder part of _path, as computed by FolderUtilities::GetFolderName
+{
+	return FolderUtilities::GetFolderName(_path);
+}
+
+string VirtualFile::GetFileName() //Name of the inner file for an archive entry, otherwise the file name taken from _path
+{
+	return _innerFile.empty() ? FolderUtilities::GetFilename(_path, true) : _innerFile; //NOTE(review): "true" presumably keeps the extension - confirm in FolderUtilities
+}
+
+string VirtualFile::GetSha1Hash() //SHA-1 of the file's content, as returned by SHA1::GetHash
+{
+	LoadFile(); //the hash is computed on the loaded content (which stays empty if the file could not be read)
+	return SHA1::GetHash(_data);
+}
+
+bool VirtualFile::ReadFile(vector<uint8_t>& out)
+{
+	//Copies the file's content into "out"; returns false and leaves "out" untouched when there is no data
+	LoadFile();
+	if(_data.empty()) {
+		return false;
+	}
+	out.assign(_data.begin(), _data.end());
+	return true;
+}
+
+bool VirtualFile::ReadFile(std::stringstream & out)
+{
+	//Writes the file's raw bytes to "out"; returns false without writing anything when there is no data
+	LoadFile();
+	if(_data.empty()) return false;
+	//write() takes a char pointer, hence the cast from the uint8_t storage
+	out.write((char*)_data.data(), _data.size());
+	return true;
+}
+
+bool VirtualFile::ApplyPatch(VirtualFile &patch)
+{
+	//Applies an IPS ("PATCH"), UPS ("UPS1") or BPS ("BPS1") patch, chosen from the patch's first bytes; true when _data was replaced
+	LoadFile(); //content is loaded lazily: make sure the bytes to patch are in memory before handing them to a patcher
+	bool result = false;
+	if(patch.IsValid() && patch._data.size() >= 5) {
+		vector<uint8_t> patchedData;
+		std::stringstream ss;
+		patch.ReadFile(ss);
+		if(memcmp(patch._data.data(), "PATCH", 5) == 0) {
+			result = IpsPatcher::PatchBuffer(ss, _data, patchedData);
+		} else if(memcmp(patch._data.data(), "UPS1", 4) == 0) {
+			result = UpsPatcher::PatchBuffer(ss, _data, patchedData);
+		} else if(memcmp(patch._data.data(), "BPS1", 4) == 0) {
+			result = BpsPatcher::PatchBuffer(ss, _data, patchedData);
+		}
+		if(result) {
+			_data.swap(patchedData); //take over the patched buffer without copying it
+		}
+	}
+	return result;
+}
diff --git a/Utilities/VirtualFile.h b/Utilities/VirtualFile.h
new file mode 100644
index 0000000..590bf1c
--- /dev/null
+++ b/Utilities/VirtualFile.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "stdafx.h"
+#include <sstream>
+
+class VirtualFile //A file on disk, an entry stored inside an archive, or an in-memory buffer; disk/archive content is loaded on first use
+{
+private:
+	string _path = ""; //Path of the file or of the archive (or the name given to an in-memory buffer)
+	string _innerFile = ""; //Name of the entry inside the archive; empty for a plain file
+	int32_t _innerFileIndex = -1; //Index among the archive's ROM files; used instead of the name when >= 0
+	vector<uint8_t> _data; //File content; filled by LoadFile() or directly by the buffer/stream constructors
+
+	void FromStream(std::istream &input, vector<uint8_t> &output); //Reads a whole stream into "output"
+
+	void LoadFile(); //Loads _data from disk or from the archive if it is still empty
+
+public:
+	static const std::initializer_list<string> RomExtensions; //Extensions passed to ArchiveReader::GetFileList when resolving an entry by index
+
+	VirtualFile();
+	VirtualFile(const string &archivePath, const string innerFile); //Entry "innerFile" of the archive at "archivePath"
+	VirtualFile(const string &file); //Plain path, or the '\x1'-separated form: path, inner file, optional inner file index
+	VirtualFile(const void *buffer, size_t bufferSize, string fileName = "noname"); //Copy of an in-memory buffer
+	VirtualFile(std::istream &input, string filePath); //Content read immediately from "input"
+
+	operator std::string() const; //"path" or "path\x1innerFile"; throws std::runtime_error if only the inner file is set
+	
+	bool IsValid(); //True when the content could be loaded and is not empty
+	string GetFilePath();
+	string GetFolderPath();
+	string GetFileName();
+	string GetSha1Hash();
+
+	bool ReadFile(vector<uint8_t> &out); //Both overloads return false when there is no data to read
+	bool ReadFile(std::stringstream &out);
+
+	bool ApplyPatch(VirtualFile &patch); //Patches _data with an IPS/UPS/BPS patch; returns true on success
+};
\ No newline at end of file
diff --git a/Utilities/ZipReader.cpp b/Utilities/ZipReader.cpp
new file mode 100644
index 0000000..9023eb1
--- /dev/null
+++ b/Utilities/ZipReader.cpp
@@ -0,0 +1,66 @@
+#include "stdafx.h"
+#include <string.h>
+#include <sstream>
+#include "ZipReader.h"
+
+ZipReader::ZipReader()
+{
+	memset(&_zipArchive, 0, sizeof(_zipArchive)); //start from a zeroed miniz archive structure
+}
+
+ZipReader::~ZipReader()
+{
+	if(_initialized) { //NOTE(review): _initialized is not declared in ZipReader.h - presumably inherited from ArchiveReader
+		mz_zip_reader_end(&_zipArchive); //close the miniz reader only when an archive was loaded
+	}
+}
+
+bool ZipReader::InternalLoadArchive(void* buffer, size_t size) //Opens the zip held in memory at "buffer"; true when miniz accepts it
+{
+	if(_initialized) { //an archive is already open: release it before reusing the structure
+		mz_zip_reader_end(&_zipArchive);
+		memset(&_zipArchive, 0, sizeof(mz_zip_archive));
+		_initialized = false;
+	}
+
+	return mz_zip_reader_init_mem(&_zipArchive, buffer, size, 0) != 0; //NOTE(review): _initialized is not set to true here - presumably the caller does it; confirm
+}
+
+vector<string> ZipReader::InternalGetFileList()
+{
+	vector<string> fileList; //names of the archive's entries; stays empty when no archive is loaded
+	if(_initialized) {
+		for(int i = 0, len = (int)mz_zip_reader_get_num_files(&_zipArchive); i < len; i++) {
+			mz_zip_archive_file_stat file_stat;
+			if(mz_zip_reader_file_stat(&_zipArchive, i, &file_stat)) {
+				fileList.push_back(file_stat.m_filename);
+			} else {
+				std::cout << "mz_zip_reader_file_stat() failed!" << std::endl; //file_stat is not filled in on failure: skip the entry instead of reading garbage
+			}
+		}
+	}
+	return fileList;
+}
+
+bool ZipReader::ExtractFile(string filename, vector<uint8_t> &output)
+{
+	//Decompresses the entry named "filename" into "output"; false when no archive is loaded or the extraction fails
+	if(!_initialized) {
+		return false;
+	}
+
+	size_t uncompSize;
+	void *p = mz_zip_reader_extract_file_to_heap(&_zipArchive, filename.c_str(), &uncompSize, 0);
+	if(!p) {
+#ifdef _DEBUG
+		std::cout << "mz_zip_reader_extract_file_to_heap() failed!" << std::endl;
+#endif
+		return false;
+	}
+
+	//Copy the bytes out of the block allocated by miniz, then give the block back to miniz
+	uint8_t *bytes = (uint8_t*)p;
+	output = vector<uint8_t>(bytes, bytes + uncompSize);
+	mz_free(p);
+	return true;
+}
\ No newline at end of file
diff --git a/Utilities/ZipReader.h b/Utilities/ZipReader.h
new file mode 100644
index 0000000..4d4573e
--- /dev/null
+++ b/Utilities/ZipReader.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "stdafx.h"
+#include "miniz.h"
+#include "ArchiveReader.h"
+
+class ZipReader : public ArchiveReader //Reads zip archives held in memory, using miniz
+{
+private:
+	mz_zip_archive _zipArchive; //miniz reader state; zeroed in the constructor and when an archive is reloaded
+
+protected:
+	bool InternalLoadArchive(void* buffer, size_t size); //opens the zip stored in memory at "buffer"
+	vector<string> InternalGetFileList(); //names of the loaded archive's entries
+
+public:
+	ZipReader();
+	virtual ~ZipReader(); //closes the miniz reader if an archive is loaded
+
+	bool ExtractFile(string filename, vector<uint8_t> &output); //decompresses one entry into "output"; false on failure
+};
\ No newline at end of file
diff --git a/Utilities/ZipWriter.cpp b/Utilities/ZipWriter.cpp
new file mode 100644
index 0000000..37ef6b9
--- /dev/null
+++ b/Utilities/ZipWriter.cpp
@@ -0,0 +1,54 @@
+#include "stdafx.h"
+#include <string>
+#include <cstring>
+#include <sstream>
+#include "ZipWriter.h"
+#include "FolderUtilities.h"
+
+ZipWriter::ZipWriter() : _zipArchive() //value-initialize (zero) the miniz structure so it never holds garbage before Initialize() is called
+{
+}
+
+ZipWriter::~ZipWriter() //NOTE(review): nothing is released here - the miniz writer is only ended by Save(); confirm callers always call it
+{
+}
+
+bool ZipWriter::Initialize(string filename)
+{
+	memset(&_zipArchive, 0, sizeof(_zipArchive)); //start from a zeroed miniz archive structure
+	_zipFilename = filename;
+	return mz_zip_writer_init_file(&_zipArchive, _zipFilename.c_str(), 0) != 0; //true when miniz reports success for "filename"
+}
+
+bool ZipWriter::Save()
+{
+	const bool finalized = mz_zip_writer_finalize_archive(&_zipArchive) != 0;
+	const bool ended = mz_zip_writer_end(&_zipArchive) != 0; //always runs, even when finalizing failed
+	return finalized && ended; //success only if both steps succeeded
+}
+
+void ZipWriter::AddFile(string filepath, string zipFilename)
+{
+	//Compresses the file found at "filepath" into the archive under the name "zipFilename"; a failure is only logged
+	const bool added = mz_zip_writer_add_file(&_zipArchive, zipFilename.c_str(), filepath.c_str(), "", 0, MZ_BEST_COMPRESSION) != 0;
+	if(!added) std::cout << "mz_zip_writer_add_file() failed!" << std::endl;
+}
+
+void ZipWriter::AddFile(vector<uint8_t> &fileData, string zipFilename) //Compresses the bytes of "fileData" into the archive as "zipFilename"; a failure is only logged
+{
+	if(!mz_zip_writer_add_mem(&_zipArchive, zipFilename.c_str(), fileData.data(), fileData.size(), MZ_BEST_COMPRESSION)) {
+		std::cout << "mz_zip_writer_add_mem() failed!" << std::endl; //report the call that actually failed
+	}
+}
+
+void ZipWriter::AddFile(std::stringstream &filestream, string zipFilename)
+{
+	//Adds the whole content of "filestream", read from its beginning, to the archive under the name "zipFilename"
+	filestream.seekg(0, std::ios::end);
+	std::streamoff streamSize = filestream.tellg(); //-1 when the position cannot be queried
+	filestream.seekg(0, std::ios::beg);
+
+	vector<uint8_t> buffer(streamSize > 0 ? (size_t)streamSize : 0); //never size the buffer from a failed (-1) tellg()
+	if(!buffer.empty()) filestream.read((char*)buffer.data(), buffer.size());
+	AddFile(buffer, zipFilename);
+}
diff --git a/Utilities/ZipWriter.h b/Utilities/ZipWriter.h
new file mode 100644
index 0000000..45b111c
--- /dev/null
+++ b/Utilities/ZipWriter.h
@@ -0,0 +1,21 @@
+#pragma once
+#include "stdafx.h"
+#include "miniz.h"
+
+class ZipWriter //Builds a zip file on disk with miniz: Initialize(), any number of AddFile() calls, then Save()
+{
+private:
+	mz_zip_archive _zipArchive; //miniz writer state, reset by Initialize()
+	string _zipFilename; //path given to Initialize()
+
+public:
+	ZipWriter();
+	~ZipWriter();
+
+	bool Initialize(string filename); //starts a new archive at "filename"; false on failure
+	bool Save(); //finalizes the archive and ends the miniz writer; false if either step fails
+
+	void AddFile(string filepath, string zipFilename); //adds a file read from disk
+	void AddFile(vector<uint8_t> &fileData, string zipFilename); //adds a memory buffer
+	void AddFile(std::stringstream &filestream, string zipFilename); //adds the content of a stream
+};
\ No newline at end of file
diff --git a/Utilities/ZmbvCodec.cpp b/Utilities/ZmbvCodec.cpp
new file mode 100644
index 0000000..9fe112d
--- /dev/null
+++ b/Utilities/ZmbvCodec.cpp
@@ -0,0 +1,408 @@
+// This file is a part of Mesen
+// It is a heavily modified version of the zmbv.h/cpp file found in DOSBox's code.
+
+/*
+ *  Copyright (C) 2002-2011  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "stdafx.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "miniz.h"
+#include "ZmbvCodec.h"
+
+#define DBZV_VERSION_HIGH 0 //written to the keyframe header as high_version
+#define DBZV_VERSION_LOW 1 //written to the keyframe header as low_version
+
+#define COMPRESSION_NONE 0
+#define COMPRESSION_ZLIB 1 //compression id written to the keyframe header
+
+#define MAX_VECTOR	16 //padding, in pixels, added around the frame buffers (see pitch and block start offsets)
+
+#define Mask_KeyFrame			0x01 //flag set in the first byte of a frame when it is a keyframe
+#define	Mask_DeltaPalette		0x02
+
+int ZmbvCodec::NeededSize( int _width, int _height, zmbv_format_t _format) {
+	//Size in bytes of the output buffer allocated for a frame of the given dimensions; -1 for an unknown format
+	int bytesPerPixel;
+	switch (_format) {
+	case ZMBV_FORMAT_8BPP: bytesPerPixel = 1; break;
+	case ZMBV_FORMAT_15BPP:
+	case ZMBV_FORMAT_16BPP: bytesPerPixel = 2; break;
+	case ZMBV_FORMAT_32BPP: bytesPerPixel = 4; break;
+	default: return -1;
+	}
+	int size = bytesPerPixel*_width*_height + 2*(1+(_width/8)) * (1+(_height/8))+1024;
+	return size + size/1000;
+}
+
+bool ZmbvCodec::SetupBuffers(zmbv_format_t _format, int blockwidth, int blockheight) { //(Re)allocates the frame/work/output buffers and the block table for "_format"; false for an unknown format
+	FreeBuffers();
+	palsize = 0; //only the 8bpp format uses a palette
+	switch (_format) {
+	case ZMBV_FORMAT_8BPP:
+		pixelsize = 1;
+		palsize = 256;
+		break;
+	case ZMBV_FORMAT_15BPP:
+		pixelsize = 2;
+		break;
+	case ZMBV_FORMAT_16BPP:
+		pixelsize = 2;
+		break;
+	case ZMBV_FORMAT_32BPP:
+		pixelsize = 4;
+		break;
+	default:
+		return false;
+	};
+	bufsize = (height+2*MAX_VECTOR)*pitch*pixelsize+2048; //frame plus MAX_VECTOR padding rows above and below (pitch already includes the side padding)
+
+	buf1 = new unsigned char[bufsize];
+	buf2 = new unsigned char[bufsize];
+	work = new unsigned char[bufsize];
+
+	int xblocks = (width/blockwidth); //number of blocks per row/column, rounded up when the frame size is not a multiple of the block size
+	int xleft = width % blockwidth;
+	if (xleft) xblocks++;
+	int yblocks = (height/blockheight);
+	int yleft = height % blockheight;
+	if (yleft) yblocks++;
+	blockcount=yblocks*xblocks;
+	blocks=new FrameBlock[blockcount];
+
+	if (!buf1 || !buf2 || !work || !blocks) { //NOTE(review): a plain new throws std::bad_alloc instead of returning null, so this check looks unreachable unless operator new is replaced
+		FreeBuffers();
+		return false;
+	}
+	int y,x,i;
+	i=0;
+	for (y=0;y<yblocks;y++) {
+		for (x=0;x<xblocks;x++) {
+			blocks[i].start=((y*blockheight)+MAX_VECTOR)*pitch+ //offset, in pixels, of the block's top-left corner inside the padded frame
+				(x*blockwidth)+MAX_VECTOR;
+			if (xleft && x==(xblocks-1)) { //the last column/row of blocks may be narrower/shorter than a full block
+                blocks[i].dx=xleft;
+			} else {
+				blocks[i].dx=blockwidth;
+			}
+			if (yleft && y==(yblocks-1)) {
+                blocks[i].dy=yleft;
+			} else {
+				blocks[i].dy=blockheight;
+			}
+			i++;
+		}
+	}
+
+	memset(buf1,0,bufsize);
+	memset(buf2,0,bufsize);
+	memset(work,0,bufsize);
+	oldframe=buf1;
+	newframe=buf2;
+	format = _format;
+
+	_bufSize = NeededSize(width, height, format); //output buffer later used as compressInfo.writeBuf
+	_buf = new uint8_t[_bufSize]; //NOTE(review): confirm FreeBuffers() releases _buf, otherwise it leaks on every format change
+
+	return true;
+}
+
+void ZmbvCodec::CreateVectorTable(void) { //Fills VectorTable with the candidate motion vectors, nearest first
+	int x,y,s;
+	VectorCount=1;
+
+	VectorTable[0].x=VectorTable[0].y=0; //entry 0 is the null vector (no motion)
+	for (s=1;s<=10;s++) { //s = distance of the square ring around (0,0) being added, from 1 to 10
+		for (y=0-s;y<=0+s;y++) for (x=0-s;x<=0+s;x++) {
+			if (abs(x)==s || abs(y)==s) { //keep only the points on the ring's border; inner points were added by smaller rings
+				VectorTable[VectorCount].x=x;
+				VectorTable[VectorCount].y=y;
+				VectorCount++;
+			}
+		}
+	}
+}
+
+template<class P> //P = integer type holding one pixel
+INLINE int ZmbvCodec::PossibleBlock(int vx,int vy,FrameBlock * block) { //Cheap pre-check of a motion vector: counts differing pixels on a sample of the block
+	int ret=0;
+	P * pold=((P*)oldframe)+block->start+(vy*pitch)+vx; //block position in the old frame, displaced by (vx,vy)
+	P * pnew=((P*)newframe)+block->start;;	//same block in the new frame, not displaced
+	for (int y=0;y<block->dy;y+=4) { //only every 4th pixel of every 4th row is compared
+		for (int x=0;x<block->dx;x+=4) {
+			int test=0-((pold[x]-pnew[x])&0x00ffffff); //negative exactly when the low 24 bits of the two pixels differ
+			ret-=(test>>31); //adds 1 per differing pixel (NOTE(review): relies on an arithmetic right shift of a negative int)
+		}
+		pold+=pitch*4;
+		pnew+=pitch*4;
+	}
+	return ret;
+}
+
+template<class P> //P = integer type holding one pixel
+INLINE int ZmbvCodec::CompareBlock(int vx,int vy,FrameBlock * block) { //Counts the pixels of the block that differ between the displaced old frame and the new frame
+	int ret=0;
+	P * pold=((P*)oldframe)+block->start+(vy*pitch)+vx; //block position in the old frame, displaced by (vx,vy)
+	P * pnew=((P*)newframe)+block->start;;	//same block in the new frame, not displaced
+	for (int y=0;y<block->dy;y++) { //unlike PossibleBlock, every pixel of the block is compared
+		for (int x=0;x<block->dx;x++) {
+			int test=0-((pold[x]-pnew[x])&0x00ffffff); //negative exactly when the low 24 bits of the two pixels differ
+			ret-=(test>>31); //adds 1 per differing pixel (NOTE(review): relies on an arithmetic right shift of a negative int)
+		}
+		pold+=pitch;
+		pnew+=pitch;
+	}
+	return ret;
+}
+
+template<class P> //P = integer type holding one pixel
+INLINE void ZmbvCodec::AddXorBlock(int vx,int vy,FrameBlock * block) { //Appends to the work buffer the xor of the new block with the old block displaced by (vx,vy)
+	P * pold=((P*)oldframe)+block->start+(vy*pitch)+vx;
+	P * pnew=((P*)newframe)+block->start;
+	for (int y=0;y<block->dy;y++) {
+		for (int x=0;x<block->dx;x++) {
+			*((P*)&work[workUsed])=pnew[x] ^ pold[x]; //one xor-ed pixel is written at the current end of the work buffer
+			workUsed+=sizeof(P);
+		}
+		pold+=pitch;
+		pnew+=pitch;
+	}
+}
+
+template<class P> //P = integer type holding one pixel
+void ZmbvCodec::AddXorFrame(void) { //Encodes the new frame as a delta: one motion vector per block, plus xor data for the blocks that still differ
+	signed char * vectors=(signed char*)&work[workUsed]; //2 bytes (x,y) per block are reserved at the current end of the work buffer
+	/* Align the following xor data on 4 byte boundary*/
+	workUsed=(workUsed + blockcount*2 +3) & ~3;
+	for (int b=0;b<blockcount;b++) {
+		FrameBlock * block=&blocks[b];
+		int bestvx = 0;
+		int bestvy = 0;
+		int bestchange=CompareBlock<P>(0,0, block); //baseline: number of differing pixels without any motion
+		int possibles=64; //at most 64 candidate vectors get a full comparison
+		for (int v=0;v<VectorCount && possibles;v++) {
+			if (bestchange<4) break; //fewer than 4 differing pixels is considered good enough: stop searching
+			int vx = VectorTable[v].x;
+			int vy = VectorTable[v].y;
+			if (PossibleBlock<P>(vx, vy, block) < 4) { //sampled pre-check before paying for the full comparison
+				possibles--;
+				int testchange=CompareBlock<P>(vx,vy, block);
+				if (testchange<bestchange) {
+					bestchange=testchange;
+					bestvx = vx;
+					bestvy = vy;
+				}
+			}
+		}
+		vectors[b*2+0]=(bestvx << 1); //vectors are stored shifted left by one bit
+		vectors[b*2+1]=(bestvy << 1);
+		if (bestchange) {
+			vectors[b*2+0]|=1; //bit 0 of the x byte flags that xor data was emitted for this block
+			AddXorBlock<P>(bestvx, bestvy, block);
+		}
+	}
+}
+
+// Stores the frame dimensions and initializes the deflate stream; returns false if deflateInit fails.
+bool ZmbvCodec::SetupCompress( int _width, int _height, uint32_t compressionLevel ) {
+	height = _height;
+	width = _width;
+	// Row pitch (in pixels) is the width plus 2*MAX_VECTOR pixels of padding
+	pitch = 2*MAX_VECTOR + _width;
+	// No pixel format selected yet
+	format = ZMBV_FORMAT_NONE;
+	return deflateInit (&zstream, compressionLevel) == Z_OK;
+}
+// Starts a new frame: swaps the frame buffers, resets the output/work buffers and stages header + palette data.
+bool ZmbvCodec::PrepareCompressFrame(int flags, zmbv_format_t _format, char * pal)
+{
+	int i;
+	unsigned char *firstByte;
+
+	if (_format != format) {
+		if (!SetupBuffers( _format, 16, 16))
+			return false;
+		flags|=1;	//Force a keyframe
+	}
+	/* Swap the buffers: the previous new frame becomes the old frame */
+	unsigned char *copyFrame = newframe;
+	newframe = oldframe;
+	oldframe = copyFrame;
+
+	compressInfo.linesDone = 0;
+	compressInfo.writeSize = _bufSize;
+	compressInfo.writeDone = 1;	// byte 0 of the output is reserved for the frame flags
+	compressInfo.writeBuf = (unsigned char *)_buf;
+	/* Set a pointer to the first byte which will contain info about this frame */
+	firstByte = compressInfo.writeBuf;
+	*firstByte = 0;
+	//Reset the work buffer
+	workUsed = 0;workPos = 0;
+	if (flags & 1) {
+		/* Make a keyframe */
+		*firstByte |= Mask_KeyFrame;
+		KeyframeHeader * header = (KeyframeHeader *)(compressInfo.writeBuf + compressInfo.writeDone);
+		header->high_version = DBZV_VERSION_HIGH;
+		header->low_version = DBZV_VERSION_LOW;
+		header->compression = COMPRESSION_ZLIB;
+		header->format = format;
+		header->blockwidth = 16;
+		header->blockheight = 16;
+		compressInfo.writeDone += sizeof(KeyframeHeader);
+		/* Stage the palette (if any); the pixels are added in FinishCompressFrame */
+		if (palsize) {
+			if (pal)
+				memcpy(&palette, pal, sizeof(palette));	// copies the whole 256*4 byte table regardless of palsize
+			else 
+				memset(&palette,0, sizeof(palette));
+			/* keyframes get the full palette (first 3 bytes of every 4-byte entry) */
+			for (i=0;i<palsize;i++) {
+				work[workUsed++] = palette[i*4+0];
+				work[workUsed++] = palette[i*4+1];
+				work[workUsed++] = palette[i*4+2];
+			}
+		}
+		/* Restart deflate */
+		deflateReset(&zstream);
+	} else {
+		/* Delta frame: a changed palette is sent as the xor against the stored one */
+		if (palsize && pal && memcmp(pal, palette, palsize * 4)) {
+			*firstByte |= Mask_DeltaPalette;
+			for(i=0;i<palsize;i++) {
+				work[workUsed++]=palette[i*4+0] ^ pal[i*4+0];
+				work[workUsed++]=palette[i*4+1] ^ pal[i*4+1];
+				work[workUsed++]=palette[i*4+2] ^ pal[i*4+2];
+			}
+			memcpy(&palette,pal, palsize * 4);	// remember the new palette for the next comparison
+		}
+	}
+	return true;
+}
+
+// Copies up to lineCount source rows into the padded new frame, continuing after the rows already copied.
+void ZmbvCodec::CompressLines(int lineCount, void *lineData[])
+{
+	const int rowBytes = width * pixelsize;
+	const int strideBytes = pitch * pixelsize;
+	// Skip the MAX_VECTOR padding rows/columns plus the rows copied by earlier calls
+	unsigned char *dest = newframe + pixelsize*(MAX_VECTOR+(compressInfo.linesDone+MAX_VECTOR)*pitch);
+	for (int line = 0; line < lineCount && compressInfo.linesDone < height; line++, compressInfo.linesDone++) {
+		memcpy(dest, lineData[line], rowBytes);
+		dest += strideBytes;
+	}
+}
+
+// Serializes the current frame into the work buffer (raw pixels for a keyframe, vectors + xor
+// data otherwise), deflates it behind the bytes already written to the output buffer, stores
+// _buf in *compressedData and returns the total number of output bytes.
+int ZmbvCodec::FinishCompressFrame(uint8_t** compressedData)
+{
+	const bool isKeyFrame = (*compressInfo.writeBuf & Mask_KeyFrame) != 0;
+	if (!isKeyFrame) {
+		/* Delta frame: the pixel type decides which instantiation runs */
+		if (format == ZMBV_FORMAT_8BPP) {
+			AddXorFrame<int8_t>();
+		} else if (format == ZMBV_FORMAT_15BPP || format == ZMBV_FORMAT_16BPP) {
+			AddXorFrame<int16_t>();
+		} else {
+			/* 32bpp and every other format */
+			AddXorFrame<int32_t>();
+		}
+	} else {
+		/* Keyframe: copy the visible part of each row, leaving out the padding */
+		const int rowBytes = width*pixelsize;
+		const int strideBytes = pitch*pixelsize;
+		const unsigned char * src = newframe + pixelsize*(MAX_VECTOR+MAX_VECTOR*pitch);
+		for (int row=0; row<height; row++, src+=strideBytes) {
+			memcpy(&work[workUsed], src, rowBytes);
+			workUsed += rowBytes;
+		}
+	}
+
+	/* Compress everything gathered in the work buffer */
+	zstream.next_in = (Bytef *)work;
+	zstream.avail_in = workUsed;
+	zstream.total_in = 0;
+
+	/* Output starts right after the bytes written by PrepareCompressFrame */
+	unsigned char * const outStart = compressInfo.writeBuf + compressInfo.writeDone;
+	zstream.next_out = (Bytef *)outStart;
+	zstream.avail_out = compressInfo.writeSize - compressInfo.writeDone;
+	zstream.total_out = 0;
+
+	deflate(&zstream, Z_SYNC_FLUSH);
+
+	*compressedData = _buf;
+	const int headerBytes = compressInfo.writeDone;
+	return headerBytes + zstream.total_out;
+}
+
+// Frees every heap buffer owned by the codec and resets the pointers.
+// delete[] on a null pointer does nothing, so no null checks are required.
+// oldframe/newframe are not modified here.
+void ZmbvCodec::FreeBuffers()
+{
+	// Block table
+	delete[] blocks;
+	blocks = nullptr;
+
+	delete[] buf1;
+	buf1 = nullptr;
+
+	delete[] buf2;
+	buf2 = nullptr;
+
+	// Data staged for deflate
+	delete[] work;
+	work = nullptr;
+
+	// Compressed output
+	delete[] _buf;
+	_buf = nullptr;
+}
+
+// Fills the vector table; the codec starts without buffers and with a zeroed deflate stream.
+ZmbvCodec::ZmbvCodec()
+	: buf1(nullptr), buf2(nullptr),
+	  work(nullptr), blocks(nullptr)
+{
+	CreateVectorTable();
+	// The stream is only initialized for real in SetupCompress
+	memset(&zstream, 0, sizeof(zstream));
+}
+
+// Compresses one frame of 32bpp pixels stored as tightly packed rows; returns the compressed size or -1.
+int ZmbvCodec::CompressFrame(bool isKeyFrame, uint8_t *frameData, uint8_t** compressedData)
+{
+	const int flags = isKeyFrame ? 1 : 0;	// bit 0 requests a keyframe
+	if(!PrepareCompressFrame(flags, ZMBV_FORMAT_32BPP, nullptr)) return -1;
+	// Hand the rows over one at a time
+	uint8_t* row = frameData;
+	for(int y = 0; y < height; y++, row += width*4) {
+		void * rowPointer = row;
+		CompressLines(1, &rowPointer);
+	}
+	return FinishCompressFrame(compressedData);
+}
+
+// Returns the codec's FourCC string.
+const char* ZmbvCodec::GetFourCC() {
+	return "ZMBV";
+}
\ No newline at end of file
diff --git a/Utilities/ZmbvCodec.h b/Utilities/ZmbvCodec.h
new file mode 100644
index 0000000..5e3c285
--- /dev/null
+++ b/Utilities/ZmbvCodec.h
@@ -0,0 +1,115 @@
+// This file is a part of Mesen
+// It is a heavily modified version of the zmbv.h/cpp file found in DOSBox's code.
+
+/*
+ *  Copyright (C) 2002-2011  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#pragma once
+
+#include "BaseCodec.h"
+#include "miniz.h"
+
+#ifdef _MSC_VER	// INLINE: forced inlining on MSVC, plain inline elsewhere
+#define INLINE __forceinline
+#else
+#define INLINE inline
+#endif
+// Pixel formats; the selected value is also stored in the format byte of the keyframe header.
+typedef enum {
+	ZMBV_FORMAT_NONE		= 0x00,
+	ZMBV_FORMAT_1BPP		= 0x01,
+	ZMBV_FORMAT_2BPP		= 0x02,
+	ZMBV_FORMAT_4BPP		= 0x03,
+	ZMBV_FORMAT_8BPP		= 0x04,
+	ZMBV_FORMAT_15BPP	= 0x05,
+	ZMBV_FORMAT_16BPP	= 0x06,
+	ZMBV_FORMAT_24BPP	= 0x07,
+	ZMBV_FORMAT_32BPP	= 0x08
+} zmbv_format_t;
+
+// ZMBV video encoder. Owns raw heap buffers and a deflate stream, so instances cannot be copied.
+class ZmbvCodec : public BaseCodec
+{
+private:
+	struct FrameBlock {
+		int start = 0;	// offset (in pixels) of the block's first pixel inside a frame buffer
+		int dx = 0,dy = 0;	// block width / height in pixels
+	};
+	struct CodecVector {
+		int x = 0,y = 0;
+		int slot = 0;
+	};
+	struct KeyframeHeader {	// written right after the flags byte of a keyframe
+		unsigned char high_version = 0;
+		unsigned char low_version = 0;
+		unsigned char compression = 0;
+		unsigned char format = 0;
+		unsigned char blockwidth = 0,blockheight = 0;
+	};
+	struct {	// state of the frame currently being compressed
+		int		linesDone = 0;
+		int		writeSize = 0;
+		int		writeDone = 0;
+		unsigned char	*writeBuf = nullptr;
+	} compressInfo;
+
+	CodecVector VectorTable[512] = {};
+	int VectorCount = 0;
+
+	unsigned char *oldframe=nullptr, *newframe=nullptr;
+	unsigned char *buf1=nullptr, *buf2=nullptr, *work=nullptr;
+	int bufsize = 0;
+
+	int blockcount = 0;
+	FrameBlock * blocks = nullptr;
+
+	int workUsed = 0, workPos = 0;
+	int palsize = 0;
+	char palette[256*4] = {};
+	int height = 0, width = 0, pitch = 0;
+	zmbv_format_t format = zmbv_format_t::ZMBV_FORMAT_NONE;
+	int pixelsize = 0;
+
+	uint8_t* _buf = nullptr;
+	uint32_t _bufSize = 0;
+	z_stream zstream = {};
+
+	void FreeBuffers(void);
+	void CreateVectorTable(void);
+	bool SetupBuffers(zmbv_format_t format, int blockwidth, int blockheight);
+
+	template<class P> void AddXorFrame(void);
+	template<class P> INLINE int PossibleBlock(int vx,int vy,FrameBlock * block);
+	template<class P> INLINE int CompareBlock(int vx,int vy,FrameBlock * block);
+	template<class P> INLINE void AddXorBlock(int vx,int vy,FrameBlock * block);
+
+	int NeededSize(int _width, int _height, zmbv_format_t _format);
+	void CompressLines(int lineCount, void *lineData[]);
+	bool PrepareCompressFrame(int flags, zmbv_format_t _format, char * pal);
+	int FinishCompressFrame(uint8_t** compressedData);
+
+public:
+	ZmbvCodec();
+	// Releases the heap buffers and the deflate stream (safe even if SetupCompress was never called).
+	~ZmbvCodec() { FreeBuffers(); deflateEnd(&zstream); }
+	ZmbvCodec(const ZmbvCodec&) = delete;	// a copy would free the same buffers twice
+	ZmbvCodec& operator=(const ZmbvCodec&) = delete;
+	bool SetupCompress(int _width, int _height, uint32_t compressionLevel) override;
+	int CompressFrame(bool isKeyFrame, uint8_t *frameData, uint8_t** compressedData) override;
+	const char* GetFourCC() override;
+};
diff --git a/Utilities/blip_buf.cpp b/Utilities/blip_buf.cpp
new file mode 100644
index 0000000..ad42438
--- /dev/null
+++ b/Utilities/blip_buf.cpp
@@ -0,0 +1,345 @@
+/* blip_buf 1.1.0. http://www.slack.net/~ant/ */
+
+#include "stdafx.h"
+#include "blip_buf.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* Library Copyright (C) 2003-2009 Shay Green. This library is free software;
+you can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+library is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#if defined (BLARGG_TEST) && BLARGG_TEST
+	#include "blargg_test.h"
+#endif
+
+/* Equivalent to ULONG_MAX >= 0xFFFFFFFF00000000.
+Avoids constants that don't fit in 32 bits. */
+#if ULONG_MAX/0xFFFFFFFF > 0xFFFFFFFF
+	typedef unsigned long fixed_t;
+	enum { pre_shift = 32 };
+
+#elif defined(ULLONG_MAX)
+	typedef unsigned long long fixed_t;
+	enum { pre_shift = 32 };
+
+#else
+	typedef unsigned fixed_t;
+	enum { pre_shift = 0 };
+
+#endif
+
+enum { time_bits = pre_shift + 20 }; /* fractional bits of the fixed-point sample position */
+
+static fixed_t const time_unit = (fixed_t) 1 << time_bits; /* one output sample in fixed-point time */
+
+enum { bass_shift  = 9 }; /* affects high-pass filter breakpoint frequency */
+enum { end_frame_extra = 2 }; /* allows deltas slightly after frame length */
+
+enum { half_width  = 8 }; /* coefficients per row of bl_step */
+enum { buf_extra   = half_width*2 + end_frame_extra }; /* slack samples allocated past the requested size */
+enum { phase_bits  = 5 };
+enum { phase_count = 1 << phase_bits }; /* bl_step has phase_count + 1 rows */
+enum { delta_bits  = 15 };
+enum { delta_unit  = 1 << delta_bits };
+enum { frac_bits = time_bits - pre_shift }; /* fractional bits left once pre_shift bits are dropped */
+
+/* We could eliminate avail and encode whole samples in offset, but that would
+limit the total buffered samples to blip_max_frame. That could only be
+increased by decreasing time_bits, which would reduce resample ratio accuracy.
+*/
+
+/** Sample buffer that resamples to output rate and accumulates samples
+until they're read out */
+struct blip_t
+{
+	fixed_t factor;     /* fixed-point output samples per input clock (see blip_set_rates) */
+	fixed_t offset;     /* fractional sample position carried over between frames */
+	int avail;          /* samples ready to be read */
+	int size;           /* sample capacity passed to blip_new */
+	int integrator;     /* running sum kept between blip_read_samples calls */
+};
+
+typedef int buf_t;
+/* The samples are stored directly behind the blip_t header, in the same allocation. */
+/* probably not totally portable */
+#define SAMPLES( buf ) ((buf_t*) ((buf) + 1))
+
+/* Arithmetic (sign-preserving) right shift */
+#define ARITH_SHIFT( n, shift ) \
+	((n) >> (shift))
+
+enum { max_sample = +32767 };
+enum { min_sample = -32768 };
+/* Saturates n when it does not fit in a short (check_assumptions verifies both directions) */
+#define CLAMP( n ) \
+	{\
+		if ( (short) n != n )\
+			n = ARITH_SHIFT( n, 16 ) ^ max_sample;\
+	}
+/* Verifies (via #error / assert) the platform and configuration assumptions this file relies on. */
+static void check_assumptions( void )
+{
+	int n;
+	
+	#if INT_MAX < 0x7FFFFFFF || UINT_MAX < 0xFFFFFFFF
+		#error "int must be at least 32 bits"
+	#endif
+	
+	assert( (-3 >> 1) == -2 ); /* right shift must preserve sign */
+	
+	n = max_sample * 2; /* above the 16-bit range: must clamp to max_sample */
+	CLAMP( n );
+	assert( n == max_sample );
+	
+	n = min_sample * 2; /* below the 16-bit range: must clamp to min_sample */
+	CLAMP( n );
+	assert( n == min_sample );
+	
+	assert( blip_max_ratio <= time_unit ); /* keeps time_unit / blip_max_ratio (blip_new) nonzero */
+	assert( blip_max_frame <= (fixed_t) -1 >> time_bits );
+}
+
+/* Allocates header and samples in one block; returns NULL if malloc fails. */
+blip_t* blip_new( int size )
+{
+	blip_t* buf;
+	assert( size >= 0 );
+	buf = (blip_t*) malloc( sizeof (blip_t) + (size + buf_extra) * sizeof (buf_t) );
+	if ( buf == NULL )
+		return NULL;
+	/* Default rate: blip_max_ratio clocks per sample */
+	buf->factor = time_unit / blip_max_ratio;
+	buf->size   = size;
+	blip_clear( buf );
+	check_assumptions();
+	return buf;
+}
+
+/* Frees a buffer created by blip_new; NULL is ignored. */
+void blip_delete( blip_t* m )
+{
+	if ( m == NULL )
+		return;
+	/* Clear fields in case user tries to use after freeing */
+	memset( m, 0, sizeof (blip_t) );
+	free( m );
+}
+/* Sets m->factor to time_unit * sample_rate / clock_rate, rounded up to an integer. */
+void blip_set_rates( blip_t* m, double clock_rate, double sample_rate )
+{
+	double factor = time_unit * sample_rate / clock_rate;
+	m->factor = (fixed_t) factor; /* truncates toward zero */
+	
+	/* Fails if clock_rate exceeds maximum, relative to sample_rate */
+	assert( 0 <= factor - m->factor && factor - m->factor < 1 );
+	
+	/* Avoid requiring math.h. Equivalent to
+	m->factor = (int) ceil( factor ) */
+	if ( m->factor < factor )
+		m->factor++;
+	
+	/* At this point, factor is most likely rounded up, but could still
+	have been rounded down in the floating-point calculation. */
+}
+
+/* Empties the buffer: no samples available, integrator reset, every stored delta zeroed. */
+void blip_clear( blip_t* m )
+{
+	/* Wipe the sample area, including the buf_extra slack behind it */
+	memset( SAMPLES( m ), 0, (m->size + buf_extra) * sizeof (buf_t) );
+	m->integrator = 0;
+	m->avail      = 0;
+	/* The starting offset could be 0, factor/2 or factor-1. 0 suits a factor that
+	was rounded up and factor-1 one that was rounded down; the rounding direction
+	is unknown here, so factor/2 is used because it tolerates both. The price is
+	that an error would show up in half the time, which for a 64-bit factor is still years. */
+	m->offset     = m->factor / 2;
+}
+
+/* Clocks that must pass before `samples` more samples are available (0 if already covered). */
+int blip_clocks_needed( const blip_t* m, int samples )
+{
+	fixed_t target;
+	
+	/* Fails if buffer can't hold that many more samples */
+	assert( samples >= 0 && m->avail + samples <= m->size );
+	
+	target = (fixed_t) samples * time_unit;
+	/* Round the missing time up to whole clocks */
+	return ( target < m->offset ) ? 0
+		: (int)((target - m->offset + m->factor - 1) / m->factor);
+}
+
+/* Ends the frame at clock t: whole samples become readable, the fraction carries over. */
+void blip_end_frame( blip_t* m, unsigned t )
+{
+	fixed_t const end_pos = m->offset + t * m->factor;
+	m->offset = end_pos & (time_unit - 1);
+	m->avail += end_pos >> time_bits;
+	/* Fails if buffer size was exceeded */
+	assert( m->avail <= m->size );
+}
+
+/* Number of samples that can currently be read. */
+int blip_samples_avail( const blip_t* m ) {
+	return m->avail;
+}
+
+/* Drops the first `count` samples: moves the rest (buf_extra tail included) down, zeroes the freed end. */
+static void remove_samples( blip_t* m, int count )
+{
+	buf_t* const samples = SAMPLES( m );
+	int const kept = m->avail + buf_extra - count;
+	memmove( samples, samples + count, kept * sizeof (buf_t) );
+	memset( samples + kept, 0, count * sizeof (buf_t) );
+	m->avail -= count;
+}
+/* Integrates up to `count` buffered deltas into 16-bit samples, removes them and returns how many were written. */
+int blip_read_samples( blip_t* m, short out [], int count, int stereo )
+{
+	assert( count >= 0 );
+	
+	if ( count > m->avail )
+		count = m->avail;
+	
+	if ( count )
+	{
+		int const step = stereo ? 2 : 1; /* stereo writes every other element of out */
+		buf_t const* in  = SAMPLES( m );
+		buf_t const* end = in + count;
+		int sum = m->integrator;
+		do
+		{
+			/* Eliminate fraction */
+			int s = ARITH_SHIFT( sum, delta_bits );
+			
+			sum += *in++;
+			
+			CLAMP( s );
+			
+			*out = s;
+			out += step;
+			
+			/* High-pass filter */
+			sum -= s << (delta_bits - bass_shift);
+		}
+		while ( in != end );
+		m->integrator = sum; /* carried over to the next call */
+		
+		remove_samples( m, count );
+	}
+	
+	return count;
+}
+
+/* Things that didn't help performance on x86:
+	__attribute__((aligned(128)))
+	#define short int
+	restrict
+*/
+/* Step kernel for blip_add_delta: phase_count + 1 rows, since two adjacent rows are read per delta. */
+/* Sinc_Generator( 0.9, 0.55, 4.5 ) */
+static short const bl_step [phase_count + 1] [half_width] =
+{
+{   43, -115,  350, -488, 1136, -914, 5861,21022},
+{   44, -118,  348, -473, 1076, -799, 5274,21001},
+{   45, -121,  344, -454, 1011, -677, 4706,20936},
+{   46, -122,  336, -431,  942, -549, 4156,20829},
+{   47, -123,  327, -404,  868, -418, 3629,20679},
+{   47, -122,  316, -375,  792, -285, 3124,20488},
+{   47, -120,  303, -344,  714, -151, 2644,20256},
+{   46, -117,  289, -310,  634,  -17, 2188,19985},
+{   46, -114,  273, -275,  553,  117, 1758,19675},
+{   44, -108,  255, -237,  471,  247, 1356,19327},
+{   43, -103,  237, -199,  390,  373,  981,18944},
+{   42,  -98,  218, -160,  310,  495,  633,18527},
+{   40,  -91,  198, -121,  231,  611,  314,18078},
+{   38,  -84,  178,  -81,  153,  722,   22,17599},
+{   36,  -76,  157,  -43,   80,  824, -241,17092},
+{   34,  -68,  135,   -3,    8,  919, -476,16558},
+{   32,  -61,  115,   34,  -60, 1006, -683,16001},
+{   29,  -52,   94,   70, -123, 1083, -862,15422},
+{   27,  -44,   73,  106, -184, 1152,-1015,14824},
+{   25,  -36,   53,  139, -239, 1211,-1142,14210},
+{   22,  -27,   34,  170, -290, 1261,-1244,13582},
+{   20,  -20,   16,  199, -335, 1301,-1322,12942},
+{   18,  -12,   -3,  226, -375, 1331,-1376,12293},
+{   15,   -4,  -19,  250, -410, 1351,-1408,11638},
+{   13,    3,  -35,  272, -439, 1361,-1419,10979},
+{   11,    9,  -49,  292, -464, 1362,-1410,10319},
+{    9,   16,  -63,  309, -483, 1354,-1383, 9660},
+{    7,   22,  -75,  322, -496, 1337,-1339, 9005},
+{    6,   26,  -85,  333, -504, 1312,-1280, 8355},
+{    4,   31,  -94,  341, -507, 1278,-1205, 7713},
+{    3,   35, -102,  347, -506, 1238,-1119, 7082},
+{    1,   40, -110,  350, -499, 1190,-1021, 6464},
+{    0,   43, -115,  350, -488, 1136, -914, 5861}
+};
+
+/* Shifting by pre_shift allows calculation using unsigned int rather than
+possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient.
+And by having pre_shift 32, a 32-bit platform can easily do the shift by
+simply ignoring the low half. */
+/* Adds delta at clock `time`, spread over 16 consecutive samples using the bl_step kernel. */
+void blip_add_delta( blip_t* m, unsigned time, int delta )
+{
+	unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
+	buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits); /* whole part selects the first sample written */
+	
+	int const phase_shift = frac_bits - phase_bits;
+	int phase = fixed >> phase_shift & (phase_count - 1); /* top phase_bits of the fraction pick the kernel row */
+	short const* in  = bl_step [phase];
+	short const* rev = bl_step [phase_count - phase]; /* mirrored row, read backwards for the second half */
+	
+	int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1); /* fraction below the phase bits */
+	int delta2 = (delta * interp) >> delta_bits; /* share applied through the neighbouring row */
+	delta -= delta2;
+	
+	/* Fails if buffer size was exceeded */
+	assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
+	
+	out [0] += in[0]*delta + in[half_width+0]*delta2; /* in[half_width+i] is element i of the next row */
+	out [1] += in[1]*delta + in[half_width+1]*delta2;
+	out [2] += in[2]*delta + in[half_width+2]*delta2;
+	out [3] += in[3]*delta + in[half_width+3]*delta2;
+	out [4] += in[4]*delta + in[half_width+4]*delta2;
+	out [5] += in[5]*delta + in[half_width+5]*delta2;
+	out [6] += in[6]*delta + in[half_width+6]*delta2;
+	out [7] += in[7]*delta + in[half_width+7]*delta2;
+	
+	in = rev;
+	out [ 8] += in[7]*delta + in[7-half_width]*delta2; /* in[i-half_width] is element i of the previous row */
+	out [ 9] += in[6]*delta + in[6-half_width]*delta2;
+	out [10] += in[5]*delta + in[5-half_width]*delta2;
+	out [11] += in[4]*delta + in[4-half_width]*delta2;
+	out [12] += in[3]*delta + in[3-half_width]*delta2;
+	out [13] += in[2]*delta + in[2-half_width]*delta2;
+	out [14] += in[1]*delta + in[1-half_width]*delta2;
+	out [15] += in[0]*delta + in[0-half_width]*delta2;
+}
+/* Like blip_add_delta, but only splits delta linearly between two adjacent samples (no kernel). */
+void blip_add_delta_fast( blip_t* m, unsigned time, int delta )
+{
+	unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
+	buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);
+	
+	int interp = fixed >> (frac_bits - delta_bits) & (delta_unit - 1); /* top delta_bits of the fraction */
+	int delta2 = delta * interp; /* part that goes to the later sample */
+	
+	/* Fails if buffer size was exceeded */
+	assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
+	
+	out [7] += delta * delta_unit - delta2;
+	out [8] += delta2;
+}
diff --git a/Utilities/blip_buf.h b/Utilities/blip_buf.h
new file mode 100644
index 0000000..a50ff96
--- /dev/null
+++ b/Utilities/blip_buf.h
@@ -0,0 +1,81 @@
+#pragma once
+#include "stdafx.h"
+
+/** \file
+Sample buffer that resamples from input clock rate to output sample rate */
+
+/* blip_buf 1.1.0 */
+#ifndef BLIP_BUF_H 
+#define BLIP_BUF_H
+/* EXPORT: dllexport when built with MSVC, nothing elsewhere. Only some functions below carry it. */
+#if defined(_MSC_VER)
+    #define EXPORT __declspec(dllexport)
+#else
+    #define EXPORT 
+#endif 
+
+#ifdef __cplusplus
+	extern "C" {
+#endif
+
+/** First parameter of most functions is blip_t*, or const blip_t* if nothing
+is changed. */
+typedef struct blip_t blip_t;
+
+/** Creates new buffer that can hold at most sample_count samples. Sets rates
+so that there are blip_max_ratio clocks per sample. Returns pointer to new
+buffer, or NULL if insufficient memory. Release it with blip_delete(). */
+EXPORT blip_t* blip_new( int sample_count );
+
+/** Sets approximate input clock rate and output sample rate. For every
+clock_rate input clocks, approximately sample_rate samples are generated. */
+EXPORT void blip_set_rates( blip_t*, double clock_rate, double sample_rate );
+
+enum { /** Maximum clock_rate/sample_rate ratio. For a given sample_rate,
+clock_rate must not be greater than sample_rate*blip_max_ratio. */
+blip_max_ratio = 1 << 20 };
+
+/** Clears entire buffer. Afterwards, blip_samples_avail() == 0. */
+EXPORT void blip_clear( blip_t* );
+
+/** Adds positive/negative delta into buffer at specified clock time. */
+EXPORT void blip_add_delta( blip_t*, unsigned int clock_time, int delta );
+
+/** Same as blip_add_delta(), but uses faster, lower-quality synthesis. */
+void blip_add_delta_fast( blip_t*, unsigned int clock_time, int delta );
+
+/** Length of time frame, in clocks, needed to make sample_count additional
+samples available. Returns 0 when no further clocks are needed. */
+int blip_clocks_needed( const blip_t*, int sample_count );
+
+enum { /** Maximum number of samples that can be generated from one time frame. */
+blip_max_frame = 4000 };
+
+/** Makes input clocks before clock_duration available for reading as output
+samples. Also begins new time frame at clock_duration, so that clock time 0 in
+the new time frame specifies the same clock as clock_duration in the old time
+frame specified. Deltas can have been added slightly past clock_duration (up to
+however many clocks there are in two output samples). */
+EXPORT void blip_end_frame( blip_t*, unsigned int clock_duration );
+
+/** Number of buffered samples available for reading. */
+int blip_samples_avail( const blip_t* );
+
+/** Reads and removes at most 'count' samples and writes them to 'out'. If
+'stereo' is true, writes output to every other element of 'out', allowing easy
+interleaving of two buffers into a stereo sample stream. Outputs 16-bit signed
+samples. Returns number of samples actually read.  */
+EXPORT int blip_read_samples( blip_t*, short out [], int count, int stereo );
+
+/** Frees buffer. No effect if NULL is passed. */
+EXPORT void blip_delete( blip_t* );
+
+
+/* Deprecated alias of blip_t */
+typedef blip_t blip_buffer_t;
+
+#ifdef __cplusplus
+	}
+#endif
+
+#endif /* BLIP_BUF_H */
diff --git a/Utilities/md5.cpp b/Utilities/md5.cpp
new file mode 100644
index 0000000..f7c1f7c
--- /dev/null
+++ b/Utilities/md5.cpp
@@ -0,0 +1,315 @@
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001.  No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * (This is a heavily cut-down "BSD license".)
+ *
+ * This differs from Colin Plumb's older public domain implementation in that
+ * no exactly 32-bit integer data type is required (any 32-bit or wider
+ * unsigned integer data type will do), there's no compile-time endianness
+ * configuration, and the function prototypes match OpenSSL's.  No code from
+ * Colin Plumb's implementation has been reused; this comment merely compares
+ * the properties of the two independent implementations.
+ *
+ * The primary goals of this implementation are portability and ease of use.
+ * It is meant to be fast, but not as fast as possible.  Some known
+ * optimizations are not included to reduce source code size and avoid
+ * compile-time configuration.
+ */
+
+#include "stdafx.h"
+#include <string.h>
+#include <sstream>
+#include <iomanip>
+#include "md5.h"
+
+/*
+ * The basic MD5 functions. H and H2 are the same three-way XOR, grouped differently.
+ *
+ * F and G are optimized compared to their RFC 1321 definitions for
+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+ * implementation.
+ */
+#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z)			(((x) ^ (y)) ^ (z))
+#define H2(x, y, z)			((x) ^ ((y) ^ (z)))
+#define I(x, y, z)			((y) ^ ((x) | ~(z)))
+
+/* The MD5 transformation for all four rounds:
+ * a = b + rotate_left_32(a + f(b, c, d) + x + t, s)
+ */
+#define STEP(f, a, b, c, d, x, t, s) \
+	(a) += f((b), (c), (d)) + (x) + (t); \
+	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+	(a) += (b);
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them
+ * in a properly aligned word (ctx->block) in host byte order; GET reads that
+ * word back.  On the architectures listed below both simply load the word
+ * straight from the input instead.  That check for little-endian targets that
+ * tolerate unaligned memory accesses is just an optimization.  Nothing will
+ * break if it doesn't work.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+	(*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+	SET(n)
+#else
+#define SET(n) \
+	(ctx->block[(n)] = \
+	(MD5_u32plus)ptr[(n) * 4] | \
+	((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+	((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+	((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+	(ctx->block[(n)])
+#endif
+
+/*
+ * Processes size bytes (callers pass a nonzero multiple of 64) as 64-byte blocks without updating
+ * the length counters; no alignment requirements. Returns a pointer just past the last block read.
+ */
+static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+	const unsigned char *ptr;
+	MD5_u32plus a, b, c, d;
+	MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+	ptr = (const unsigned char *)data;
+
+	a = ctx->a;
+	b = ctx->b;
+	c = ctx->c;
+	d = ctx->d;
+
+	do {
+		saved_a = a;
+		saved_b = b;
+		saved_c = c;
+		saved_d = d;
+
+/* Round 1 */
+		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+/* Round 2 */
+		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+/* Round 3 */
+		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+		STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
+		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+		STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
+		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+		STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+		STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
+		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+		STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
+		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+		STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
+		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+		STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
+		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+		STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+/* Round 4 */
+		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+		a += saved_a; /* add this block's result to the state it started from */
+		b += saved_b;
+		c += saved_c;
+		d += saved_d;
+
+		ptr += 64;
+	} while (size -= 64); /* reaches 0 only when size is a multiple of 64 */
+
+	ctx->a = a;
+	ctx->b = b;
+	ctx->c = c;
+	ctx->d = d;
+
+	return ptr;
+}
+
+/* Puts ctx into the starting state: zero length, standard initial values in a..d. */
+void MD5_Init(MD5_CTX *ctx)
+{
+	ctx->lo = 0;
+	ctx->hi = 0;
+	ctx->a = 0x67452301;
+	ctx->b = 0xefcdab89;
+	ctx->c = 0x98badcfe;
+	ctx->d = 0x10325476;
+}
+/* Feeds size bytes into the hash; bytes that don't fill a 64-byte block stay in ctx->buffer. */
+void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+	MD5_u32plus saved_lo;
+	unsigned long used, available;
+
+	saved_lo = ctx->lo;
+	if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) /* lo keeps 29 bits of the byte count */
+		ctx->hi++; /* carry out of lo */
+	ctx->hi += size >> 29;
+
+	used = saved_lo & 0x3f; /* bytes already waiting in ctx->buffer */
+
+	if (used) {
+		available = 64 - used;
+
+		if (size < available) {
+			memcpy(&ctx->buffer[used], data, size); /* still no complete block */
+			return;
+		}
+
+		memcpy(&ctx->buffer[used], data, available); /* complete the pending block and hash it */
+		data = (const unsigned char *)data + available;
+		size -= available;
+		body(ctx, ctx->buffer, 64);
+	}
+
+	if (size >= 64) {
+		data = body(ctx, data, size & ~(unsigned long)0x3f); /* hash all whole blocks straight from the input */
+		size &= 0x3f;
+	}
+
+	memcpy(ctx->buffer, data, size); /* keep the remainder for the next call */
+}
+
+/* Pads the message, appends its length in bits, hashes the last block(s) and writes the
+ * 16-byte digest to result (a, b, c, d, least significant byte first). ctx is wiped afterwards. */
+void MD5_Final(unsigned char *result, MD5_CTX *ctx)
+{
+	unsigned long used, available;
+	int word, byte;
+	MD5_u32plus state[4];
+
+	/* Bytes still buffered from the last partial block */
+	used = ctx->lo & 0x3f;
+
+	/* Padding always starts with a single 0x80 byte */
+	ctx->buffer[used++] = 0x80;
+
+	available = 64 - used;
+
+	if (available < 8) {
+		/* No room left for the length field: zero-fill and hash this block first */
+		memset(&ctx->buffer[used], 0, available);
+		body(ctx, ctx->buffer, 64);
+		used = 0;
+		available = 64;
+	}
+
+	/* Zero-fill up to the 8 bytes reserved for the length */
+	memset(&ctx->buffer[used], 0, available - 8);
+
+	/* Length in bits, little-endian: low word in bytes 56-59, high word in bytes 60-63 */
+	ctx->lo <<= 3;
+	for (byte = 0; byte < 4; byte++) {
+		ctx->buffer[56 + byte] = ctx->lo >> (8 * byte);
+		ctx->buffer[60 + byte] = ctx->hi >> (8 * byte);
+	}
+
+	body(ctx, ctx->buffer, 64);
+
+	/* Serialize the four state words, least significant byte first */
+	state[0] = ctx->a;
+	state[1] = ctx->b;
+	state[2] = ctx->c;
+	state[3] = ctx->d;
+	for (word = 0; word < 4; word++) {
+		for (byte = 0; byte < 4; byte++) {
+			result[word * 4 + byte] = state[word] >> (8 * byte);
+		}
+	}
+
+	/* Leave no hashing state behind in the caller's context */
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+/* One-shot MD5: hashes size bytes at buffer and stores the 16-byte digest in result. */
+void GetMd5Sum(unsigned char* result, void* buffer, unsigned long size) {
+	MD5_CTX md5;
+	MD5_Init(&md5);
+	MD5_Update(&md5, buffer, size);
+	MD5_Final(result, &md5);
+}
+
+/* Returns the MD5 of the buffer as 32 uppercase hexadecimal characters. */
+string GetMd5Sum(void* buffer, size_t size)
+{
+	static const char hexDigits[] = "0123456789ABCDEF";
+	unsigned char digest[16];
+	GetMd5Sum(digest, buffer, (unsigned long)size);
+	string hex;
+	for(int i = 0; i < 16; i++) {
+		hex += hexDigits[digest[i] >> 4]; hex += hexDigits[digest[i] & 0x0F];
+	}
+	return hex;
+}
\ No newline at end of file
diff --git a/Utilities/md5.h b/Utilities/md5.h
new file mode 100644
index 0000000..ed006ed
--- /dev/null
+++ b/Utilities/md5.h
@@ -0,0 +1,42 @@
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001.  No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * See md5.c for more information.
+ */
+
+#pragma once 
+
+/* Any 32-bit or wider unsigned integer data type will do */
+typedef unsigned int MD5_u32plus;
+
+typedef struct {
+	MD5_u32plus lo, hi;	/* length bookkeeping: MD5_Final() takes lo & 0x3f as the number of bytes pending in buffer and appends lo << 3 and hi as the 64-bit length field */
+	MD5_u32plus a, b, c, d;	/* running hash state; MD5_Final() writes these out as the 16-byte digest */
+	unsigned char buffer[64];	/* input that does not yet fill a complete 64-byte block */
+	MD5_u32plus block[16];	/* NOTE(review): not referenced by the functions visible in this excerpt - presumably scratch space for the block transform; confirm in md5.cpp */
+} MD5_CTX;
+
+extern void MD5_Init(MD5_CTX *ctx);	/* NOTE(review): definition not visible in this excerpt - presumably prepares ctx for a new hash */
+extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);	/* feeds size bytes at data into the hash */
+extern void MD5_Final(unsigned char *result, MD5_CTX *ctx);	/* writes the 16-byte digest to result and zeroes *ctx */
+extern void GetMd5Sum(unsigned char *result, void* buffer, unsigned long size);	/* one-shot MD5_Init/MD5_Update/MD5_Final over buffer */
+extern string GetMd5Sum(void* buffer, size_t size);	/* digest as 32 uppercase hex digits; NOTE(review): this header includes nothing itself, so `string` and size_t must already be declared by the including file */
diff --git a/Utilities/miniz.cpp b/Utilities/miniz.cpp
new file mode 100644
index 0000000..84ac43e
--- /dev/null
+++ b/Utilities/miniz.cpp
@@ -0,0 +1,4157 @@
+/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing
+   See "unlicense" statement at the end of this file.
+   Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013
+   Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt
+
+   Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define
+   MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros).
+
+   * Change History
+     10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major release with Zip64 support (almost there!):
+       - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com) which could cause file lookups to not find files. This bug
+        would only have occurred in earlier versions if you explicitly used this flag, OR if you used mz_zip_extract_archive_file_to_heap() or mz_zip_add_mem_to_archive_file_in_place()
+        (which used this flag). If you can't switch to v1.15 but want to fix this bug, just remove the uses of this flag from both helper funcs (and of course don't use the flag).
+       - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when pUser_read_buf is not NULL and compressed size is > uncompressed size
+       - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract compressed data from directory entries, to account for weird zipfiles which contain zero-size compressed data on dir entries.
+         Hopefully this fix won't cause any issues on weird zip archives, because it assumes the low 16-bits of zip external attributes are DOS attributes (which I believe they always are in practice).
+       - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the internal attributes, just the filename and external attributes
+       - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed
+       - Added cmake support for Linux builds which builds all the examples, tested with clang v3.3 and gcc v4.6.
+       - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti
+       - Merged MZ_FORCEINLINE fix from hdeanclark
+       - Fix <time.h> include before config #ifdef, thanks emil.brink
+       - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping (super useful for OpenGL apps), and explicit control over the compression level (so you can
+        set it to 1 for real-time compression).
+       - Merged in some compiler fixes from paulharris's GitHub repo.
+       - Retested this build under Windows (VS 2010, including static analysis), tcc  0.9.26, gcc v4.6 and clang v3.3.
+       - Added example6.c, which dumps an image of the mandelbrot set to a PNG file.
+       - Modified example2 to help test the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more.
+       - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix possible src file fclose() leak if alignment bytes+local header file write failed
+       - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): Was pushing the wrong central dir header offset, appears harmless in this release, but it became a problem in the zip64 branch
+     5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include <time.h> (thanks fermtect).
+     5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit.
+       - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files.
+       - Eliminated a bunch of warnings when compiling with GCC 32-bit/64.
+       - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly
+        "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning).
+       - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64.
+       - Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test.
+       - Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives.
+       - Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.)
+       - Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself).
+     4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's.
+      level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson <bruced@valvesoftware.com> for the feedback/bug report.
+     5/28/11 v1.11 - Added statement from unlicense.org
+     5/27/11 v1.10 - Substantial compressor optimizations:
+      - Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a
+        Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86).
+      - Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types.
+      - Refactored the compression code for better readability and maintainability.
+      - Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large
+       drop in throughput on some files).
+     5/15/11 v1.09 - Initial stable release.
+
+   * Low-level Deflate/Inflate implementation notes:
+
+     Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or
+     greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses
+     approximately as well as zlib.
+
+     Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function
+     coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory
+     block large enough to hold the entire file.
+
+     The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation.
+
+   * zlib-style API notes:
+
+     miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in
+     zlib replacement in many apps:
+        The z_stream struct, optional memory allocation callbacks
+        deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound
+        inflateInit/inflateInit2/inflate/inflateEnd
+        compress, compress2, compressBound, uncompress
+        CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines.
+        Supports raw deflate streams or standard zlib streams with adler-32 checking.
+
+     Limitations:
+      The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries.
+      I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but
+      there are no guarantees that miniz.c pulls this off perfectly.
+
+   * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by
+     Alex Evans. Supports 1-4 bytes/pixel images.
+
+   * ZIP archive API notes:
+
+     The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to
+     get the job done with minimal fuss. There are simple API's to retrieve file information, read files from
+     existing archives, create new archives, append new files to existing archives, or clone archive data from
+     one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h),
+     or you can specify custom file read/write callbacks.
+
+     - Archive reading: Just call this function to read a single file from a disk archive:
+
+      void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name,
+        size_t *pSize, mz_uint zip_flags);
+
+     For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central
+     directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files.
+
+     - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file:
+
+     int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
+
+     The locate operation can optionally check file comments too, which (as one example) can be used to identify
+     multiple versions of the same file in an archive. This function uses a simple linear search through the central
+     directory, so it's not very fast.
+
+     Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and
+     retrieve detailed info on each file by calling mz_zip_reader_file_stat().
+
+     - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data
+     to disk and builds an exact image of the central directory in memory. The central directory image is written
+     all at once at the end of the archive file when the archive is finalized.
+
+     The archive writer can optionally align each file's local header and file data to any power of 2 alignment,
+     which can be useful when the archive will be read from optical media. Also, the writer supports placing
+     arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still
+     readable by any ZIP tool.
+
+     - Archive appending: The simple way to add a single file to an archive is to call this function:
+
+      mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name,
+        const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
+
+     The archive will be created if it doesn't already exist, otherwise it'll be appended to.
+     Note the appending is done in-place and is not an atomic operation, so if something goes wrong
+     during the operation it's possible the archive could be left without a central directory (although the local
+     file headers and file data will be fine, so the archive will be recoverable).
+
+     For more complex archive modification scenarios:
+     1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to
+     preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the
+     compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and
+     you're done. This is safe but requires a bunch of temporary disk space or heap memory.
+
+     2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(),
+     append new files as needed, then finalize the archive which will write an updated central directory to the
+     original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a
+     possibility that the archive's central directory could be lost with this method if anything goes wrong, though.
+
+     - ZIP archive support limitations:
+     No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files.
+     Requires streams capable of seeking.
+
+   * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the
+     below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it.
+
+   * Important: For best perf. be sure to customize the below macros for your target platform:
+     #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
+     #define MINIZ_LITTLE_ENDIAN 1
+     #define MINIZ_HAS_64BIT_REGISTERS 1
+
+   * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz
+     uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files
+     (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes).
+*/
+
+#include "stdafx.h"
+#include "miniz.h"
+
+typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; // Compile-time size checks: a wrong width gives an array length of -1, which does not compile.
+typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1];
+typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1];
+
+#include <string.h>
+#include <assert.h>
+
+#define MZ_ASSERT(x) assert(x)
+/* Heap hooks: with MINIZ_NO_MALLOC defined, allocation and reallocation always yield NULL and freeing does nothing. */
+#ifdef MINIZ_NO_MALLOC
+  #define MZ_MALLOC(x) NULL
+  #define MZ_FREE(x) (void)x, ((void)0)
+  #define MZ_REALLOC(p, x) NULL
+#else
+  #define MZ_MALLOC(x) malloc(x)
+  #define MZ_FREE(x) free(x)
+  #define MZ_REALLOC(p, x) realloc(p, x)
+#endif
+/* NOTE: MZ_MAX/MZ_MIN evaluate the selected argument twice, so do not pass expressions with side effects. */
+#define MZ_MAX(a,b) (((a)>(b))?(a):(b))
+#define MZ_MIN(a,b) (((a)<(b))?(a):(b))
+#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN /* fast path: load the value directly through a cast pointer */
+  #define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
+  #define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
+#else /* portable path: assemble the little-endian value one byte at a time */
+  #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
+  #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
+#endif
+
+#ifdef _MSC_VER /* MZ_FORCEINLINE: the strongest inlining request each compiler offers */
+  #define MZ_FORCEINLINE __forceinline
+#elif defined(__GNUC__)
+  #define MZ_FORCEINLINE inline __attribute__((__always_inline__))
+#else /* unknown compiler: plain inline */
+  #define MZ_FORCEINLINE inline
+#endif
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+// ------------------- zlib-style API's
+
+mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len)
+{
+  // Continues the Adler-32 checksum `adler` over buf_len bytes at ptr; a NULL ptr yields MZ_ADLER32_INIT.
+  mz_uint32 low = (mz_uint32)(adler & 0xffff), high = (mz_uint32)(adler >> 16);
+  size_t chunk = buf_len % 5552, remaining = buf_len;
+  if (!ptr) return MZ_ADLER32_INIT;
+  // The first chunk takes the odd remainder, every later chunk is 5552 bytes; both sums are reduced mod 65521 once per chunk.
+  for ( ; remaining != 0; remaining -= chunk, chunk = 5552) {
+    const unsigned char *chunk_end = ptr + chunk;
+    while (ptr != chunk_end) { low += *ptr++; high += low; }
+    low %= 65521U; high %= 65521U;
+  }
+  return (high << 16) + low;
+}
+
+// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/
+mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len)
+{
+  static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
+    0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
+  mz_uint32 state; // running CRC, kept bit-inverted while bytes are folded in
+  if (!ptr) return MZ_CRC32_INIT; // a NULL buffer asks for the initial value
+  for (state = ~(mz_uint32)crc; buf_len != 0; --buf_len, ++ptr) { state = (state >> 4) ^ s_crc32[(state ^ *ptr) & 0xF]; state = (state >> 4) ^ s_crc32[(state & 0xF) ^ (*ptr >> 4)]; } // one table step per nibble, low nibble first
+  return ~state;
+}
+
+void mz_free(void *p)
+{
+  MZ_FREE(p); // releases p through the MZ_FREE hook
+}
+
+#ifndef MINIZ_NO_ZLIB_APIS
+
+static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } // default zalloc callback; items * size is not checked for overflow
+static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } // default zfree callback
+static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } // MZ_REALLOC counterpart; NOTE(review): no use of it is visible in this excerpt
+
+const char *mz_version(void)
+{
+  return MZ_VERSION; // returns the MZ_VERSION constant
+}
+
+int mz_deflateInit(mz_streamp pStream, int level)
+{
+  return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); // convenience form: deflate method, default window bits, mem_level 9, default strategy
+}
+
+int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy)
+{
+  // Prepares pStream for compression: validates the arguments, resets the stream counters, installs the
+  // default allocator callbacks where none were given and attaches a freshly initialized tdefl_compressor.
+  const mz_uint flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy);
+  tdefl_compressor *pCompressor;
+
+  if (!pStream) return MZ_STREAM_ERROR;
+  // Only the deflate method, mem_level 1..9 and a window of +/-MZ_DEFAULT_WINDOW_BITS are accepted.
+  if (method != MZ_DEFLATED) return MZ_PARAM_ERROR;
+  if ((mem_level < 1) || (mem_level > 9)) return MZ_PARAM_ERROR;
+  if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR;
+
+  pStream->total_in = 0;
+  pStream->total_out = 0;
+  pStream->data_type = 0;
+  pStream->reserved = 0;
+  pStream->msg = NULL;
+  pStream->adler = MZ_ADLER32_INIT;
+  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
+  if (!pStream->zfree) pStream->zfree = def_free_func;
+
+  pCompressor = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor));
+  if (!pCompressor) return MZ_MEM_ERROR;
+  pStream->state = (struct mz_internal_state *)pCompressor;
+
+  // On an init failure mz_deflateEnd() releases the state that was just attached.
+  if (tdefl_init(pCompressor, NULL, NULL, flags) == TDEFL_STATUS_OKAY) return MZ_OK;
+  mz_deflateEnd(pStream);
+  return MZ_PARAM_ERROR;
+}
+
+int mz_deflateReset(mz_streamp pStream)
+{
+  if (!(pStream && pStream->state && pStream->zalloc && pStream->zfree)) return MZ_STREAM_ERROR; // the stream must be fully set up
+  pStream->total_out = 0; pStream->total_in = 0; // restart the byte counters
+  { tdefl_compressor *pComp = (tdefl_compressor*)pStream->state; tdefl_init(pComp, NULL, NULL, pComp->m_flags); } // re-initialize with the flags the compressor already holds
+  return MZ_OK;
+}
+
+int mz_deflate(mz_streamp pStream, int flush)
+{ // Compresses from next_in into next_out until output space runs out, the stream finishes, or input runs dry (unless MZ_FINISH was requested), advancing the stream's pointers and counters.
+  size_t in_bytes, out_bytes;
+  mz_ulong orig_total_in, orig_total_out;
+  int mz_status = MZ_OK;
+  // A missing stream/state/output pointer or a flush value outside 0..MZ_FINISH is a stream error; an empty output buffer is a buffer error.
+  if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR;
+  if (!pStream->avail_out) return MZ_BUF_ERROR;
+  // MZ_PARTIAL_FLUSH is treated exactly like MZ_SYNC_FLUSH.
+  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
+  // The compressor already reported DONE on an earlier call: only another MZ_FINISH is answered with MZ_STREAM_END.
+  if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE)
+    return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR;
+
+  orig_total_in = pStream->total_in; orig_total_out = pStream->total_out;
+  for ( ; ; )
+  {
+    tdefl_status defl_status;
+    in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
+    // in_bytes/out_bytes go in as the available sizes; afterwards the stream is advanced by the values tdefl_compress() leaves in them.
+    defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush);
+    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
+    pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state);
+
+    pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes;
+    pStream->total_out += (mz_uint)out_bytes;
+
+    if (defl_status < 0)
+    {
+      mz_status = MZ_STREAM_ERROR;
+      break;
+    }
+    else if (defl_status == TDEFL_STATUS_DONE)
+    {
+      mz_status = MZ_STREAM_END;
+      break;
+    }
+    else if (!pStream->avail_out) // output buffer full: leave with MZ_OK so the caller can drain it
+      break;
+    else if ((!pStream->avail_in) && (flush != MZ_FINISH))
+    {
+      if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) // a flush was requested or this call made progress
+        break;
+      return MZ_BUF_ERROR; // Can't make forward progress without some input.
+    }
+  }
+  return mz_status;
+}
+
+int mz_deflateEnd(mz_streamp pStream)
+{
+  // Frees the compressor state through the stream's zfree callback; calling it again afterwards is harmless.
+  if (!pStream) return MZ_STREAM_ERROR;
+  if (!pStream->state)
+    return MZ_OK;
+  pStream->zfree(pStream->opaque, pStream->state);
+  pStream->state = NULL;
+  return MZ_OK;
+}
+
+mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len)
+{
+  const mz_ulong scaled = 128 + (source_len * 110) / 100, blocked = 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5;
+  (void)pStream; // Unused. The estimate is deliberately very conservative: a true upper bound is hard to derive from the way tdefl forms blocks.
+  return (scaled > blocked) ? scaled : blocked;
+}
+
+int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level)
+{
+  // One-shot compression of pSource into pDest. *pDest_len holds the capacity of pDest on entry and the
+  // number of bytes produced on success; it is left untouched when an error code is returned.
+  mz_stream strm;
+  int rc;
+  memset(&strm, 0, sizeof(strm));
+  // Both lengths are narrowed to mz_uint32 below, so reject anything wider (mz_ulong may be 64-bit).
+  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
+  strm.next_out = pDest;
+  strm.avail_out = (mz_uint32)*pDest_len;
+  strm.next_in = pSource;
+  strm.avail_in = (mz_uint32)source_len;
+
+  rc = mz_deflateInit(&strm, level);
+  if (rc != MZ_OK) return rc;
+
+  rc = mz_deflate(&strm, MZ_FINISH);
+  if (rc == MZ_STREAM_END)
+  {
+    *pDest_len = strm.total_out;
+    return mz_deflateEnd(&strm);
+  }
+  // Not finished: release the state; a plain MZ_OK at this point means pDest was too small.
+  mz_deflateEnd(&strm);
+  return (rc == MZ_OK) ? MZ_BUF_ERROR : rc;
+}
+
+int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
+{
+  return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); // same as mz_compress2() at the default compression level
+}
+
+mz_ulong mz_compressBound(mz_ulong source_len)
+{
+  return mz_deflateBound(NULL, source_len); // mz_deflateBound() ignores its stream argument, so NULL is fine
+}
+
+typedef struct
+{
+  tinfl_decompressor m_decomp; // low-level decompressor state handed to tinfl_decompress()
+  mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; // m_dict_ofs/m_dict_avail: offset and count of bytes in m_dict not yet copied to the caller; m_first_call: set until the first mz_inflate() call; m_has_flushed: set once MZ_FINISH was requested; m_window_bits: value given to mz_inflateInit2() (> 0 turns on zlib header parsing)
+  mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; // window mz_inflate() decompresses into before copying to next_out
+  tinfl_status m_last_status; // decompressor status as last recorded by mz_inflate()
+} inflate_state;
+
+int mz_inflateInit2(mz_streamp pStream, int window_bits)
+{
+  // Prepares pStream for decompression. window_bits must be +MZ_DEFAULT_WINDOW_BITS or -MZ_DEFAULT_WINDOW_BITS;
+  // mz_inflate() only asks for zlib header parsing when the stored value is positive.
+  inflate_state *pState;
+
+  if (!pStream) return MZ_STREAM_ERROR;
+  if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR;
+
+  pStream->total_in = pStream->total_out = 0;
+  pStream->data_type = 0;
+  pStream->reserved = 0;
+  pStream->adler = 0;
+  pStream->msg = NULL;
+  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
+  if (!pStream->zfree) pStream->zfree = def_free_func;
+
+  pState = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state));
+  if (!pState) return MZ_MEM_ERROR;
+  pStream->state = (struct mz_internal_state *)pState;
+
+  // Fresh decoder, empty dictionary window, no call seen yet.
+  tinfl_init(&pState->m_decomp);
+  pState->m_window_bits = window_bits;
+  pState->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
+  pState->m_first_call = 1;
+  pState->m_has_flushed = 0;
+  pState->m_dict_ofs = pState->m_dict_avail = 0;
+  return MZ_OK;
+}
+
+int mz_inflateInit(mz_streamp pStream)
+{
+   return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); // positive window bits: mz_inflate() will parse a zlib header
+}
+
+int mz_inflate(mz_streamp pStream, int flush)
+{ // Decompresses from next_in to next_out. MZ_FINISH on the very first call decodes straight into the caller's buffer; every other call goes through the internal m_dict window.
+  inflate_state* pState;
+  mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32;
+  size_t in_bytes, out_bytes, orig_avail_in;
+  tinfl_status status;
+  // Validate the stream and the flush mode: only 0, MZ_SYNC_FLUSH (MZ_PARTIAL_FLUSH is mapped to it) and MZ_FINISH are accepted.
+  if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR;
+  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
+  if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
+
+  pState = (inflate_state*)pStream->state;
+  if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; // positive window bits: ask the decompressor to parse a zlib header
+  orig_avail_in = pStream->avail_in;
+
+  first_call = pState->m_first_call; pState->m_first_call = 0;
+  if (pState->m_last_status < 0) return MZ_DATA_ERROR; // an earlier call already failed
+  // Once MZ_FINISH has been requested, every later call must use MZ_FINISH as well.
+  if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
+  pState->m_has_flushed |= (flush == MZ_FINISH);
+
+  if ((flush == MZ_FINISH) && (first_call))
+  {
+    // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file.
+    decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
+    in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
+    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags);
+    pState->m_last_status = status;
+    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes;
+    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
+    pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes;
+
+    if (status < 0)
+      return MZ_DATA_ERROR;
+    else if (status != TINFL_STATUS_DONE)
+    {
+      pState->m_last_status = TINFL_STATUS_FAILED; // the one-shot attempt did not finish, so later calls report MZ_DATA_ERROR
+      return MZ_BUF_ERROR;
+    }
+    return MZ_STREAM_END;
+  }
+  // If flush != MZ_FINISH then we must assume there's more input.
+  if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;
+  // Bytes still waiting in the dictionary from an earlier call are handed out first, before any new input is decoded.
+  if (pState->m_dict_avail)
+  {
+    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
+    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
+    pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
+    pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
+    return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
+  }
+  // Main loop: decompress into the dictionary window, then copy as much as fits into the caller's buffer.
+  for ( ; ; )
+  {
+    in_bytes = pStream->avail_in;
+    out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;
+
+    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
+    pState->m_last_status = status;
+
+    pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
+    pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp);
+
+    pState->m_dict_avail = (mz_uint)out_bytes;
+
+    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
+    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
+    pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
+    pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
+
+    if (status < 0)
+       return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well).
+    else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
+      return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH.
+    else if (flush == MZ_FINISH)
+    {
+       // The output buffer MUST be large enough to hold the remaining uncompressed data when flush==MZ_FINISH.
+       if (status == TINFL_STATUS_DONE)
+          return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
+       // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong.
+       else if (!pStream->avail_out)
+          return MZ_BUF_ERROR;
+    }
+    else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail))
+      break;
+  }
+
+  return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
+}
+
+int mz_inflateEnd(mz_streamp pStream)
+{
+  // Frees the decompressor state through the stream's zfree callback;
+  // calling it again afterwards is harmless.
+  if (!pStream) return MZ_STREAM_ERROR;
+  if (!pStream->state)
+    return MZ_OK;
+  pStream->zfree(pStream->opaque, pStream->state);
+  pStream->state = NULL;
+  return MZ_OK;
+}
+
+int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
+{
+  // One-shot decompression of pSource into pDest. *pDest_len holds the capacity of pDest on entry and the
+  // number of bytes produced on success; it is left untouched when an error code is returned.
+  int rc;
+  mz_stream strm;
+  memset(&strm, 0, sizeof(strm));
+
+  // Both lengths are narrowed to mz_uint32 below, so reject anything wider (mz_ulong may be 64-bit).
+  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
+  strm.next_out = pDest;
+  strm.avail_out = (mz_uint32)*pDest_len;
+  strm.next_in = pSource;
+  strm.avail_in = (mz_uint32)source_len;
+
+  rc = mz_inflateInit(&strm);
+  if (rc != MZ_OK) return rc;
+
+  rc = mz_inflate(&strm, MZ_FINISH);
+  if (rc == MZ_STREAM_END)
+  {
+    *pDest_len = strm.total_out;
+    return mz_inflateEnd(&strm);
+  }
+  // Failure: release the state. A buffer error with all input consumed is reported as corrupt data.
+  mz_inflateEnd(&strm);
+  return ((rc == MZ_BUF_ERROR) && (!strm.avail_in)) ? MZ_DATA_ERROR : rc;
+}
+
+const char *mz_error(int err)
+{
+  static struct { int m_err; const char *m_pDesc; } s_error_descs[] = {
+    { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" },
+    { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" }
+  };
+  const size_t count = sizeof(s_error_descs) / sizeof(s_error_descs[0]); size_t idx = 0; // forward scan for the first entry matching err
+  while ((idx < count) && (s_error_descs[idx].m_err != err)) ++idx;
+  return (idx < count) ? s_error_descs[idx].m_pDesc : NULL; // codes that are not in the table yield NULL
+}
+
+#endif //MINIZ_NO_ZLIB_APIS
+
+// ------------------- Low-level Decompression (completely independent from all compression API's)
+
+#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
+#define TINFL_MEMSET(p, c, l) memset(p, c, l)
+// Coroutine helpers: TINFL_CR_BEGIN opens a switch on r->m_state, and TINFL_CR_RETURN records its state index, jumps to common_exit and plants a case label so a later pass through the switch resumes right after it.
+#define TINFL_CR_BEGIN switch(r->m_state) { case 0:
+#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END
+#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END
+#define TINFL_CR_FINISH }
+
+// TINFL_GET_BYTE(state_index, c) stores the next input byte in c. When the input is exhausted it suspends with TINFL_STATUS_NEEDS_MORE_INPUT as long as TINFL_FLAG_HAS_MORE_INPUT is set;
+// otherwise it yields 0, i.e. the end of the stream is padded with 0's. TODO: reading beyond the input buf after the caller has indicated that there's no more input means something is wrong with the input, because the inflator never reads ahead more than it needs to.
+#define TINFL_GET_BYTE(state_index, c) do { \
+  if (pIn_buf_cur >= pIn_buf_end) { \
+    for ( ; ; ) { \
+      if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \
+        TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
+        if (pIn_buf_cur < pIn_buf_end) { \
+          c = *pIn_buf_cur++; \
+          break; \
+        } \
+      } else { \
+        c = 0; \
+        break; \
+      } \
+    } \
+  } else c = *pIn_buf_cur++; } MZ_MACRO_END
+
+#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) // appends input bytes to bit_buf until it holds at least n bits
+#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END // discards the low n bits, refilling first if needed
+#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END // b = the low n bits, which are then discarded
+
+// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2.
+// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a
+// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the
+// bit buffer contains >=15 bits (deflate's max. Huffman code size). It uses temp, code_len and c, which TINFL_HUFF_DECODE() declares before expanding this macro.
+#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \
+  do { \
+    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
+    if (temp >= 0) { \
+      code_len = temp >> 9; \
+      if ((code_len) && (num_bits >= code_len)) \
+      break; \
+    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \
+       code_len = TINFL_FAST_LOOKUP_BITS; \
+       do { \
+          temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
+       } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \
+    } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \
+  } while (num_bits < 15);
+
+// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
+// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
+// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
+// The slow path is only executed at the very end of the input buffer. The macro declares temp, code_len and c locally, stores the decoded symbol in sym and removes its bits from bit_buf.
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \
+  int temp; mz_uint code_len, c; \
+  if (num_bits < 15) { \
+    if ((pIn_buf_end - pIn_buf_cur) < 2) { \
+       TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \
+    } else { \
+       bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \
+    } \
+  } \
+  if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \
+    code_len = temp >> 9, temp &= 511; \
+  else { \
+    code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \
+  } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END
+
+tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags)
+{ // Resumable inflate core written as a switch-based coroutine (TINFL_CR_*). Consumes up to *pIn_buf_size bytes at pIn_buf_next and writes up to *pOut_buf_size bytes at pOut_buf_next inside the buffer that starts at pOut_buf_start. On return *pIn_buf_size / *pOut_buf_size hold the bytes consumed / produced (both 0 together with TINFL_STATUS_BAD_PARAM). The result is the status recorded by the last TINFL_CR_RETURN, or TINFL_STATUS_ADLER32_MISMATCH when a parsed zlib trailer disagrees with the computed checksum.
+  static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; // Indexed by (length symbol - 257); the two trailing zeros belong to symbols 286/287, which RFC 1951 says never occur in valid data.
+  static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+  static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
+  static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; // Only 30 initializers: the last two entries are zero-initialized.
+  static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+  static const int s_min_table_sizes[3] = { 257, 1, 4 };
+
+  tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf;
+  const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
+  mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
+  size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start;
+
+  // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter).
+  if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; }
+
+  num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; // Reload the coroutine locals saved at common_exit by the previous call.
+  TINFL_CR_BEGIN
+
+  bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; // Only reached in state 0, i.e. at the start of a stream.
+  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
+  {
+    TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1);
+    counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
+    if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (unsigned int)(1U << (8U + (r->m_zhdr0 >> 4)))));
+    if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); }
+  }
+
+  do
+  {
+    TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; // Bit 0 of m_final is the "last block" flag tested at the bottom of the loop; the upper two bits select the block type.
+    if (r->m_type == 0)
+    {
+      // Stored block: skip to a byte boundary, read the 4-byte length / ~length header, then copy the payload verbatim.
+      TINFL_SKIP_BITS(5, num_bits & 7);
+      for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); }
+      if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); }
+      while ((counter) && (num_bits))
+      {
+        TINFL_GET_BITS(51, dist, 8);
+        while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); }
+        *pOut_buf_cur++ = (mz_uint8)dist;
+        counter--;
+      }
+      while (counter)
+      {
+        size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); }
+        while (pIn_buf_cur >= pIn_buf_end)
+        {
+          if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT)
+          {
+            TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT);
+          }
+          else
+          {
+            TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED);
+          }
+        }
+        n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
+        TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n;
+      }
+    }
+    else if (r->m_type == 3)
+    {
+      TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
+    }
+    else
+    {
+      if (r->m_type == 1)
+      {
+        mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i;
+        r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
+        for ( i = 0; i <= 143; ++i) *p++ = 8;
+        for ( ; i <= 255; ++i) *p++ = 9;
+        for ( ; i <= 279; ++i) *p++ = 7;
+        for ( ; i <= 287; ++i) *p++ = 8;
+      }
+      else
+      {
+        for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; }
+        MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; }
+        r->m_table_sizes[2] = 19;
+      }
+      for ( ; (int)r->m_type >= 0; r->m_type--) // Builds the decode table for m_tables[m_type], counting m_type down to 0.
+      {
+        int tree_next, tree_cur; tinfl_huff_table *pTable;
+        mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree);
+        for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++;
+        used_syms = 0, total = 0; next_code[0] = next_code[1] = 0;
+        for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); }
+        if ((65536 != total) && (used_syms > 1))
+        {
+          TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
+        }
+        for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index)
+        {
+          mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue;
+          cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1);
+          if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; }
+          if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; }
+          rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
+          for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--)
+          {
+            tree_cur -= ((rev_code >>= 1) & 1);
+            if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1];
+          }
+          tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
+        }
+        if (r->m_type == 2)
+        {
+          // With the code-length table (index 2) built, decode the literal/length and distance code lengths, expanding repeat codes 16..18.
+          for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); )
+          {
+            mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; }
+            if ((dist == 16) && (!counter))
+            {
+              TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
+            }
+            num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16];
+            TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); counter += s;
+          }
+          if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter)
+          {
+            TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
+          }
+          TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
+        }
+      }
+      for ( ; ; )
+      {
+        mz_uint8 *pSrc;
+        for ( ; ; )
+        {
+          if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2))
+          {
+            // Careful path near either buffer end: one symbol at a time, able to yield.
+            TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]);
+            if (counter >= 256)
+              break;
+            while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); }
+            *pOut_buf_cur++ = (mz_uint8)counter;
+          }
+          else
+          {
+            // Fast path: at least 4 input bytes and 2 output bytes are available, so up to two literals are decoded without yielding.
+            int sym2; mz_uint code_len;
+#if TINFL_USE_64BIT_BITBUF
+            if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; }
+#else
+            if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; }
+#endif
+            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
+              code_len = sym2 >> 9;
+            else
+            {
+              code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0);
+            }
+            counter = sym2; bit_buf >>= code_len; num_bits -= code_len;
+            if (counter & 256)
+              break;
+
+#if !TINFL_USE_64BIT_BITBUF
+            if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; }
+#endif
+            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
+              code_len = sym2 >> 9;
+            else
+            {
+              code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0);
+            }
+            bit_buf >>= code_len; num_bits -= code_len;
+
+            pOut_buf_cur[0] = (mz_uint8)counter;
+            if (sym2 & 256)
+            {
+              pOut_buf_cur++;
+              counter = sym2;
+              break;
+            }
+            pOut_buf_cur[1] = (mz_uint8)sym2;
+            pOut_buf_cur += 2;
+          }
+        }
+        if ((counter &= 511) == 256) break; // Symbol 256 ends the block.
+
+        num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257];
+        if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; }
+
+        TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]);
+        num_extra = s_dist_extra[dist]; dist = s_dist_base[dist];
+        if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; }
+
+        dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
+        if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
+        {
+          TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
+        }
+
+        pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask);
+
+        if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end)
+        {
+          // The match would run past the end of the buffer: copy byte by byte with wrapping, yielding whenever the output is full.
+          while (counter--)
+          {
+            while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); }
+            *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask];
+          }
+          continue;
+        }
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+        else if ((counter >= 9) && (counter <= dist))
+        {
+          const mz_uint8 *pSrc_end = pSrc + (counter & ~7);
+          do
+          {
+            ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0];
+            ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1];
+            pOut_buf_cur += 8;
+          } while ((pSrc += 8) < pSrc_end);
+          if ((counter &= 7) < 3)
+          {
+            if (counter)
+            {
+              pOut_buf_cur[0] = pSrc[0];
+              if (counter > 1)
+                pOut_buf_cur[1] = pSrc[1];
+              pOut_buf_cur += counter;
+            }
+            continue;
+          }
+        }
+#endif
+        while (counter > 2) // Test before copying: length symbols 286/287 yield counter == 0, and a do/while here would still write 3 bytes, possibly past pOut_buf_end.
+        {
+          pOut_buf_cur[0] = pSrc[0];
+          pOut_buf_cur[1] = pSrc[1];
+          pOut_buf_cur[2] = pSrc[2];
+          pOut_buf_cur += 3; pSrc += 3; counter -= 3;
+        }
+        if ((int)counter > 0)
+        {
+          pOut_buf_cur[0] = pSrc[0];
+          if ((int)counter > 1)
+            pOut_buf_cur[1] = pSrc[1];
+          pOut_buf_cur += counter;
+        }
+      }
+    }
+  } while (!(r->m_final & 1));
+  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
+  {
+    TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; }
+  }
+  TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);
+  TINFL_CR_FINISH
+
+common_exit:
+  r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; // Save the coroutine locals for the next call.
+  *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
+  if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0))
+  {
+    // Fold the bytes produced by this call into the running Adler-32, reducing modulo 65521 after each block of at most 5552 bytes.
+    const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size;
+    mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552;
+    while (buf_len)
+    {
+      for (i = 0; i + 7 < block_len; i += 8, ptr += 8)
+      {
+        s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
+        s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
+      }
+      for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1;
+      s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;
+    }
+    r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH;
+  }
+  return status;
+}
+
+// Higher level helper functions.
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
+{ // Inflates the whole source buffer into a block obtained from MZ_REALLOC, doubling its capacity (minimum 128 bytes) whenever the inflator wants more room. Returns the block with *pOut_len set to the decompressed size, or NULL with *pOut_len == 0 on any failure.
+  tinfl_decompressor inflator; const mz_uint8 *pIn = (const mz_uint8*)pSrc_buf; mz_uint8 *pHeap = NULL; size_t in_pos = 0, heap_cap = 0;
+  const mz_uint32 run_flags = (mz_uint32)((flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); // All input is supplied up front and the output never wraps.
+  *pOut_len = 0;
+  tinfl_init(&inflator);
+  for ( ; ; )
+  {
+    size_t in_avail = src_buf_len - in_pos, out_avail = heap_cap - *pOut_len, grown_cap;
+    void *pGrown;
+    const tinfl_status rc = tinfl_decompress(&inflator, pIn + in_pos, &in_avail, pHeap, pHeap ? pHeap + *pOut_len : NULL, &out_avail, run_flags);
+    if ((rc == TINFL_STATUS_NEEDS_MORE_INPUT) || (rc < 0))
+      goto fail; // Truncated or corrupt stream.
+    in_pos += in_avail;
+    *pOut_len += out_avail;
+    if (rc == TINFL_STATUS_DONE)
+      return pHeap;
+    grown_cap = (heap_cap * 2 < 128) ? 128 : heap_cap * 2;
+    pGrown = MZ_REALLOC(pHeap, grown_cap);
+    if (!pGrown)
+      goto fail; // The old block is still owned here and is released below.
+    pHeap = (mz_uint8*)pGrown; heap_cap = grown_cap;
+  }
+fail:
+  MZ_FREE(pHeap); *pOut_len = 0; return NULL;
+}
+
+
+size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
+{ // One-shot inflate from pSrc_buf into the caller's pOut_buf; returns the number of bytes written, or TINFL_DECOMPRESS_MEM_TO_MEM_FAILED unless the stream completes with TINFL_STATUS_DONE.
+  mz_uint8 *const pDst = (mz_uint8*)pOut_buf; tinfl_decompressor inflator; tinfl_init(&inflator);
+  if (tinfl_decompress(&inflator, (const mz_uint8*)pSrc_buf, &src_buf_len, pDst, pDst, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) == TINFL_STATUS_DONE) return out_buf_len;
+  return TINFL_DECOMPRESS_MEM_TO_MEM_FAILED;
+}
+
+int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
+{ // Inflates the *pIn_buf_size bytes at pIn_buf through a TINFL_LZ_DICT_SIZE-byte wrapping dictionary, handing each produced run to pPut_buf_func(data, length, pPut_buf_user). Returns 1 if the stream finished with TINFL_STATUS_DONE and 0 otherwise. Once decoding has started, *pIn_buf_size receives the number of input bytes consumed.
+  int result = 0;
+  tinfl_decompressor decomp;
+  mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0;
+  if (!pDict)
+    return 0; // Report allocation failure as 0 like every other failure: TINFL_STATUS_FAILED is a tinfl_status code, not this function's 0/1 result, and reads as success in a truthiness check. *pIn_buf_size is left untouched on this path.
+  tinfl_init(&decomp);
+  for ( ; ; )
+  {
+    size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
+    tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
+      (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
+    in_buf_ofs += in_buf_size;
+    if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
+      break; // The callback returned 0: stop early, result stays 0.
+    if (status != TINFL_STATUS_HAS_MORE_OUTPUT)
+    {
+      result = (status == TINFL_STATUS_DONE);
+      break;
+    }
+    dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); // Advance the write position, wrapping inside the dictionary.
+  }
+  MZ_FREE(pDict);
+  *pIn_buf_size = in_buf_ofs;
+  return result;
+}
+
+// ------------------- Low-level Compression (independent from all decompression APIs)
+
+// Purposely making these tables static for faster init and thread safety.
+static const mz_uint16 s_tdefl_len_sym[256] = { // 256 entries holding length symbols 257..285. NOTE(review): presumably indexed by (match length - 3); the lookup site is outside this chunk.
+  257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
+  273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
+  277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
+  279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
+  281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
+  282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
+  283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
+  284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };
+
+static const mz_uint8 s_tdefl_len_extra[256] = { // Extra-bit counts (0..5) parallel to s_tdefl_len_sym; the final 0 lines up with the lone 285 entry there.
+  0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };
+
+static const mz_uint8 s_tdefl_small_dist_sym[512] = { // 512 entries holding distance symbols 0..17. NOTE(review): presumably indexed by a small match distance; the lookup site is outside this chunk.
+  0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
+  11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
+  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
+  14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
+  14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+  15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };
+
+static const mz_uint8 s_tdefl_small_dist_extra[512] = { // Extra-bit counts (0..7) parallel to s_tdefl_small_dist_sym.
+  0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7 };
+
+static const mz_uint8 s_tdefl_large_dist_sym[128] = { // 128 entries holding distance symbols 18..29 (the first two entries are 0). NOTE(review): presumably indexed by the high bits of a larger match distance; the lookup site is outside this chunk.
+  0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
+  26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
+  28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };
+
+static const mz_uint8 s_tdefl_large_dist_extra[128] = { // Extra-bit counts (8..13) parallel to s_tdefl_large_dist_sym (the first two entries are 0).
+  0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
+  12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };
+
+// Byte-wise LSD radix sort of num_syms tdefl_sym_freq records by ascending 16-bit m_key, ping-ponging between pSyms0 and pSyms1. Returns whichever of the two buffers ends up holding the sorted records.
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq;
+static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1)
+{
+  mz_uint32 counts[2][256], num_passes, digit, n; tdefl_sym_freq *pSrc = pSyms0, *pDst = pSyms1;
+  MZ_CLEAR_OBJ(counts);
+  for (n = 0; n < num_syms; n++) { const mz_uint key = pSyms0[n].m_key; counts[0][key & 0xFF]++; counts[1][(key >> 8) & 0xFF]++; } // One histogram per key byte.
+  num_passes = (counts[1][0] == num_syms) ? 1 : 2; // Every high byte is zero: the second pass would not move anything.
+  for (digit = 0; digit < num_passes; digit++)
+  {
+    const mz_uint shift = digit * 8; mz_uint slot[256], running = 0; tdefl_sym_freq *pSwap;
+    for (n = 0; n < 256; n++) { slot[n] = running; running += counts[digit][n]; } // Prefix sums give each bucket its first output slot.
+    for (n = 0; n < num_syms; n++) { const tdefl_sym_freq *pSym = &pSrc[n]; pDst[slot[(pSym->m_key >> shift) & 0xFF]++] = *pSym; }
+    pSwap = pSrc; pSrc = pDst; pDst = pSwap;
+  }
+  return pSrc;
+}
+
+// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
+static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
+{ // Overwrites A[0..n-1].m_key in place; tdefl_optimize_huffman_table() then uses each resulting m_key as that symbol's code length. NOTE(review): the merge below relies on A being sorted by ascending m_key on entry - the caller in this file runs tdefl_radix_sort_syms() first.
+  int root, leaf, next, avbl, used, dpth;
+  if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } // Nothing to do for an empty list; a lone symbol gets length 1.
+  A[0].m_key += A[1].m_key; root = 0; leaf = 2; // Phase 1: the first internal node combines the two leading entries.
+  for (next=1; next < n-1; next++)
+  {
+    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = (mz_uint16)next; } else A[next].m_key = A[leaf++].m_key; // First child: take the pending internal node or the next leaf; a consumed internal node's m_key becomes the index of its parent.
+    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); A[root++].m_key = (mz_uint16)next; } else A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key); // Second child: add its weight to the new node.
+  }
+  A[n-2].m_key = 0; for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; // Phase 2: parent indices become depths (A[n-2] gets depth 0, every other node its parent's depth + 1).
+  avbl = 1; used = dpth = 0; root = n-2; next = n-1; // Phase 3: hand out depths to the leaves, filling A from the end.
+  while (avbl>0)
+  {
+    while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } // Count the internal nodes sitting at this depth.
+    while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } // The remaining slots at this depth are leaves.
+    avbl = 2*used; dpth++; used = 0; // Each internal node opens two slots one level down.
+  }
+}
+
+// Clamps a code-length histogram (pNum_codes[len] = number of codes of that length) to max_code_size: longer codes are folded into the longest allowed length, then codes are shifted until the lengths weighted by 2^(max_code_size - len) sum to exactly 2^max_code_size.
+enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };
+static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
+{
+  int len; mz_uint32 kraft_sum = 0; unsigned long full_space; if (code_list_len <= 1) return;
+  for (len = max_code_size + 1; len <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; len++) pNum_codes[max_code_size] += pNum_codes[len];
+  for (len = 1; len <= max_code_size; len++) kraft_sum += ((mz_uint32)pNum_codes[len]) << (max_code_size - len); full_space = 1UL << max_code_size;
+  while (kraft_sum != full_space)
+  {
+    pNum_codes[max_code_size]--; kraft_sum--; // Drop one code of the maximum length...
+    len = max_code_size - 1; while ((len > 0) && !pNum_codes[len]) len--; // ...find the longest shorter length that is still in use...
+    if (len > 0) { pNum_codes[len]--; pNum_codes[len + 1] += 2; } // ...and replace one of its codes by two codes one bit longer.
+  }
+}
+
+static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table)
+{ // Fills d->m_huff_codes[table_num] with bit-reversed canonical codes. With static_table set, the code sizes already stored in d->m_huff_code_sizes[table_num] are used as-is; otherwise they are first derived from the counts in d->m_huff_count[table_num] and limited to code_size_limit bits.
+  int sym, len, bit, len_counts[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE], code_base = 0; mz_uint first_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(len_counts);
+  if (!static_table)
+  {
+    tdefl_sym_freq scratch_a[TDEFL_MAX_HUFF_SYMBOLS], scratch_b[TDEFL_MAX_HUFF_SYMBOLS], *pSorted;
+    int used = 0, rank;
+    for (sym = 0; sym < table_len; sym++) { const mz_uint16 freq = d->m_huff_count[table_num][sym]; if (!freq) continue; scratch_a[used].m_key = freq; scratch_a[used].m_sym_index = (mz_uint16)sym; used++; } // Gather only the symbols that occur.
+    pSorted = tdefl_radix_sort_syms(used, scratch_a, scratch_b);
+    tdefl_calculate_minimum_redundancy(pSorted, used); // m_key now holds a code length per symbol.
+    for (rank = 0; rank < used; rank++) len_counts[pSorted[rank].m_key]++;
+    tdefl_huffman_enforce_max_code_size(len_counts, used, code_size_limit);
+    MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]);
+    for (len = 1, rank = used; len <= code_size_limit; len++)
+    {
+      int remaining = len_counts[len];
+      while (remaining-- > 0) d->m_huff_code_sizes[table_num][pSorted[--rank].m_sym_index] = (mz_uint8)(len); // Walk the sorted list from its end, so the shortest lengths go to the largest counts.
+    }
+  }
+  else
+  {
+    for (sym = 0; sym < table_len; sym++) len_counts[d->m_huff_code_sizes[table_num][sym]]++;
+  }
+  first_code[1] = 0;
+  for (len = 2; len <= code_size_limit; len++) { code_base = (code_base + len_counts[len - 1]) << 1; first_code[len] = code_base; } // First canonical code of each length.
+  for (sym = 0; sym < table_len; sym++)
+  {
+    const mz_uint size = d->m_huff_code_sizes[table_num][sym]; mz_uint canon, mirrored = 0;
+    if (!size) continue;
+    canon = first_code[size]++;
+    for (bit = 0; bit < (int)size; bit++) { mirrored = (mirrored << 1) | (canon & 1); canon >>= 1; } // Store the code with its bit order reversed.
+    d->m_huff_codes[table_num][sym] = (mz_uint16)mirrored;
+  }
+}
+
+#define TDEFL_PUT_BITS(b, l) do { /* Appends the l-bit value b above the bits already pending in d->m_bit_buffer, then drains complete bytes to d->m_pOutput_buf. */ \
+  mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \
+  d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \
+  while (d->m_bits_in >= 8) { \
+    if (d->m_pOutput_buf < d->m_pOutput_buf_end) \
+      *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
+      d->m_bit_buffer >>= 8; /* Despite the indentation, only the store above is guarded by the if: a byte that does not fit is dropped, but the bit buffer still advances. */ \
+      d->m_bits_in -= 8; \
+  } \
+} MZ_MACRO_END
+
+#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { /* Flushes a pending run of rle_repeat_count repeats of prev_code_size into packed_code_sizes[] and the d->m_huff_count[2] statistics, then resets the run counter. */ \
+  if (rle_repeat_count < 3) { /* Runs shorter than 3 are written out literally. */ \
+    d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
+    while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \
+  } else { /* Longer runs become symbol 16 followed by (count - 3). */ \
+    d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \
+} rle_repeat_count = 0; } }
+
+#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { /* Flushes a pending run of rle_z_count zero code sizes into packed_code_sizes[] and the d->m_huff_count[2] statistics, then resets the run counter. */ \
+  if (rle_z_count < 3) { /* Runs shorter than 3 are written out as literal zeros. */ \
+    d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
+  } else if (rle_z_count <= 10) { /* Runs of 3..10 become symbol 17 followed by (count - 3). */ \
+    d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \
+  } else { /* Longer runs become symbol 18 followed by (count - 11). */ \
+    d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \
+} rle_z_count = 0; } }
+
+static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; // Order in which tdefl_start_dynamic_block() writes the 3-bit sizes of table 2's symbols (the 19 code-length symbols).
+
+static void tdefl_start_dynamic_block(tdefl_compressor *d) // Builds tables 0 (literal/length) and 1 (distance) from the current symbol counts and writes the dynamic block header: block type, table sizes and the run-length-packed code sizes.
+{
+  int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index;
+  mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; // 0xFF is outside the 15-bit size limit used below, so the first nonzero size always starts a new run
+
+  d->m_huff_count[0][256] = 1; // symbol 256 is written at the end of every block by tdefl_compress_lz_codes(); presumably forced nonzero here so it is always assigned a code
+
+  tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
+  tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);
+
+  for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; // trim trailing unused literal/length symbols, never below 257
+  for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; // trim trailing unused distance symbols, never below 1
+
+  memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); // both tables' sizes are packed as one sequence: literal/length sizes first...
+  memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); // ...followed by the distance sizes
+  total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0;
+
+  memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); // table 2 counts the symbols of the packed size sequence built below
+  for (i = 0; i < total_code_sizes_to_pack; i++)
+  {
+    mz_uint8 code_size = code_sizes_to_pack[i];
+    if (!code_size)
+    {
+      TDEFL_RLE_PREV_CODE_SIZE(); // a zero ends any pending run of repeated nonzero sizes
+      if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } // 138 = 11 + 127, the longest zero run one symbol 18 (7 extra bits) can describe
+    }
+    else
+    {
+      TDEFL_RLE_ZERO_CODE_SIZE(); // a nonzero size ends any pending run of zeros
+      if (code_size != prev_code_size)
+      {
+        TDEFL_RLE_PREV_CODE_SIZE();
+        d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; // the first occurrence of a size is always emitted literally
+      }
+      else if (++rle_repeat_count == 6) // 6 = 3 + 3, the longest repeat one symbol 16 (2 extra bits) can describe
+      {
+        TDEFL_RLE_PREV_CODE_SIZE();
+      }
+    }
+    prev_code_size = code_size;
+  }
+  if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } // flush whichever run is still pending at the end of the sequence
+
+  tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); // limit of 7 so each size fits the 3-bit fields written below
+
+  TDEFL_PUT_BITS(2, 2); // 2-bit block type: 2 here (tdefl_start_static_block() writes 1, the raw path in tdefl_flush_block() writes 0)
+
+  TDEFL_PUT_BITS(num_lit_codes - 257, 5); // table sizes are stored biased by their minimum values
+  TDEFL_PUT_BITS(num_dist_codes - 1, 5);
+
+  for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; // find the last used table-2 symbol in swizzle order
+  num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); // at least 4 sizes are always sent; the count is stored minus 4
+  for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);
+
+  for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) // emit the packed sequence using table 2's codes
+  {
+    mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
+    TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
+    if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); // run symbols 16/17/18 are followed by their count in 2/3/7 extra bits
+  }
+}
+
+// Loads the fixed code sizes (8/9/7/8 across the 288 literal/length symbols, 5 for all 32 distance symbols), rebuilds both tables from them and writes the 2-bit block type 1.
+static void tdefl_start_static_block(tdefl_compressor *d)
+{
+  mz_uint8 *pLit_sizes = &d->m_huff_code_sizes[0][0];
+
+  memset(pLit_sizes, 8, 144);        // symbols   0..143
+  memset(pLit_sizes + 144, 9, 112);  // symbols 144..255
+  memset(pLit_sizes + 256, 7, 24);   // symbols 256..279
+  memset(pLit_sizes + 280, 8, 8);    // symbols 280..287
+  memset(d->m_huff_code_sizes[1], 5, 32);
+
+  // Turn the sizes set above into the actual codes for both tables.
+  tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
+  tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);
+
+  TDEFL_PUT_BITS(1, 2);
+}
+
+static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; // mz_bitmasks[n] has the low n bits set, for n = 0..16
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
+static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) // Huffman-codes the buffered LZ codes followed by symbol 256; returns MZ_FALSE when the output buffer is exhausted. This variant keeps a local 64-bit bit buffer and uses unaligned loads/stores.
+{
+  mz_uint flags;
+  mz_uint8 *pLZ_codes;
+  mz_uint8 *pOutput_buf = d->m_pOutput_buf;
+  mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
+  mz_uint64 bit_buffer = d->m_bit_buffer;
+  mz_uint bits_in = d->m_bits_in;
+
+#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } // accumulate only; the loop below stores the buffer once per iteration
+
+  flags = 1;
+  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1)
+  {
+    if (flags == 1) // only the 0x100 sentinel is left: all 8 flag bits were consumed, so the next byte is a new flag byte
+      flags = *pLZ_codes++ | 0x100;
+
+    if (flags & 1) // flag bit 1: a 3-byte match entry (length byte, then 16-bit distance)
+    {
+      mz_uint s0, s1, n0, n1, sym, num_extra_bits;
+      mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; // unaligned 16-bit load; valid only under the little-endian guard of this variant
+
+      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);
+
+      // This sequence coaxes MSVC into using cmov's vs. jmp's.
+      s0 = s_tdefl_small_dist_sym[match_dist & 511];
+      n0 = s_tdefl_small_dist_extra[match_dist & 511];
+      s1 = s_tdefl_large_dist_sym[match_dist >> 8];
+      n1 = s_tdefl_large_dist_extra[match_dist >> 8];
+      sym = (match_dist < 512) ? s0 : s1; // both table lookups are done unconditionally above, then one result is selected
+      num_extra_bits = (match_dist < 512) ? n0 : n1;
+
+      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
+    }
+    else // flag bit 0: a single literal byte
+    {
+      mz_uint lit = *pLZ_codes++;
+      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+
+      if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) // next entry is also a literal: encode it in the same iteration
+      {
+        flags >>= 1;
+        lit = *pLZ_codes++;
+        MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+        TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+
+        if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) // at most three literals per iteration; presumably the cap that keeps bit_buffer within 64 bits
+        {
+          flags >>= 1;
+          lit = *pLZ_codes++;
+          MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+          TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+        }
+      }
+    }
+
+    if (pOutput_buf >= d->m_pOutput_buf_end)
+      return MZ_FALSE; // out of room; d->m_pOutput_buf has not been updated, the caller restores its own saved state
+
+    *(mz_uint64*)pOutput_buf = bit_buffer; // always stores 8 bytes; relies on the 16 bytes tdefl_flush_block() keeps free past m_pOutput_buf_end
+    pOutput_buf += (bits_in >> 3); // but only the complete bytes are kept
+    bit_buffer >>= (bits_in & ~7);
+    bits_in &= 7;
+  }
+
+#undef TDEFL_PUT_BITS_FAST
+
+  d->m_pOutput_buf = pOutput_buf;
+  d->m_bits_in = 0;
+  d->m_bit_buffer = 0;
+
+  while (bits_in) // hand the leftover bits back to the compressor's own bit buffer
+  {
+    mz_uint32 n = MZ_MIN(bits_in, 16); // mz_bitmasks only covers widths up to 16
+    TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
+    bit_buffer >>= n;
+    bits_in -= n;
+  }
+
+  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); // symbol 256 terminates the block
+
+  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
+}
+#else
+static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) // Huffman-codes the buffered LZ codes followed by symbol 256; returns MZ_FALSE when the output buffer filled up. Portable variant: every write goes through TDEFL_PUT_BITS.
+{
+  mz_uint flags;
+  mz_uint8 *pLZ_codes;
+
+  flags = 1;
+  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1)
+  {
+    if (flags == 1) // only the 0x100 sentinel is left: all 8 flag bits were consumed, so the next byte is a new flag byte
+      flags = *pLZ_codes++ | 0x100;
+    if (flags & 1) // flag bit 1: a 3-byte match entry
+    {
+      mz_uint sym, num_extra_bits;
+      mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; // distance is stored low byte first
+
+      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
+      TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);
+
+      if (match_dist < 512) // small distances index their tables directly
+      {
+        sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist];
+      }
+      else // larger distances are looked up by their high byte
+      {
+        sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
+      }
+      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
+      TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
+    }
+    else // flag bit 0: a single literal byte
+    {
+      mz_uint lit = *pLZ_codes++;
+      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
+      TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
+    }
+  }
+
+  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); // symbol 256 terminates the block
+
+  return (d->m_pOutput_buf < d->m_pOutput_buf_end); // TDEFL_PUT_BITS drops bytes once the buffer is full, so reaching the end is reported as failure
+}
+#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
+
+// Writes one compressed block: the static or dynamic block header first, then the coded LZ data. Returns the result of tdefl_compress_lz_codes().
+static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block)
+{
+  // The header routines also fill in the code tables that tdefl_compress_lz_codes() reads.
+  if (!static_block) tdefl_start_dynamic_block(d);
+  else tdefl_start_static_block(d);
+  return tdefl_compress_lz_codes(d);
+}
+
+static int tdefl_flush_block(tdefl_compressor *d, int flush) // Emits the buffered LZ codes as one block (compressed or raw), resets the per-block state and delivers the bytes. Returns m_output_flush_remaining, or the TDEFL_STATUS_PUT_BUF_FAILED status if the callback fails (callers treat n < 0 as failure).
+{
+  mz_uint saved_bit_buf, saved_bits_in;
+  mz_uint8 *pSaved_output_buf;
+  mz_bool comp_block_succeeded = MZ_FALSE;
+  int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; // raw is only possible while the block's source bytes are still in the dictionary
+  mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; // write straight into the caller's buffer when it has room for a full block, else into the internal buffer
+
+  d->m_pOutput_buf = pOutput_buf_start;
+  d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; // 16 bytes of slack are kept past the logical end
+
+  MZ_ASSERT(!d->m_output_flush_remaining);
+  d->m_output_flush_ofs = 0;
+  d->m_output_flush_remaining = 0;
+
+  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); // right-align the flags actually used in the last, partially filled flag byte
+  d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); // the last flag byte holds no flags at all: give back the byte reserved for it
+
+  if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) // two-byte zlib header, first block only
+  {
+    TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8);
+  }
+
+  TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); // 1-bit "final block" marker
+
+  pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; // snapshot so the block body can be discarded and rewritten below
+
+  if (!use_raw_block)
+    comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); // static tables when forced or when the block covers fewer than 48 bytes
+
+  // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead.
+  if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) &&
+       ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) )
+  {
+    mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
+    TDEFL_PUT_BITS(0, 2); // block type 0
+    if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } // pad to a byte boundary
+    for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) // writes the length, then its 16-bit complement; the second XOR restores m_total_lz_bytes
+    {
+      TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
+    }
+    for (i = 0; i < d->m_total_lz_bytes; ++i) // copy the block's source bytes back out of the dictionary
+    {
+      TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8);
+    }
+  }
+  // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes.
+  else if (!comp_block_succeeded)
+  {
+    d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
+    tdefl_compress_block(d, MZ_TRUE); // retry with static tables; the result is not checked here
+  }
+
+  if (flush)
+  {
+    if (flush == TDEFL_FINISH)
+    {
+      if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } // pad the stream to a byte boundary
+      if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } // zlib trailer: the Adler-32 value, most significant byte first
+    }
+    else
+    {
+      mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } // other flush modes: append an empty non-final raw block (3 zero bits, padding, then 0x0000 and 0xFFFF)
+    }
+  }
+
+  MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);
+
+  memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); // symbol statistics start from zero for the next block
+  memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
+
+  d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; // rewind the LZ code buffer: byte 0 is the first flag byte, codes start at byte 1
+
+  if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0)
+  {
+    if (d->m_pPut_buf_func)
+    {
+      *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; // report consumed input before calling out; note m_pIn_buf_size is dereferenced without a NULL check
+      if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
+        return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
+    }
+    else if (pOutput_buf_start == d->m_output_buf) // the block went to the internal buffer: copy what fits now, remember the rest
+    {
+      int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
+      memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy);
+      d->m_out_buf_ofs += bytes_to_copy;
+      if ((n -= bytes_to_copy) != 0)
+      {
+        d->m_output_flush_ofs = bytes_to_copy; // the remainder is delivered later by tdefl_flush_output_buffer()
+        d->m_output_flush_remaining = n;
+      }
+    }
+    else // the block was written directly into the caller's buffer
+    {
+      d->m_out_buf_ofs += n;
+    }
+  }
+
+  return d->m_output_flush_remaining;
+}
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) // Reads 16 bits at p through a pointer cast, with no alignment handling; only defined when MINIZ_USE_UNALIGNED_LOADS_AND_STORES is enabled.
+static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) // Walks the hash chain from lookahead_pos looking for a match longer than *pMatch_len; on success updates *pMatch_dist and *pMatch_len. Variant comparing 16 bits at a time.
+{
+  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
+  mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; // a separate probe budget applies once the incoming match is already 32 bytes or longer
+  const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q;
+  mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); // c01: the two bytes a candidate must share to beat match_len; s01: the first two bytes of the string
+  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return;
+  for ( ; ; )
+  {
+    for ( ; ; )
+    {
+      if (--num_probes_left == 0) return; // each unit of budget covers the three unrolled probes below
+      #define TDEFL_PROBE \
+        next_probe_pos = d->m_next[probe_pos]; \
+        if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
+        probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
+        if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break;
+      TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; // each probe returns when the chain ends or goes out of range, and breaks out on a promising candidate
+    }
+    if (!dist) break;
+	 q = (const mz_uint16*)(d->m_dict + probe_pos); 
+	 if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; // cheap reject: the first two bytes differ
+	 p = s; probe_len = 32; // up to 32 rounds of four 16-bit compares
+    do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
+                   (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
+    if (!probe_len) // every compare succeeded: report the longest match allowed
+    {
+      *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break;
+    }
+    else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) // matched words * 2, plus one if the first byte of the mismatching word still agrees
+    {
+      *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break;
+      c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); // raise the bar for the remaining candidates
+    }
+  }
+}
+#else
+static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) // Walks the hash chain from lookahead_pos looking for a match longer than *pMatch_len; on success updates *pMatch_dist and *pMatch_len. Portable byte-wise variant.
+{
+  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
+  mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; // a separate probe budget applies once the incoming match is already 32 bytes or longer
+  const mz_uint8 *s = d->m_dict + pos, *p, *q;
+  mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; // the two bytes a candidate must share to be able to beat match_len
+  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return;
+  for ( ; ; )
+  {
+    for ( ; ; )
+    {
+      if (--num_probes_left == 0) return; // each unit of budget covers the three unrolled probes below
+      #define TDEFL_PROBE \
+        next_probe_pos = d->m_next[probe_pos]; \
+        if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
+        probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
+        if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break;
+      TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; // each probe returns when the chain ends or goes out of range, and breaks out on a promising candidate
+    }
+    if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; // count matching bytes, capped at max_match_len
+    if (probe_len > match_len)
+    {
+      *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return;
+      c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; // raise the bar for the remaining candidates
+    }
+  }
+}
+#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+static mz_bool tdefl_compress_fast(tdefl_compressor *d) // Single-probe greedy compressor working on local copies of the parser state; returns MZ_FALSE only when tdefl_flush_block() reports a negative result.
+{
+  // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio.
+  mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left;
+  mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
+  mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
+
+  while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size)))
+  {
+    const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
+    mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
+    mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); // top the lookahead up to at most 4096 bytes
+    d->m_src_buf_left -= num_bytes_to_process;
+    lookahead_size += num_bytes_to_process;
+
+    while (num_bytes_to_process) // copy the input into the circular dictionary, splitting at the wrap point
+    {
+      mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process);
+      memcpy(d->m_dict + dst_pos, d->m_pSrc, n);
+      if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) // the first TDEFL_MAX_MATCH_LEN - 1 bytes are mirrored past the end of the dictionary, so reads near the wrap need no masking
+        memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos));
+      d->m_pSrc += n;
+      dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK;
+      num_bytes_to_process -= n;
+    }
+
+    dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size);
+    if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; // without a flush request, wait for a full lookahead before parsing
+
+    while (lookahead_size >= 4) // the trigram is fetched with a 4-byte load, so at least 4 bytes must remain
+    {
+      mz_uint cur_match_dist, cur_match_len = 1;
+      mz_uint8 *pCur_dict = d->m_dict + cur_pos;
+      mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; // next three bytes as one value (little-endian load)
+      mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK;
+      mz_uint probe_pos = d->m_hash[hash];
+      d->m_hash[hash] = (mz_uint16)lookahead_pos; // only the most recent position per hash is kept; m_next is not used here
+
+      if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) // candidate must lie inside the dictionary and share the first three bytes
+      {
+        const mz_uint16 *p = (const mz_uint16 *)pCur_dict;
+        const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos);
+        mz_uint32 probe_len = 32; // up to 32 rounds of four 16-bit compares
+        do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
+          (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
+        cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); // matched words * 2, plus one if the first byte of the mismatching word still agrees
+        if (!probe_len)
+          cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; // every compare succeeded: maximum length, unless the distance is 0 (candidate is the current position itself)
+
+        if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) // too short, or minimum length at a distance of 8K or more: emit a literal instead
+        {
+          cur_match_len = 1;
+          *pLZ_code_buf++ = (mz_uint8)first_trigram;
+          *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
+          d->m_huff_count[0][(mz_uint8)first_trigram]++;
+        }
+        else
+        {
+          mz_uint32 s0, s1;
+          cur_match_len = MZ_MIN(cur_match_len, lookahead_size); // never claim more bytes than are actually buffered
+
+          MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE));
+
+          cur_match_dist--; // distances are stored minus one
+
+          pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN);
+          *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist;
+          pLZ_code_buf += 3;
+          *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); // a 1 flag bit marks a match entry
+
+          s0 = s_tdefl_small_dist_sym[cur_match_dist & 511];
+          s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8];
+          d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++;
+
+          d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++;
+        }
+      }
+      else // no usable candidate: emit the first byte as a literal
+      {
+        *pLZ_code_buf++ = (mz_uint8)first_trigram;
+        *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
+        d->m_huff_count[0][(mz_uint8)first_trigram]++;
+      }
+
+      if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } // flag byte full: reserve the next code-buffer byte as the new flag byte
+
+      total_lz_bytes += cur_match_len;
+      lookahead_pos += cur_match_len;
+      dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE);
+      cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
+      MZ_ASSERT(lookahead_size >= cur_match_len);
+      lookahead_size -= cur_match_len;
+
+      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) // code buffer nearly full: emit a block
+      {
+        int n;
+        d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; // publish the local state, tdefl_flush_block() reads it from d
+        d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+        if ((n = tdefl_flush_block(d, 0)) != 0)
+          return (n < 0) ? MZ_FALSE : MZ_TRUE; // n > 0: output is still pending delivery, so stop here without failing
+        total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; // reload what the flush reset
+      }
+    }
+
+    while (lookahead_size) // fewer than 4 bytes left (reached only when flushing): emit them as literals
+    {
+      mz_uint8 lit = d->m_dict[cur_pos];
+
+      total_lz_bytes++;
+      *pLZ_code_buf++ = lit;
+      *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
+      if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; }
+
+      d->m_huff_count[0][lit]++;
+
+      lookahead_pos++;
+      dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE);
+      cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
+      lookahead_size--;
+
+      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) // same flush sequence as in the match loop above
+      {
+        int n;
+        d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+        d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+        if ((n = tdefl_flush_block(d, 0)) != 0)
+          return (n < 0) ? MZ_FALSE : MZ_TRUE;
+        total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left;
+      }
+    }
+  }
+
+  d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; // write the local state back for the next call
+  d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+  return MZ_TRUE;
+}
+#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+
+static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) // Appends one literal byte to the LZ code buffer and counts it in table 0.
+{
+  d->m_huff_count[0][lit] += 1; d->m_total_lz_bytes += 1;
+  *d->m_pLZ_code_buf = lit; d->m_pLZ_code_buf += 1;
+  *d->m_pLZ_flags >>= 1; // the 0 bit shifted in at the top marks this entry as a literal
+  if ((d->m_num_flags_left -= 1) == 0) { d->m_pLZ_flags = d->m_pLZ_code_buf; d->m_pLZ_code_buf += 1; d->m_num_flags_left = 8; } // flag byte full: reserve the next code-buffer byte as the new flag byte
+}
+
+// Appends a match to the LZ code buffer as three bytes (biased length, then distance minus one, low byte first) and updates the length and distance symbol counts.
+static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist)
+{
+  mz_uint32 dist_sym;
+  mz_uint8 *pCode = d->m_pLZ_code_buf;
+
+  MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE));
+
+  // From here on match_dist holds the stored form (distance - 1).
+  match_dist--;
+  pCode[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);
+  pCode[1] = (mz_uint8)(match_dist & 0xFF);
+  pCode[2] = (mz_uint8)(match_dist >> 8);
+  d->m_pLZ_code_buf = pCode + 3; d->m_total_lz_bytes += match_len;
+
+  *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); // the 1 bit set at the top marks this entry as a match
+  if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } // flag byte full: reserve the next code-buffer byte as the new flag byte
+
+  dist_sym = (match_dist < 512) ? s_tdefl_small_dist_sym[match_dist & 511] : s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
+  d->m_huff_count[1][dist_sym]++; if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
+}
+
+static mz_bool tdefl_compress_normal(tdefl_compressor *d) // General compressor loop: feeds input into the dictionary and hash chains, then parses with lazy or greedy matching. Returns MZ_FALSE only when tdefl_flush_block() reports a negative result.
+{
+  const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left;
+  tdefl_flush flush = d->m_flush;
+
+  while ((src_buf_left) || ((flush) && (d->m_lookahead_size)))
+  {
+    mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
+    // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN.
+    if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) // enough history exists to keep a rolling hash across the inserted bytes
+    {
+      mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
+      mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; // seeded from the two bytes preceding the insertion point
+      mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
+      const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
+      src_buf_left -= num_bytes_to_process;
+      d->m_lookahead_size += num_bytes_to_process;
+      while (pSrc != pSrc_end)
+      {
+        mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; // the first bytes are mirrored past the end of the dictionary
+        hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
+        d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); // link the previous head behind the new position
+        dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++;
+      }
+    }
+    else // start of stream: too little data for the rolling hash, compute it from scratch per byte
+    {
+      while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
+      {
+        mz_uint8 c = *pSrc++;
+        mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
+        src_buf_left--;
+        d->m_dict[dst_pos] = c;
+        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
+          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
+        if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN)
+        {
+          mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
+          mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
+          d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos);
+        }
+      }
+    }
+    d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
+    if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) // without a flush request, wait for a full lookahead before parsing
+      break;
+
+    // Simple lazy/greedy parsing state machine.
+    len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; // a deferred match sets the length that a new match must beat
+    if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS))
+    {
+      if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) // RLE mode: only runs of the previous byte (distance 1) are considered
+      {
+        mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
+        cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; }
+        if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1;
+      }
+    }
+    else
+    {
+      tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len);
+    }
+    if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) // discard the match in these cases
+    {
+      cur_match_dist = cur_match_len = 0;
+    }
+    if (d->m_saved_match_len) // a match found at the previous position was deferred
+    {
+      if (cur_match_len > d->m_saved_match_len) // the new match is longer: the deferred position becomes a literal
+      {
+        tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
+        if (cur_match_len >= 128) // long enough to take immediately
+        {
+          tdefl_record_match(d, cur_match_len, cur_match_dist);
+          d->m_saved_match_len = 0; len_to_move = cur_match_len;
+        }
+        else // defer again in case the next position is better still
+        {
+          d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
+        }
+      }
+      else // the deferred match wins
+      {
+        tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
+        len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; // minus one: the lookahead already advanced one byte when the match was deferred
+      }
+    }
+    else if (!cur_match_dist)
+      tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
+    else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) // take the match right away
+    {
+      tdefl_record_match(d, cur_match_len, cur_match_dist);
+      len_to_move = cur_match_len;
+    }
+    else // lazy parsing: remember the match and look one byte further first
+    {
+      d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
+    }
+    // Move the lookahead forward by len_to_move bytes.
+    d->m_lookahead_pos += len_to_move;
+    MZ_ASSERT(d->m_lookahead_size >= len_to_move);
+    d->m_lookahead_size -= len_to_move;
+    d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE);
+    // Check if it's time to flush the current LZ codes to the internal output buffer.
+    if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
+         ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) // also flush after 31K of input when the LZ codes are nearly as large as the input (x 115/128), or when raw blocks are forced
+    {
+      int n;
+      d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; // publish the input position, tdefl_flush_block() may report it to the caller
+      if ((n = tdefl_flush_block(d, 0)) != 0)
+        return (n < 0) ? MZ_FALSE : MZ_TRUE; // n > 0: output is still pending delivery, so stop here without failing
+    }
+  }
+
+  d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
+  return MZ_TRUE;
+}
+
+// Reports the input consumed so far, moves as much pending internal output as fits into the caller's buffer, and reports the bytes written. Returns TDEFL_STATUS_DONE once finished with nothing left pending.
+static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d)
+{
+  if (d->m_pIn_buf_size)
+    *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
+
+  if (d->m_pOut_buf_size)
+  {
+    const size_t room = *d->m_pOut_buf_size - d->m_out_buf_ofs;
+    const size_t pending = d->m_output_flush_remaining;
+    const size_t num_bytes = (pending < room) ? pending : room;
+
+    memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, num_bytes);
+    d->m_output_flush_ofs += (mz_uint)num_bytes; d->m_output_flush_remaining -= (mz_uint)num_bytes;
+    d->m_out_buf_ofs += num_bytes; *d->m_pOut_buf_size = d->m_out_buf_ofs;
+  }
+
+  if (d->m_finished && !d->m_output_flush_remaining) return TDEFL_STATUS_DONE;
+  return TDEFL_STATUS_OKAY;
+}
+
+tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) // Compresses *pIn_buf_size input bytes. On return the two size pointers (when non-NULL) hold the bytes consumed and produced. Returns a tdefl_status, TDEFL_STATUS_BAD_PARAM on invalid arguments.
+{
+  if (!d)
+  {
+    if (pIn_buf_size) *pIn_buf_size = 0;
+    if (pOut_buf_size) *pOut_buf_size = 0;
+    return TDEFL_STATUS_BAD_PARAM;
+  }
+
+  d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size;
+  d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size;
+  d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
+  d->m_out_buf_ofs = 0;
+  d->m_flush = flush;
+
+  if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || // exactly one output route is allowed: the callback or an output buffer, never both or neither
+        (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) // once TDEFL_FINISH was requested every later call must request it too; nonzero sizes need non-NULL buffers
+  {
+    if (pIn_buf_size) *pIn_buf_size = 0;
+    if (pOut_buf_size) *pOut_buf_size = 0;
+    return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM);
+  }
+  d->m_wants_to_finish |= (flush == TDEFL_FINISH);
+
+  if ((d->m_output_flush_remaining) || (d->m_finished)) // output from an earlier call is still pending (or the stream is done): only deliver output, consume no input
+    return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+  if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && // fast path: exactly one probe, greedy parsing, and none of the filter/raw/RLE flags
+      ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) &&
+      ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0))
+  {
+    if (!tdefl_compress_fast(d))
+      return d->m_prev_return_status; // the failing tdefl_flush_block() already stored the status
+  }
+  else
+#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+  {
+    if (!tdefl_compress_normal(d))
+      return d->m_prev_return_status; // the failing tdefl_flush_block() already stored the status
+  }
+
+  if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf))
+    d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); // checksum only the input bytes consumed by this call
+
+  if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) // all input parsed and nothing pending: honour the flush request
+  {
+    if (tdefl_flush_block(d, flush) < 0)
+      return d->m_prev_return_status;
+    d->m_finished = (flush == TDEFL_FINISH);
+    if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } // full flush clears the match history, so later matches cannot reach back across this point
+  }
+
+  return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
+}
+
+tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) // Wrapper around tdefl_compress for callback output: passes NULL for both output-buffer arguments.
+{
+  MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); // the put-buffer callback is asserted to be set; in_buf_size is a by-value copy, so the consumed count is not reported to the caller
+}
+
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) // Resets *d to its initial state for a new stream; d is dereferenced without a NULL check. Always returns TDEFL_STATUS_OKAY.
+{
+  d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; // optional output callback and its user pointer
+  d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; // the low 12 bits of flags carry the probe count
+  d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; // second probe limit is derived from a quarter of the probe count
+  if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) { // hash table and dictionary are zeroed unless nondeterministic parsing was requested
+    MZ_CLEAR_OBJ(d->m_hash);
+    MZ_CLEAR_OBJ(d->m_dict);
+  }
+  d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
+  d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
+  d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; // flags pointer starts at byte 0 of the LZ code buffer, codes start at byte 1
+  d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY;
+  d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; // Adler-32 starts at 1
+  d->m_pIn_buf = NULL; d->m_pOut_buf = NULL;
+  d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL;
+  d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0;
+  memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); // clear the symbol counts of table 0
+  memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); // clear the symbol counts of table 1
+  return TDEFL_STATUS_OKAY;
+}
+
+tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) // Returns the status last recorded in d->m_prev_return_status (TDEFL_STATUS_OKAY right after tdefl_init); d must not be NULL.
+{
+  return d->m_prev_return_status;
+}
+
+mz_uint32 tdefl_get_adler32(tdefl_compressor *d) // Returns the Adler-32 value stored in d (1 after tdefl_init; tdefl_compress extends it when TDEFL_WRITE_ZLIB_HEADER or TDEFL_COMPUTE_ADLER32 is set); d must not be NULL.
+{
+  return d->m_adler32;
+}
+
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
+{
+  // One-shot helper: compresses [pBuf, pBuf + buf_len) through a temporary heap-allocated compressor, delivering output via pPut_buf_func.
+  tdefl_compressor *pState; mz_bool ok = MZ_FALSE; if ((!pPut_buf_func) || ((buf_len) && (!pBuf))) return MZ_FALSE;
+  if (NULL == (pState = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)))) return MZ_FALSE;
+  if (tdefl_init(pState, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY) // success requires the single TDEFL_FINISH call to report DONE
+    ok = (tdefl_compress_buffer(pState, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE);
+  MZ_FREE(pState); return ok;
+}
+
+typedef struct // Destination buffer state used by tdefl_output_buffer_putter.
+{
+  size_t m_size, m_capacity; // bytes written so far / bytes available in m_pBuf
+  mz_uint8 *m_pBuf; // output storage; replaced via MZ_REALLOC when the buffer grows
+  mz_bool m_expandable; // when false the putter fails instead of growing the buffer
+} tdefl_output_buffer;
+
+static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser)
+{
+  tdefl_output_buffer *pOut = (tdefl_output_buffer *)pUser; // pUser is the tdefl_output_buffer being appended to
+  const size_t needed = pOut->m_size + len;
+  if (needed > pOut->m_capacity)
+  {
+    size_t grown = pOut->m_capacity; mz_uint8 *pGrown_buf; if (!pOut->m_expandable) return MZ_FALSE; // fixed-size buffers cannot grow
+    do { grown = MZ_MAX(128U, grown << 1U); } while (needed > grown); // double (minimum 128 bytes) until the new data fits
+    if (NULL == (pGrown_buf = (mz_uint8*)MZ_REALLOC(pOut->m_pBuf, grown))) return MZ_FALSE; // on failure the old buffer is left in place
+    pOut->m_capacity = grown; pOut->m_pBuf = pGrown_buf;
+  }
+  memcpy((mz_uint8*)pOut->m_pBuf + pOut->m_size, pBuf, len); pOut->m_size = needed; // append and advance the write position
+  return MZ_TRUE;
+}
+
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) // Compresses a memory block into a growable heap buffer. Returns the buffer (released by the caller with the allocator behind MZ_FREE) and stores its length in *pOut_len, or returns NULL with *pOut_len == 0 on failure.
+{
+  tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf);
+  if (!pOut_len) return NULL; else *pOut_len = 0; // pOut_len is mandatory; a pointer-returning function reports failure with NULL
+  out_buf.m_expandable = MZ_TRUE;
+  if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) { MZ_FREE(out_buf.m_pBuf); return NULL; } // the putter may already have allocated storage before the failure; release it rather than leak it
+  *pOut_len = out_buf.m_size; return out_buf.m_pBuf;
+}
+
+size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
+{
+  tdefl_output_buffer dst; mz_bool ok; MZ_CLEAR_OBJ(dst); // m_expandable stays 0, so the putter never reallocates the caller's buffer
+  if (NULL == pOut_buf) return 0;
+  dst.m_capacity = out_buf_len; dst.m_pBuf = (mz_uint8*)pOut_buf;
+  ok = tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &dst, flags);
+  return ok ? dst.m_size : 0; // compressed byte count, or 0 on failure (including output that does not fit)
+}
+
+#ifndef MINIZ_NO_ZLIB_APIS
+static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32,  16, 32, 128, 256,  512, 768, 1500 }; // probe count per compression level 0..10
+
+// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput falls off a cliff on some files).
+mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) // Translates zlib-style (level, window_bits, strategy) parameters into a tdefl flags word.
+{
+  mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); // NOTE(review): a negative level selects the MZ_DEFAULT_LEVEL probe count but still satisfies (level <= 3), so greedy parsing is enabled for it - confirm this is intended
+  if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; // positive window_bits requests a zlib header
+
+  if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; // level 0 takes precedence over every strategy below
+  else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES;
+  else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; // zero probes
+  else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
+  else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES;
+
+  return comp_flags;
+}
+#endif //MINIZ_NO_ZLIB_APIS
+
+#ifdef _MSC_VER
+#pragma warning (push)
+#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal)
+#endif
+
+// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at
+// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
+// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck.
+void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) // Encodes an 8-bit image with num_chans channels (1..4) of w*h pixels into a heap-allocated PNG file image. Returns the buffer and stores its size in *pLen_out, or returns NULL (with *pLen_out == 0) on failure; pLen_out must not be NULL. flip writes the rows bottom-up.
+{
+  // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined.
+  static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32,  16, 32, 128, 256,  512, 768, 1500 };
+  tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0;
+  if (!pComp) return NULL; if ((num_chans < 1) || (num_chans > 4)) { MZ_FREE(pComp); return NULL; } // num_chans indexes the 5-entry chans[] table below (entry 0 is a placeholder); reject anything that would read outside it
+  MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; }
+  // write dummy header (41 placeholder bytes, overwritten with the real header below); this leaves z == 0
+  for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf);
+  // compress image data: each row is preceded by one byte taken from z, which is 0 here
+  tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER);
+  for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); }
+  if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; }
+  // write real header
+  *pLen_out = out_buf.m_size-41;
+  {
+    static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06};
+    mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52,
+      0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0,
+      (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54};
+    c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24);
+    memcpy(out_buf.m_pBuf, pnghdr, 41);
+  }
+  // write footer (IDAT CRC-32, followed by IEND chunk)
+  if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; }
+  c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24);
+  // compute final size of file, grab compressed data buffer and return
+  *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf;
+}
+void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) // Same as the _ex variant with level 6 and no vertical flip.
+{
+  // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib APIs were #defined out)
+  return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE);
+}
+
+#ifdef _MSC_VER
+#pragma warning (pop)
+#endif
+
+// ------------------- .ZIP archive reading
+
+#ifndef MINIZ_NO_ARCHIVE_APIS
+
+#ifdef MINIZ_NO_STDIO
+  #define MZ_FILE void *
+#else
+  #include <stdio.h>
+  #include <sys/stat.h>
+
+  #if defined(_MSC_VER) || defined(__MINGW64__)
+    static FILE *mz_fopen(const char *pFilename, const char *pMode) // Opens a file with the "secure" CRT calls; returns the FILE* the call stored, or the initial NULL.
+    {
+      FILE* pFile = NULL;
+		#ifdef _MSC_VER
+			_wfopen_s(&pFile, utf8::utf8::decode(pFilename).c_str(), utf8::utf8::decode(pMode).c_str()); // name and mode are converted with the project's utf8 helper before the wide-character open (presumably UTF-8 -> UTF-16; confirm against the helper)
+		#else 
+			fopen_s(&pFile, pFilename, pMode);
+		#endif
+      return pFile;
+    }
+    static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) // freopen_s wrapper with the classic freopen return convention.
+    {
+      FILE* pFile = NULL;
+      if (freopen_s(&pFile, pPath, pMode, pStream)) // a non-zero result is treated as failure
+        return NULL;
+      return pFile;
+    }
+    #ifndef MINIZ_NO_TIME
+      #include <sys/utime.h>
+    #endif
+    #define MZ_FILE FILE
+    #define MZ_FOPEN mz_fopen
+    #define MZ_FCLOSE fclose
+    #define MZ_FREAD fread
+    #define MZ_FWRITE fwrite
+    #define MZ_FTELL64 _ftelli64
+    #define MZ_FSEEK64 _fseeki64
+    #define MZ_FILE_STAT_STRUCT _stat
+    #define MZ_FILE_STAT _stat
+    #define MZ_FFLUSH fflush
+    #define MZ_FREOPEN mz_freopen
+    #define MZ_DELETE_FILE remove
+  #elif defined(__MINGW32__)
+    #ifndef MINIZ_NO_TIME
+      #include <sys/utime.h>
+    #endif
+    #define MZ_FILE FILE
+    #define MZ_FOPEN(f, m) fopen(f, m)
+    #define MZ_FCLOSE fclose
+    #define MZ_FREAD fread
+    #define MZ_FWRITE fwrite
+    #define MZ_FTELL64 ftello64
+    #define MZ_FSEEK64 fseeko64
+    #define MZ_FILE_STAT_STRUCT _stat
+    #define MZ_FILE_STAT _stat
+    #define MZ_FFLUSH fflush
+    #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
+    #define MZ_DELETE_FILE remove
+  #elif defined(__TINYC__)
+    #ifndef MINIZ_NO_TIME
+      #include <sys/utime.h>
+    #endif
+    #define MZ_FILE FILE
+    #define MZ_FOPEN(f, m) fopen(f, m)
+    #define MZ_FCLOSE fclose
+    #define MZ_FREAD fread
+    #define MZ_FWRITE fwrite
+    #define MZ_FTELL64 ftell
+    #define MZ_FSEEK64 fseek
+    #define MZ_FILE_STAT_STRUCT stat
+    #define MZ_FILE_STAT stat
+    #define MZ_FFLUSH fflush
+    #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
+    #define MZ_DELETE_FILE remove
+  #elif defined(__GNUC__) && _LARGEFILE64_SOURCE
+    #ifndef MINIZ_NO_TIME
+      #include <utime.h>
+    #endif
+    #define MZ_FILE FILE
+    #define MZ_FOPEN(f, m) fopen64(f, m)
+    #define MZ_FCLOSE fclose
+    #define MZ_FREAD fread
+    #define MZ_FWRITE fwrite
+    #define MZ_FTELL64 ftello64
+    #define MZ_FSEEK64 fseeko64
+    #define MZ_FILE_STAT_STRUCT stat64
+    #define MZ_FILE_STAT stat64
+    #define MZ_FFLUSH fflush
+    #define MZ_FREOPEN(p, m, s) freopen64(p, m, s)
+    #define MZ_DELETE_FILE remove
+  #else
+    #ifndef MINIZ_NO_TIME
+      #include <utime.h>
+    #endif
+    #define MZ_FILE FILE
+    #define MZ_FOPEN(f, m) fopen(f, m)
+    #define MZ_FCLOSE fclose
+    #define MZ_FREAD fread
+    #define MZ_FWRITE fwrite
+    #define MZ_FTELL64 ftello
+    #define MZ_FSEEK64 fseeko
+    #define MZ_FILE_STAT_STRUCT stat
+    #define MZ_FILE_STAT stat
+    #define MZ_FFLUSH fflush
+    #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
+    #define MZ_DELETE_FILE remove
+  #endif // #ifdef _MSC_VER
+#endif // #ifdef MINIZ_NO_STDIO
+
+#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) // Maps 'A'..'Z' to 'a'..'z' and leaves every other value unchanged; the argument is evaluated more than once.
+
+// Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff.
+enum // Signatures, fixed record sizes, and byte offsets of fields within each record (the *_OFS values are added to a record pointer and read with MZ_READ_LE16/MZ_READ_LE32).
+{
+  // ZIP archive identifiers and record sizes
+  MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
+  MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,
+  // Central directory header record offsets
+  MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
+  MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16,
+  MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
+  MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,
+  // Local directory header offsets
+  MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10,
+  MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
+  MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
+  // End of central directory offsets
+  MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
+  MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,
+};
+
+typedef struct // Minimal growable array used by the ZIP reader/writer; storage is managed through the archive's allocator callbacks.
+{
+  void *m_p; // element storage (NULL after mz_zip_array_clear)
+  size_t m_size, m_capacity; // used / allocated counts, both measured in elements
+  mz_uint m_element_size; // bytes per element; must be non-zero before the array can grow
+} mz_zip_array;
+
+struct mz_zip_internal_state_tag // Per-archive private state allocated by mz_zip_reader_init_internal.
+{
+  mz_zip_array m_central_dir; // raw central directory bytes (element size 1)
+  mz_zip_array m_central_dir_offsets; // one mz_uint32 per file: byte offset of its header inside m_central_dir
+  mz_zip_array m_sorted_central_dir_offsets; // one mz_uint32 per file: file indices ordered by lowercased filename (filled only when sorting is enabled)
+  MZ_FILE *m_pFile; // backing file for file-based archives
+  void *m_pMem; // backing memory block for memory-based archives
+  size_t m_mem_size; // size of the block at m_pMem
+  size_t m_mem_capacity; // not referenced in the visible reader code - TODO confirm usage
+};
+
+#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size // Sets the per-element byte size of an mz_zip_array.
+#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] // Lvalue for element `index`, viewing the storage as element_type; performs no bounds check.
+
+static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray)
+{
+  void *pStorage = pArray->m_p; // release the storage through the archive's free callback...
+  pZip->m_pFree(pZip->m_pAlloc_opaque, pStorage); memset(pArray, 0, sizeof(*pArray)); // ...then zero every field, including m_element_size
+}
+
+static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) // Grows the array's storage to hold at least min_new_capacity elements; returns MZ_FALSE if the realloc callback fails (the array is then unchanged).
+{
+  void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; // already large enough
+  if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } // growing mode: double from the current capacity instead of allocating the exact size
+  if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE;
+  pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; // m_size is not touched here
+  return MZ_TRUE;
+}
+
+static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing)
+{
+  if (new_capacity <= pArray->m_capacity) return MZ_TRUE; // nothing to do; the element count is never changed by a reserve
+  return mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing) ? MZ_TRUE : MZ_FALSE;
+}
+
+static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing)
+{
+  const mz_bool fits = (new_size <= pArray->m_capacity); // storage only has to grow when the new size exceeds the capacity
+  if ((!fits) && (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing))) return MZ_FALSE;
+  pArray->m_size = new_size; return MZ_TRUE; // newly exposed elements are not initialised here
+}
+
+static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n)
+{
+  const size_t wanted = pArray->m_size + n; return mz_zip_array_reserve(pZip, pArray, wanted, MZ_TRUE); // reserve space for n more elements, in growing mode
+}
+
+static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n)
+{
+  const size_t old_count = pArray->m_size; mz_uint8 *pDst; // appends n elements copied from pElements
+  if (!mz_zip_array_resize(pZip, pArray, old_count + n, MZ_TRUE)) return MZ_FALSE;
+  pDst = (mz_uint8*)pArray->m_p + old_count * pArray->m_element_size; memcpy(pDst, pElements, n * pArray->m_element_size); return MZ_TRUE; // destination is computed after the resize because the storage pointer may have changed
+}
+
+#ifndef MINIZ_NO_TIME
+static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date)
+{
+  struct tm broken_down; memset(&broken_down, 0, sizeof(broken_down)); // unpack the DOS bit fields into a struct tm and let mktime convert it
+  broken_down.tm_sec = (dos_time << 1) & 62; broken_down.tm_min = (dos_time >> 5) & 63; broken_down.tm_hour = (dos_time >> 11) & 31; // seconds are stored halved
+  broken_down.tm_mday = dos_date & 31; broken_down.tm_mon = ((dos_date >> 5) & 15) - 1; broken_down.tm_year = ((dos_date >> 9) & 127) + 80; // DOS years count from 1980, tm_year from 1900
+  broken_down.tm_isdst = -1; // leave the daylight-saving decision to mktime
+  return mktime(&broken_down);
+}
+
+static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) // Converts a time_t to packed DOS time/date words using local time; both outputs are set to 0 when the conversion fails.
+{
+#ifdef _MSC_VER
+  struct tm tm_struct;
+  struct tm *tm = &tm_struct;
+  errno_t err = localtime_s(tm, &time);
+  if (err)
+  {
+    *pDOS_date = 0; *pDOS_time = 0;
+    return;
+  }
+#else
+  struct tm *tm = localtime(&time); if (!tm) { *pDOS_date = 0; *pDOS_time = 0; return; } // localtime() can return NULL; mirror the MSVC branch instead of dereferencing it
+#endif
+  *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); // hour:5 | minute:6 | seconds/2:5
+  *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); // years since 1980:7 | month (1-based):4 | day:5
+}
+#endif
+
+#ifndef MINIZ_NO_STDIO
+static mz_bool mz_zip_get_file_modified_time(const char *pFilename, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) // Fetches a file's modification time as DOS time/date words; returns MZ_FALSE if the file cannot be stat'ed.
+{
+#ifdef MINIZ_NO_TIME
+  (void)pFilename; *pDOS_date = *pDOS_time = 0; // time support compiled out: report zeros and succeed
+#else
+  struct MZ_FILE_STAT_STRUCT file_stat;
+  // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh.
+  if (MZ_FILE_STAT(pFilename, &file_stat) != 0)
+    return MZ_FALSE;
+  mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date);
+#endif // #ifdef MINIZ_NO_TIME
+  return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_TIME
+static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, time_t modified_time) // Sets a file's access and modification times via utime(); returns non-zero on success.
+{
+  struct utimbuf t; t.actime = access_time; t.modtime = modified_time;
+  return !utime(pFilename, &t); // utime() returning 0 is mapped to a true result
+}
+#endif // #ifndef MINIZ_NO_TIME
+#endif // #ifndef MINIZ_NO_STDIO
+
+static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint32 flags) // Common reader setup: installs default allocators where unset, switches pZip to reading mode and allocates zeroed internal state.
+{
+  (void)flags; // currently unused
+  if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) // the archive object must exist and must not already be initialised
+    return MZ_FALSE;
+
+  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; // caller-supplied callbacks are kept
+  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
+  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
+
+  pZip->m_zip_mode = MZ_ZIP_MODE_READING;
+  pZip->m_archive_size = 0;
+  pZip->m_central_directory_file_ofs = 0;
+  pZip->m_total_files = 0;
+
+  if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) // NOTE(review): on this failure m_zip_mode has already been set to MZ_ZIP_MODE_READING
+    return MZ_FALSE;
+  memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
+  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); // byte array
+  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); // 32-bit entries
+  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); // 32-bit entries
+  return MZ_TRUE;
+}
+
+static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) // Returns non-zero if the filename of file l_index orders before that of file r_index, comparing bytes after MZ_TOLOWER.
+{
+  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; // central directory header of the left file
+  const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); // central directory header of the right file
+  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+  mz_uint8 l = 0, r = 0;
+  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; // the filename follows the fixed-size header
+  pE = pL + MZ_MIN(l_len, r_len); // compare only the common prefix length
+  while (pL < pE)
+  {
+    if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR)))
+      break;
+    pL++; pR++;
+  }
+  return (pL == pE) ? (l_len < r_len) : (l < r); // equal prefixes: the shorter name sorts first; otherwise the first differing byte decides
+}
+
+#define MZ_SWAP_UINT32(a, b) do { mz_uint32 t = a; a = b; b = t; } MZ_MACRO_END // Swaps two mz_uint32 lvalues; each argument is evaluated twice.
+
+// Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.)
+static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip) // Sorts the file indices in m_sorted_central_dir_offsets in place so they are ordered by mz_zip_reader_filename_less.
+{
+  mz_zip_internal_state *pState = pZip->m_pState;
+  const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
+  const mz_zip_array *pCentral_dir = &pState->m_central_dir;
+  mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); // the array being permuted
+  const int size = pZip->m_total_files;
+  int start = (size - 2) >> 1, end; // last node that has a child
+  while (start >= 0) // phase 1: sift each parent down to build a heap with the largest name at the root
+  {
+    int child, root = start;
+    for ( ; ; )
+    {
+      if ((child = (root << 1) + 1) >= size)
+        break;
+      child += (((child + 1) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]))); // pick the larger of the two children
+      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
+        break;
+      MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child;
+    }
+    start--;
+  }
+
+  end = size - 1;
+  while (end > 0) // phase 2: move the current maximum to the end, then restore the heap on the shortened prefix
+  {
+    int child, root = 0;
+    MZ_SWAP_UINT32(pIndices[end], pIndices[0]);
+    for ( ; ; )
+    {
+      if ((child = (root << 1) + 1) >= end)
+        break;
+      child += (((child + 1) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]));
+      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
+        break;
+      MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child;
+    }
+    end--;
+  }
+}
+
+static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint32 flags) // Finds and validates the end-of-central-directory record, loads the central directory into pZip->m_pState, indexes every file header and optionally sorts the index; returns MZ_FALSE on any read error or inconsistency.
+{
+  mz_uint cdir_size, num_this_disk, cdir_disk_index;
+  mz_uint64 cdir_ofs;
+  mz_int64 cur_file_ofs;
+  const mz_uint8 *p;
+  mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; // 4096-byte scratch buffer, accessed as bytes through pBuf
+  mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0);
+  // Basic sanity check - reject files which are too small to hold an end of central directory record.
+  if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
+    return MZ_FALSE;
+  // Find the end of central directory record by scanning the file from the end towards the beginning.
+  cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); // start with the last buffer-sized window of the archive
+  for ( ; ; )
+  {
+    int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs);
+    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n)
+      return MZ_FALSE;
+    for (i = n - 4; i >= 0; --i) // search this window backwards for the 4-byte signature
+      if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG)
+        break;
+    if (i >= 0)
+    {
+      cur_file_ofs += i; // absolute offset of the record
+      break;
+    }
+    if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) // give up at the start of the file, or once the distance from the end exceeds the record size plus the largest 16-bit comment length
+      return MZ_FALSE;
+    cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); // step back, overlapping the previous window by 3 bytes so a signature spanning the boundary is still seen
+  }
+  // Read and verify the end of central directory record.
+  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
+    return MZ_FALSE;
+  if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) ||
+      ((pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) // stores the total entry count and requires it to equal the on-this-disk count
+    return MZ_FALSE;
+
+  num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS);
+  cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS);
+  if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) // accept only disk numbers 0/0 or 1/1
+    return MZ_FALSE;
+
+  if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) // the directory must at least hold one fixed-size header per file
+    return MZ_FALSE;
+
+  cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS);
+  if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) // the directory must lie inside the archive
+    return MZ_FALSE;
+
+  pZip->m_central_directory_file_ofs = cdir_ofs;
+
+  if (pZip->m_total_files)
+  {
+     mz_uint i, n;
+
+    // Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and another to hold the sorted indices.
+    if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) ||
+        (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE)))
+      return MZ_FALSE;
+
+    if (sort_central_dir)
+    {
+      if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE))
+        return MZ_FALSE;
+    }
+
+    if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size)
+      return MZ_FALSE;
+
+    // Now create an index into the central directory file records, do some basic sanity checking on each record, and check for zip64 entries (which are not yet supported).
+    p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p;
+    for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) // n counts the directory bytes not yet consumed
+    {
+      mz_uint total_header_size, comp_size, decomp_size, disk_index;
+      if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG))
+        return MZ_FALSE;
+      MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); // byte offset of this header inside the loaded directory
+      if (sort_central_dir)
+        MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; // identity permutation, sorted after the loop
+      comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+      decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
+      if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || (comp_size == 0xFFFFFFFF)) // NOTE(review): this reads 32 bits at the method offset, so the file-time field that follows the 16-bit method is part of the zero test - confirm intended
+        return MZ_FALSE;
+      disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS);
+      if ((disk_index != num_this_disk) && (disk_index != 1))
+        return MZ_FALSE;
+      if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) // local header plus compressed data must fit inside the archive
+        return MZ_FALSE;
+      if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) // the variable-length tail must fit in the remaining directory bytes
+        return MZ_FALSE;
+      n -= total_header_size; p += total_header_size;
+    }
+  }
+
+  if (sort_central_dir)
+    mz_zip_reader_sort_central_dir_offsets_by_filename(pZip);
+
+  return MZ_TRUE;
+}
+
+mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags)
+{
+  // The caller must have installed a read callback before a reader can be set up.
+  if ((pZip == NULL) || (pZip->m_pRead == NULL) || (!mz_zip_reader_init_internal(pZip, flags)))
+    return MZ_FALSE;
+  pZip->m_archive_size = size;
+
+  if (mz_zip_reader_read_central_dir(pZip, flags))
+    return MZ_TRUE;
+
+  // The central directory could not be loaded: tear the reader down again.
+  mz_zip_reader_end(pZip);
+  return MZ_FALSE;
+}
+
+static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n)
+{
+  mz_zip_archive *pArchive = (mz_zip_archive *)pOpaque; // the opaque pointer is the archive itself
+  size_t avail = 0; const mz_uint8 *pBase = (const mz_uint8 *)pArchive->m_pState->m_pMem;
+  if (file_ofs < pArchive->m_archive_size) avail = (size_t)MZ_MIN(pArchive->m_archive_size - file_ofs, n); // clamp the request to the end of the block
+  memcpy(pBuf, pBase + file_ofs, avail); return avail; // returns the number of bytes actually copied (0 when the offset is at or past the end)
+}
+
+mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags) // Opens a ZIP archive held in a caller-owned memory block of `size` bytes; the block is referenced, not copied. Returns MZ_FALSE on bad parameters or an unreadable central directory.
+{
+  if ((!pMem) || (!mz_zip_reader_init_internal(pZip, flags))) // a NULL block would otherwise be read by mz_zip_mem_read_func
+    return MZ_FALSE;
+  pZip->m_archive_size = size;
+  pZip->m_pRead = mz_zip_mem_read_func;
+  pZip->m_pIO_opaque = pZip; // the read callback receives the archive itself
+#ifdef __cplusplus
+  pZip->m_pState->m_pMem = const_cast<void *>(pMem);
+#else
+  pZip->m_pState->m_pMem = (void *)pMem;
+#endif
+  pZip->m_pState->m_mem_size = size;
+  if (!mz_zip_reader_read_central_dir(pZip, flags))
+  {
+    mz_zip_reader_end(pZip);
+    return MZ_FALSE;
+  }
+  return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_STDIO
+static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n)
+{
+  MZ_FILE *pFile = ((mz_zip_archive *)pOpaque)->m_pState->m_pFile; // the opaque pointer is the archive itself
+  const mz_int64 want_ofs = (mz_int64)file_ofs, have_ofs = MZ_FTELL64(pFile);
+  if (want_ofs < 0) return 0; // offsets that do not fit in a signed 64-bit value are rejected
+  if ((have_ofs != want_ofs) && (MZ_FSEEK64(pFile, want_ofs, SEEK_SET))) return 0; // seek only when the file is not already positioned there
+  return MZ_FREAD(pBuf, 1, n, pFile);
+}
+
+mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) // Opens the named file ("rb") as a ZIP archive for reading; returns MZ_FALSE if it cannot be opened, sized or parsed.
+{
+  mz_int64 file_size; // signed, so a failed MZ_FTELL64 (negative result) is detectable
+  MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb");
+  if (!pFile)
+    return MZ_FALSE;
+  if (MZ_FSEEK64(pFile, 0, SEEK_END)) // position at the end to learn the size
+  {
+    MZ_FCLOSE(pFile);
+    return MZ_FALSE;
+  }
+  file_size = MZ_FTELL64(pFile);
+  if ((file_size < 0) || (!mz_zip_reader_init_internal(pZip, flags))) // never treat a tell error as a huge archive size
+  {
+    MZ_FCLOSE(pFile);
+    return MZ_FALSE;
+  }
+  pZip->m_pRead = mz_zip_file_read_func;
+  pZip->m_pIO_opaque = pZip; // the read callback receives the archive itself
+  pZip->m_pState->m_pFile = pFile; // from here on the file handle belongs to the archive state
+  pZip->m_archive_size = (mz_uint64)file_size;
+  if (!mz_zip_reader_read_central_dir(pZip, flags))
+  {
+    mz_zip_reader_end(pZip);
+    return MZ_FALSE;
+  }
+  return MZ_TRUE;
+}
+#endif // #ifndef MINIZ_NO_STDIO
+
+mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) // Returns the archive's file count, or 0 when pZip is NULL.
+{
+  return pZip ? pZip->m_total_files : 0;
+}
+
+static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(mz_zip_archive *pZip, mz_uint file_index) // Returns a pointer to the central directory header of file_index inside the loaded directory, or NULL if the archive is not an initialised reader or the index is out of range.
+{
+  if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+    return NULL;
+  return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); // look up the header's byte offset, then address into the directory bytes
+}
+
+// Reports bit 0 of the entry's general purpose bit flag (the encryption bit).
+// Returns MZ_FALSE when the entry's central directory header cannot be located.
+mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index)
+{
+  mz_uint flag_bits;
+  const mz_uint8 *pHeader = mz_zip_reader_get_cdh(pZip, file_index);
+  if (pHeader)
+  {
+    flag_bits = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_BIT_FLAG_OFS);
+    return (flag_bits & 1);
+  }
+  return MZ_FALSE;
+}
+
+// Decides whether an entry is a directory: either its stored filename ends in '/',
+// or bit 0x10 (the DOS directory attribute) is set in its external attributes.
+// Returns MZ_FALSE when the entry's central directory header cannot be located.
+mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index)
+{
+  mz_uint name_len, ext_attr;
+  const mz_uint8 *pHeader = mz_zip_reader_get_cdh(pZip, file_index);
+  if (!pHeader)
+    return MZ_FALSE;
+
+  // Test 1: a trailing '/' on the filename (the name immediately follows the fixed-size header).
+  name_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+  if ((name_len) && (pHeader[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + name_len - 1] == '/'))
+    return MZ_TRUE;
+
+  // Test 2: the DOS directory flag in the low bits of the external attributes. The internal attribute is
+  // deliberately not consulted, and the source OS ID of the "made by" field is ignored.
+  // FIXME: This second test may be redundant given the filename test above.
+  ext_attr = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
+  return ((ext_attr & 0x10) != 0) ? MZ_TRUE : MZ_FALSE;
+}
+
+// Fills *pStat from the central directory record of entry file_index.
+// The filename and comment are truncated to fit their fixed-size buffers and are always null-terminated.
+// Returns MZ_FALSE if pStat is NULL or the entry's central directory header cannot be located.
+mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat)
+{
+  mz_uint name_len, extra_len, copy_len;
+  const mz_uint8 *pHeader = mz_zip_reader_get_cdh(pZip, file_index);
+  if ((!pHeader) || (!pStat))
+    return MZ_FALSE;
+
+  // Identification of the record.
+  pStat->m_file_index = file_index;
+  pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index);
+
+  // Fixed-size header fields.
+  pStat->m_version_made_by = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_VERSION_MADE_BY_OFS);
+  pStat->m_version_needed = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_VERSION_NEEDED_OFS);
+  pStat->m_bit_flag = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_BIT_FLAG_OFS);
+  pStat->m_method = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_METHOD_OFS);
+#ifndef MINIZ_NO_TIME
+  pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILE_DATE_OFS));
+#endif
+  pStat->m_crc32 = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_CRC32_OFS);
+  pStat->m_comp_size = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+  pStat->m_uncomp_size = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
+  pStat->m_internal_attr = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_INTERNAL_ATTR_OFS);
+  pStat->m_external_attr = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
+  pStat->m_local_header_ofs = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
+
+  // Variable-length data follows the fixed header in the order: filename, extra field, comment.
+  name_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+  extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS);
+
+  // Filename: copy as much as fits, leaving room for the terminator.
+  copy_len = MZ_MIN(name_len, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1);
+  memcpy(pStat->m_filename, pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, copy_len);
+  pStat->m_filename[copy_len] = '\0';
+
+  // Comment: same treatment; m_comment_size records the (possibly truncated) length.
+  copy_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS);
+  copy_len = MZ_MIN(copy_len, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1);
+  pStat->m_comment_size = copy_len;
+  memcpy(pStat->m_comment, pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + name_len + extra_len, copy_len);
+  pStat->m_comment[copy_len] = '\0';
+
+  return MZ_TRUE;
+}
+
+// Copies the entry's filename into pFilename (capacity filename_buf_size bytes), truncating if needed and
+// always null-terminating when the capacity is non-zero.
+// Returns the number of bytes copied plus one for the terminator; with a zero capacity nothing is copied and
+// the return value is the stored filename length plus one. Returns 0 (and an empty string) if the entry
+// cannot be located.
+mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size)
+{
+  mz_uint name_len;
+  const mz_uint8 *pHeader = mz_zip_reader_get_cdh(pZip, file_index);
+  if (!pHeader)
+  {
+    if (filename_buf_size)
+      pFilename[0] = '\0';
+    return 0;
+  }
+
+  name_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+  if (!filename_buf_size)
+    return name_len + 1;
+
+  name_len = MZ_MIN(name_len, filename_buf_size - 1);
+  memcpy(pFilename, pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, name_len);
+  pFilename[name_len] = '\0';
+  return name_len + 1;
+}
+
+// Compares the first len bytes of pA and pB. The comparison is exact when MZ_ZIP_FLAG_CASE_SENSITIVE is set
+// in flags; otherwise both sides are folded through MZ_TOLOWER before comparing.
+static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags)
+{
+  const char *pA_end;
+  if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE)
+    return 0 == memcmp(pA, pB, len);
+  pA_end = pA + len;
+  while (pA != pA_end)
+  {
+    if (MZ_TOLOWER(*pA) != MZ_TOLOWER(*pB))
+      return MZ_FALSE;
+    ++pA;
+    ++pB;
+  }
+  return MZ_TRUE;
+}
+
+// Three-way, case-folded (MZ_TOLOWER) comparison between the filename stored for entry l_index and the
+// string pR of length r_len. Returns <0, 0 or >0; when one name is a prefix of the other the result is the
+// difference of the lengths.
+static MZ_FORCEINLINE int mz_zip_reader_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len)
+{
+  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index));
+  const mz_uint8 *pL_stop;
+  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+  mz_uint8 l_ch = 0, r_ch = 0;
+
+  // The filename starts right after the fixed-size central directory header.
+  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
+  pL_stop = pL + MZ_MIN(l_len, r_len);
+
+  // Walk the common prefix, stopping at the first differing (lower-cased) byte.
+  while (pL < pL_stop)
+  {
+    l_ch = MZ_TOLOWER(*pL);
+    r_ch = MZ_TOLOWER(*pR);
+    if (l_ch != r_ch)
+      break;
+    ++pL;
+    ++pR;
+  }
+
+  if (pL == pL_stop)
+    return (int)(l_len - r_len);
+  return l_ch - r_ch;
+}
+
+// Binary search for pFilename over m_sorted_central_dir_offsets, ordering entries with
+// mz_zip_reader_filename_compare(). Returns the matching file index, or -1 if there is no match.
+static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename)
+{
+  mz_zip_internal_state *pState = pZip->m_pState;
+  const mz_zip_array *pDir = &pState->m_central_dir;
+  const mz_zip_array *pDir_offsets = &pState->m_central_dir_offsets;
+  mz_uint32 *pSorted = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0);
+  const mz_uint name_len = (mz_uint)strlen(pFilename);
+  int lo = 0;
+  int hi = (int)pZip->m_total_files - 1;
+
+  while (lo <= hi)
+  {
+    const int mid = (lo + hi) >> 1;
+    const int candidate = pSorted[mid];
+    const int order = mz_zip_reader_filename_compare(pDir, pDir_offsets, candidate, pFilename, name_len);
+    if (order < 0)
+      lo = mid + 1;
+    else if (order > 0)
+      hi = mid - 1;
+    else
+      return candidate;
+  }
+  return -1;
+}
+
+// Finds the index of the entry named pName (and, when pComment is non-NULL and non-empty, carrying that exact
+// comment). flags may contain MZ_ZIP_FLAG_CASE_SENSITIVE and/or MZ_ZIP_FLAG_IGNORE_PATH.
+// Returns the file index, or -1 if nothing matches or the arguments/archive state are invalid.
+int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags)
+{
+  mz_uint file_index;
+  size_t name_len, comment_len;
+
+  if ((!pZip) || (!pZip->m_pState) || (!pName) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+    return -1;
+
+  // Fast path: a plain case-insensitive, full-path lookup without a comment can use the sorted index if one exists.
+  if ((!pComment) && ((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0))
+  {
+    if (pZip->m_pState->m_sorted_central_dir_offsets.m_size)
+      return mz_zip_reader_locate_file_binary_search(pZip, pName);
+  }
+
+  // Lengths are stored in 16-bit header fields, so anything longer can never match.
+  name_len = strlen(pName);
+  if (name_len > 0xFFFF)
+    return -1;
+  comment_len = pComment ? strlen(pComment) : 0;
+  if (comment_len > 0xFFFF)
+    return -1;
+
+  // Slow path: linear scan over every central directory header.
+  for (file_index = 0; file_index < pZip->m_total_files; file_index++)
+  {
+    const mz_uint8 *pEntry = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index));
+    mz_uint entry_name_len = MZ_READ_LE16(pEntry + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+    const char *pEntry_name = (const char *)pEntry + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
+
+    if (entry_name_len < name_len)
+      continue;
+
+    if (comment_len)
+    {
+      // The comment sits after the filename and the extra field.
+      mz_uint entry_extra_len = MZ_READ_LE16(pEntry + MZ_ZIP_CDH_EXTRA_LEN_OFS);
+      mz_uint entry_comment_len = MZ_READ_LE16(pEntry + MZ_ZIP_CDH_COMMENT_LEN_OFS);
+      const char *pEntry_comment = pEntry_name + entry_name_len + entry_extra_len;
+      if (entry_comment_len != comment_len)
+        continue;
+      if (!mz_zip_reader_string_equal(pComment, pEntry_comment, entry_comment_len, flags))
+        continue;
+    }
+
+    if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (entry_name_len))
+    {
+      // Strip everything up to and including the last '/', '\\' or ':' in the stored name.
+      int ofs = entry_name_len - 1;
+      while ((ofs >= 0) && (pEntry_name[ofs] != '/') && (pEntry_name[ofs] != '\\') && (pEntry_name[ofs] != ':'))
+        --ofs;
+      ofs++;
+      pEntry_name += ofs;
+      entry_name_len -= ofs;
+    }
+
+    if (entry_name_len != name_len)
+      continue;
+    if (mz_zip_reader_string_equal(pName, pEntry_name, entry_name_len, flags))
+      return file_index;
+  }
+  return -1;
+}
+
+// Extracts entry file_index into the caller-supplied buffer pBuf (buf_size bytes).
+// - If MZ_ZIP_FLAG_COMPRESSED_DATA is set in flags, the raw compressed bytes are copied and no CRC check is done.
+// - Otherwise stored entries are copied and deflated entries are inflated; in both cases the CRC-32 is verified.
+// - pUser_read_buf/user_read_buf_size optionally supply the scratch buffer used to read compressed data when the
+//   archive is not memory-resident; when pUser_read_buf is NULL a temporary buffer is obtained from pZip->m_pAlloc.
+// Returns MZ_TRUE on success. Entries with a zero compressed size and directory entries return MZ_TRUE without
+// writing anything to pBuf.
+mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size)
+{
+  int status = TINFL_STATUS_DONE;
+  mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail;
+  mz_zip_archive_file_stat file_stat;
+  void *pRead_buf;
+  // Local header scratch space, declared as mz_uint32 elements and accessed through a byte pointer.
+  mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
+  tinfl_decompressor inflator;
+
+  if ((buf_size) && (!pBuf))
+    return MZ_FALSE;
+
+  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
+    return MZ_FALSE;
+
+  // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes)
+  if (!file_stat.m_comp_size)
+    return MZ_TRUE;
+
+  // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers).
+  // I'm torn how to handle this case - should it fail instead?
+  if (mz_zip_reader_is_file_a_directory(pZip, file_index))
+    return MZ_TRUE;
+
+  // Encryption and patch files are not supported.
+  if (file_stat.m_bit_flag & (1 | 32))
+    return MZ_FALSE;
+
+  // This function only supports stored and deflate.
+  if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
+    return MZ_FALSE;
+
+  // Ensure supplied output buffer is large enough.
+  needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size;
+  if (buf_size < needed_size)
+    return MZ_FALSE;
+
+  // Read and parse the local directory entry.
+  cur_file_ofs = file_stat.m_local_header_ofs;
+  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
+    return MZ_FALSE;
+  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
+    return MZ_FALSE;
+
+  // Skip the local header plus its filename and extra field (lengths taken from the local header itself)
+  // to reach the entry's data, then make sure the data lies inside the archive.
+  cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
+  if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
+    return MZ_FALSE;
+
+  if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
+  {
+    // The file is stored or the caller has requested the compressed data.
+    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size)
+      return MZ_FALSE;
+    // The CRC is only checked for stored data; raw compressed data is returned unchecked.
+    return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32);
+  }
+
+  // Decompress the file either directly from memory or from a file input buffer.
+  tinfl_init(&inflator);
+
+  if (pZip->m_pState->m_pMem)
+  {
+    // Read directly from the archive in memory.
+    pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
+    read_buf_size = read_buf_avail = file_stat.m_comp_size;
+    comp_remaining = 0;
+  }
+  else if (pUser_read_buf)
+  {
+    // Use a user provided read buffer.
+    if (!user_read_buf_size)
+      return MZ_FALSE;
+    pRead_buf = (mz_uint8 *)pUser_read_buf;
+    read_buf_size = user_read_buf_size;
+    read_buf_avail = 0;
+    comp_remaining = file_stat.m_comp_size;
+  }
+  else
+  {
+    // Temporarily allocate a read buffer.
+    read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE);
+    // Refuse sizes above 0x7FFFFFFF when size_t is 32 bits wide.
+    // NOTE(review): the "(0, ...)" comma form under _MSC_VER presumably silences a constant-condition warning - confirm.
+#ifdef _MSC_VER
+    if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
+#else
+    if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
+#endif
+      return MZ_FALSE;
+    if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
+      return MZ_FALSE;
+    read_buf_avail = 0;
+    comp_remaining = file_stat.m_comp_size;
+  }
+
+  // Inflate loop: refill the read buffer whenever it is drained (never needed for in-memory archives),
+  // then let tinfl consume input and append output at pBuf + out_buf_ofs.
+  do
+  {
+    size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs);
+    if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
+    {
+      read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
+      if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+      {
+        status = TINFL_STATUS_FAILED;
+        break;
+      }
+      cur_file_ofs += read_buf_avail;
+      comp_remaining -= read_buf_avail;
+      read_buf_ofs = 0;
+    }
+    in_buf_size = (size_t)read_buf_avail;
+    // TINFL_FLAG_HAS_MORE_INPUT is passed only while compressed bytes remain to be read from the archive.
+    // On return in_buf_size/out_buf_size hold the byte counts actually consumed/produced.
+    status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0));
+    read_buf_avail -= in_buf_size;
+    read_buf_ofs += in_buf_size;
+    out_buf_ofs += out_buf_size;
+  } while (status == TINFL_STATUS_NEEDS_MORE_INPUT);
+
+  if (status == TINFL_STATUS_DONE)
+  {
+    // Make sure the entire file was decompressed, and check its CRC.
+    if ((out_buf_ofs != file_stat.m_uncomp_size) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32))
+      status = TINFL_STATUS_FAILED;
+  }
+
+  // Only the temporary read buffer allocated above is released here; the in-memory archive and a
+  // user-provided buffer are left untouched.
+  if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf))
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+
+  return status == TINFL_STATUS_DONE;
+}
+
+// Name-based variant of mz_zip_reader_extract_to_mem_no_alloc(): locates pFilename (honouring flags) and
+// extracts that entry. Returns MZ_FALSE if the name cannot be found.
+mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size)
+{
+  const int located = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+  return (located < 0)
+    ? MZ_FALSE
+    : mz_zip_reader_extract_to_mem_no_alloc(pZip, located, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size);
+}
+
+// Extracts entry file_index into pBuf, passing no user read buffer to mz_zip_reader_extract_to_mem_no_alloc().
+mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags)
+{
+  void *const pNo_user_read_buf = NULL;
+  const size_t no_user_read_buf_size = 0;
+  return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pNo_user_read_buf, no_user_read_buf_size);
+}
+
+// Locates pFilename (honouring flags) and extracts it into pBuf with no user read buffer.
+// Returns MZ_FALSE if the name cannot be found or extraction fails.
+mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags)
+{
+  const int located = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+  if (located < 0)
+    return MZ_FALSE;
+  return mz_zip_reader_extract_to_mem_no_alloc(pZip, located, pBuf, buf_size, flags, NULL, 0);
+}
+
+// Extracts entry file_index into a block obtained from pZip->m_pAlloc and returns it; the block's size is
+// stored in *pSize when pSize is non-NULL. With MZ_ZIP_FLAG_COMPRESSED_DATA the block holds the raw
+// compressed bytes. Returns NULL (and *pSize == 0) on failure; on failure the block is released via pZip->m_pFree.
+void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags)
+{
+  mz_uint64 alloc_size;
+  void *pOut;
+  const mz_uint8 *pHeader = mz_zip_reader_get_cdh(pZip, file_index);
+
+  if (pSize)
+    *pSize = 0;
+  if (!pHeader)
+    return NULL;
+
+  // Size the output from the central directory: compressed size for raw data, uncompressed size otherwise.
+  if (flags & MZ_ZIP_FLAG_COMPRESSED_DATA)
+    alloc_size = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+  else
+    alloc_size = MZ_READ_LE32(pHeader + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
+
+  // Refuse sizes above 0x7FFFFFFF when size_t is 32 bits wide.
+#ifdef _MSC_VER
+  if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
+#else
+  if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
+#endif
+    return NULL;
+
+  pOut = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size);
+  if (NULL == pOut)
+    return NULL;
+
+  if (mz_zip_reader_extract_to_mem(pZip, file_index, pOut, (size_t)alloc_size, flags))
+  {
+    if (pSize)
+      *pSize = (size_t)alloc_size;
+    return pOut;
+  }
+
+  pZip->m_pFree(pZip->m_pAlloc_opaque, pOut);
+  return NULL;
+}
+
+// Name-based variant of mz_zip_reader_extract_to_heap(): locates pFilename (honouring flags) and extracts
+// that entry to a newly allocated block.
+// Returns the block, or NULL with *pSize set to 0 (when pSize is non-NULL) if the name cannot be found.
+void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags)
+{
+  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+  if (file_index < 0)
+  {
+    if (pSize) *pSize = 0;
+    // This function returns a pointer, so report failure with NULL rather than the boolean MZ_FALSE.
+    return NULL;
+  }
+  return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags);
+}
+
+// Extracts entry file_index by streaming its data to pCallback(pOpaque, output_offset, data, size).
+// - If MZ_ZIP_FLAG_COMPRESSED_DATA is set in flags, the raw compressed bytes are streamed and no CRC check is done.
+// - Otherwise stored entries are streamed as-is and deflated entries are inflated through a TINFL_LZ_DICT_SIZE
+//   dictionary buffer; the CRC-32 and total output size are verified at the end.
+// The callback must report exactly the number of bytes it was given or the extraction fails.
+// Returns MZ_TRUE on success. Entries with a zero compressed size and directory entries return MZ_TRUE without
+// invoking the callback.
+mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
+{
+  int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT;
+  mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs;
+  mz_zip_archive_file_stat file_stat;
+  void *pRead_buf = NULL; void *pWrite_buf = NULL;
+  // Local header scratch space, declared as mz_uint32 elements and accessed through a byte pointer.
+  mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
+
+  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
+    return MZ_FALSE;
+
+  // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes)
+  if (!file_stat.m_comp_size)
+    return MZ_TRUE;
+
+  // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers).
+  // I'm torn how to handle this case - should it fail instead?
+  if (mz_zip_reader_is_file_a_directory(pZip, file_index))
+    return MZ_TRUE;
+
+  // Encryption and patch files are not supported.
+  if (file_stat.m_bit_flag & (1 | 32))
+    return MZ_FALSE;
+
+  // This function only supports stored and deflate.
+  if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
+    return MZ_FALSE;
+
+  // Read and parse the local directory entry.
+  cur_file_ofs = file_stat.m_local_header_ofs;
+  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
+    return MZ_FALSE;
+  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
+    return MZ_FALSE;
+
+  // Skip the local header plus its filename and extra field (lengths taken from the local header itself)
+  // to reach the entry's data, then make sure the data lies inside the archive.
+  cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
+  if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
+    return MZ_FALSE;
+
+  // Decompress the file either directly from memory or from a file input buffer.
+  if (pZip->m_pState->m_pMem)
+  {
+    // In-memory archive: point straight at the entry's data, so nothing remains to be read.
+    pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
+    read_buf_size = read_buf_avail = file_stat.m_comp_size;
+    comp_remaining = 0;
+  }
+  else
+  {
+    // Otherwise allocate a read buffer of at most MZ_ZIP_MAX_IO_BUF_SIZE bytes; it is freed before returning.
+    read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE);
+    if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
+      return MZ_FALSE;
+    read_buf_avail = 0;
+    comp_remaining = file_stat.m_comp_size;
+  }
+
+  if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
+  {
+    // The file is stored or the caller has requested the compressed data.
+    if (pZip->m_pState->m_pMem)
+    {
+      // In-memory archive: hand the whole entry to the callback in a single call.
+      // This early return cannot leak: no read buffer was allocated on the in-memory path.
+#ifdef _MSC_VER
+      if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF))
+#else
+      if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF))
+#endif
+        return MZ_FALSE;
+      if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size)
+        status = TINFL_STATUS_FAILED;
+      else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
+        file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size);
+      cur_file_ofs += file_stat.m_comp_size;
+      out_buf_ofs += file_stat.m_comp_size;
+      comp_remaining = 0;
+    }
+    else
+    {
+      // Copy the entry through the read buffer one chunk at a time.
+      while (comp_remaining)
+      {
+        read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
+        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+        {
+          status = TINFL_STATUS_FAILED;
+          break;
+        }
+
+        // The running CRC is only maintained for stored data, not for raw compressed output.
+        if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
+          file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail);
+
+        if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+        {
+          status = TINFL_STATUS_FAILED;
+          break;
+        }
+        cur_file_ofs += read_buf_avail;
+        out_buf_ofs += read_buf_avail;
+        comp_remaining -= read_buf_avail;
+      }
+    }
+  }
+  else
+  {
+    tinfl_decompressor inflator;
+    tinfl_init(&inflator);
+
+    // pWrite_buf is used as a wrapping output window of TINFL_LZ_DICT_SIZE bytes.
+    if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE)))
+      status = TINFL_STATUS_FAILED;
+    else
+    {
+      do
+      {
+        // The write position wraps inside the window; out_buf_size is the room left before the wrap point.
+        mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
+        size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
+        if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
+        {
+          // Read buffer drained: refill it from the archive.
+          read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
+          if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+          {
+            status = TINFL_STATUS_FAILED;
+            break;
+          }
+          cur_file_ofs += read_buf_avail;
+          comp_remaining -= read_buf_avail;
+          read_buf_ofs = 0;
+        }
+
+        in_buf_size = (size_t)read_buf_avail;
+        // On return in_buf_size/out_buf_size hold the byte counts actually consumed/produced.
+        status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
+        read_buf_avail -= in_buf_size;
+        read_buf_ofs += in_buf_size;
+
+        if (out_buf_size)
+        {
+          if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size)
+          {
+            status = TINFL_STATUS_FAILED;
+            break;
+          }
+          file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size);
+          // Producing more output than the header's uncompressed size is treated as a failure.
+          if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size)
+          {
+            status = TINFL_STATUS_FAILED;
+            break;
+          }
+        }
+      } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT));
+    }
+  }
+
+  if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)))
+  {
+    // Make sure the entire file was decompressed, and check its CRC.
+    if ((out_buf_ofs != file_stat.m_uncomp_size) || (file_crc32 != file_stat.m_crc32))
+      status = TINFL_STATUS_FAILED;
+  }
+
+  // Release the buffers allocated by this function; for in-memory archives pRead_buf points into the
+  // archive itself and must not be freed.
+  if (!pZip->m_pState->m_pMem)
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+  if (pWrite_buf)
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf);
+
+  return status == TINFL_STATUS_DONE;
+}
+
+// Name-based variant of mz_zip_reader_extract_to_callback(): locates pFilename (honouring flags) and streams
+// that entry to pCallback. Returns MZ_FALSE if the name cannot be found.
+mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
+{
+  const int located = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+  return (located < 0)
+    ? MZ_FALSE
+    : mz_zip_reader_extract_to_callback(pZip, located, pCallback, pOpaque, flags);
+}
+
+#ifndef MINIZ_NO_STDIO
+// Write callback used when extracting to a stdio file: pOpaque is the destination MZ_FILE.
+// Data is appended at the stream's current position; the ofs argument is ignored.
+static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n)
+{
+  MZ_FILE *pDst_file = (MZ_FILE*)pOpaque;
+  (void)ofs;
+  return MZ_FWRITE(pBuf, 1, n, pDst_file);
+}
+
+// Extracts entry file_index to the file pDst_filename (opened with mode "wb").
+// Unless MINIZ_NO_TIME is defined, a successful extraction also applies the entry's timestamp to the file.
+// Returns MZ_FALSE if the entry cannot be stat'ed, the file cannot be opened or closed, or extraction fails.
+mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags)
+{
+  mz_bool extracted;
+  mz_zip_archive_file_stat entry_stat;
+  MZ_FILE *pDst_file;
+
+  if (!mz_zip_reader_file_stat(pZip, file_index, &entry_stat))
+    return MZ_FALSE;
+
+  pDst_file = MZ_FOPEN(pDst_filename, "wb");
+  if (NULL == pDst_file)
+    return MZ_FALSE;
+
+  extracted = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pDst_file, flags);
+
+  // A failed close is reported as failure even when the extraction itself succeeded.
+  if (EOF == MZ_FCLOSE(pDst_file))
+    return MZ_FALSE;
+
+#ifndef MINIZ_NO_TIME
+  if (extracted)
+    mz_zip_set_file_times(pDst_filename, entry_stat.m_time, entry_stat.m_time);
+#endif
+  return extracted;
+}
+#endif // #ifndef MINIZ_NO_STDIO
+
+// Shuts down an archive opened for reading: releases the central directory arrays, closes the stdio file
+// (if any), frees the internal state and resets the archive to MZ_ZIP_MODE_INVALID.
+// Returns MZ_FALSE if pZip is NULL, has no state or allocator callbacks, or is not in reading mode.
+mz_bool mz_zip_reader_end(mz_zip_archive *pZip)
+{
+  mz_zip_internal_state *pDoomed;
+
+  if ((!pZip) || (!pZip->m_pState))
+    return MZ_FALSE;
+  if ((!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+    return MZ_FALSE;
+
+  // Detach the state from the archive first, then tear it down.
+  pDoomed = pZip->m_pState;
+  pZip->m_pState = NULL;
+
+  mz_zip_array_clear(pZip, &pDoomed->m_central_dir);
+  mz_zip_array_clear(pZip, &pDoomed->m_central_dir_offsets);
+  mz_zip_array_clear(pZip, &pDoomed->m_sorted_central_dir_offsets);
+
+#ifndef MINIZ_NO_STDIO
+  if (pDoomed->m_pFile)
+  {
+    MZ_FCLOSE(pDoomed->m_pFile);
+    pDoomed->m_pFile = NULL;
+  }
+#endif // #ifndef MINIZ_NO_STDIO
+
+  pZip->m_pFree(pZip->m_pAlloc_opaque, pDoomed);
+  pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
+
+  return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_STDIO
+// Name-based variant of mz_zip_reader_extract_to_file(): locates pArchive_filename (honouring flags) and
+// extracts that entry to pDst_filename. Returns MZ_FALSE if the name cannot be found.
+mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags)
+{
+  const int located = mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags);
+  return (located < 0)
+    ? MZ_FALSE
+    : mz_zip_reader_extract_to_file(pZip, located, pDst_filename, flags);
+}
+#endif
+
+// ------------------- .ZIP archive writing
+
+#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
+// Stores the 16-bit value v at p in little-endian byte order (least significant byte first).
+static void mz_write_le16(mz_uint8 *p, mz_uint16 v)
+{
+  mz_uint i;
+  for (i = 0; i < 2; ++i)
+    p[i] = (mz_uint8)(v >> (8 * i));
+}
+
+// Stores the 32-bit value v at p in little-endian byte order (least significant byte first).
+static void mz_write_le32(mz_uint8 *p, mz_uint32 v)
+{
+  mz_uint i;
+  for (i = 0; i < 4; ++i)
+    p[i] = (mz_uint8)(v >> (8 * i));
+}
+
+// Convenience wrappers that cast the destination to a byte pointer and the value to the field width.
+#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v))
+#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v))
+
+// Prepares pZip for writing through the caller-installed m_pWrite callback.
+// existing_size becomes the initial archive size, i.e. the offset at which the first entry will be written.
+// Missing allocator callbacks are replaced by the default ones.
+// Returns MZ_FALSE if pZip is NULL, already initialized, lacks a write callback, has a file offset alignment
+// that is not a power of two, or if the internal state cannot be allocated. When allocation fails the
+// archive is left in MZ_ZIP_MODE_INVALID (with m_pState NULL) so that initialization can be retried.
+mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size)
+{
+  mz_zip_internal_state *pNew_state;
+
+  if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
+    return MZ_FALSE;
+
+  if (pZip->m_file_offset_alignment)
+  {
+    // Ensure user specified file offset alignment is a power of 2.
+    if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1))
+      return MZ_FALSE;
+  }
+
+  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
+  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
+  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
+
+  // Allocate the state before switching modes: a failed allocation must not leave the archive in writing
+  // mode without a state, because neither mz_zip_writer_end() nor a second init call could recover from that.
+  if (NULL == (pNew_state = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
+    return MZ_FALSE;
+  memset(pNew_state, 0, sizeof(mz_zip_internal_state));
+  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pNew_state->m_central_dir, sizeof(mz_uint8));
+  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pNew_state->m_central_dir_offsets, sizeof(mz_uint32));
+  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pNew_state->m_sorted_central_dir_offsets, sizeof(mz_uint32));
+
+  pZip->m_pState = pNew_state;
+  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
+  pZip->m_archive_size = existing_size;
+  pZip->m_central_directory_file_ofs = 0;
+  pZip->m_total_files = 0;
+  return MZ_TRUE;
+}
+
+// Write callback for heap-backed archives: copies n bytes from pBuf to offset file_ofs of the memory block,
+// growing the block through pZip->m_pRealloc (capacity doubles, starting from at least 64 bytes) when needed.
+// Returns n on success, or 0 if n is 0, the resulting size would exceed 0x7FFFFFFF with a 32-bit size_t,
+// or the block cannot be grown.
+static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
+{
+  mz_zip_archive *pArchive = (mz_zip_archive *)pOpaque;
+  mz_zip_internal_state *pHeap = pArchive->m_pState;
+  // Size of the block's used region after this write (it never shrinks).
+  mz_uint64 end_size = MZ_MAX(file_ofs + n, pHeap->m_mem_size);
+#ifdef _MSC_VER
+  if ((!n) || ((0, sizeof(size_t) == sizeof(mz_uint32)) && (end_size > 0x7FFFFFFF)))
+#else
+  if ((!n) || ((sizeof(size_t) == sizeof(mz_uint32)) && (end_size > 0x7FFFFFFF)))
+#endif
+    return 0;
+
+  if (end_size > pHeap->m_mem_capacity)
+  {
+    void *pGrown;
+    size_t grown_capacity = MZ_MAX(64, pHeap->m_mem_capacity);
+    while (grown_capacity < end_size)
+      grown_capacity *= 2;
+    pGrown = pArchive->m_pRealloc(pArchive->m_pAlloc_opaque, pHeap->m_pMem, 1, grown_capacity);
+    if (NULL == pGrown)
+      return 0;
+    pHeap->m_pMem = pGrown;
+    pHeap->m_mem_capacity = grown_capacity;
+  }
+
+  memcpy((mz_uint8 *)pHeap->m_pMem + file_ofs, pBuf, n);
+  pHeap->m_mem_size = (size_t)end_size;
+  return n;
+}
+
+// Initializes pZip for writing to a growable heap block.
+// size_to_reserve_at_beginning becomes the initial archive size; the block is pre-allocated with
+// max(initial_allocation_size, size_to_reserve_at_beginning) bytes when that value is non-zero.
+// Returns MZ_FALSE if writer initialization or the initial allocation fails.
+mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size)
+{
+  pZip->m_pIO_opaque = pZip;
+  pZip->m_pWrite = mz_zip_heap_write_func;
+  if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning))
+    return MZ_FALSE;
+
+  initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning);
+  if (0 == initial_allocation_size)
+    return MZ_TRUE;
+
+  pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size);
+  if (NULL == pZip->m_pState->m_pMem)
+  {
+    mz_zip_writer_end(pZip);
+    return MZ_FALSE;
+  }
+  pZip->m_pState->m_mem_capacity = initial_allocation_size;
+  return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_STDIO
+// stdio-backed write callback: pOpaque is the mz_zip_archive itself; writes n bytes from pBuf at absolute
+// archive offset file_ofs. Returns whatever MZ_FWRITE reports, or 0 when file_ofs is negative as a signed
+// 64-bit value or the seek fails.
+static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
+{
+  mz_zip_archive *pArchive = (mz_zip_archive *)pOpaque;
+  const mz_int64 wanted_ofs = (mz_int64)file_ofs;
+  const mz_int64 stream_ofs = MZ_FTELL64(pArchive->m_pState->m_pFile);
+  if (wanted_ofs < 0)
+    return 0;
+  // Skip the seek when the stream is already positioned at the requested offset.
+  if (stream_ofs != wanted_ofs)
+  {
+    if (MZ_FSEEK64(pArchive->m_pState->m_pFile, wanted_ofs, SEEK_SET))
+      return 0;
+  }
+  return MZ_FWRITE(pBuf, 1, n, pArchive->m_pState->m_pFile);
+}
+
+// Initializes pZip for writing to the file pFilename (opened with mode "wb").
+// When size_to_reserve_at_beginning is non-zero, that many zero bytes are written at the start of the file.
+// Returns MZ_FALSE if writer initialization, opening the file, or writing the reserved region fails; in the
+// latter two cases the writer is shut down with mz_zip_writer_end().
+mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning)
+{
+  MZ_FILE *pOut_file;
+
+  pZip->m_pIO_opaque = pZip;
+  pZip->m_pWrite = mz_zip_file_write_func;
+  if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning))
+    return MZ_FALSE;
+
+  pOut_file = MZ_FOPEN(pFilename, "wb");
+  if (NULL == pOut_file)
+  {
+    mz_zip_writer_end(pZip);
+    return MZ_FALSE;
+  }
+  pZip->m_pState->m_pFile = pOut_file;
+
+  if (size_to_reserve_at_beginning)
+  {
+    // Fill the reserved region with zeros, at most one 4096-byte chunk per write.
+    mz_uint64 write_ofs = 0;
+    char zeros[4096];
+    MZ_CLEAR_OBJ(zeros);
+    while (size_to_reserve_at_beginning)
+    {
+      size_t chunk = (size_t)MZ_MIN(sizeof(zeros), size_to_reserve_at_beginning);
+      if (pZip->m_pWrite(pZip->m_pIO_opaque, write_ofs, zeros, chunk) != chunk)
+      {
+        mz_zip_writer_end(pZip);
+        return MZ_FALSE;
+      }
+      write_ofs += chunk;
+      size_to_reserve_at_beginning -= chunk;
+    }
+  }
+  return MZ_TRUE;
+}
+#endif // #ifndef MINIZ_NO_STDIO
+
+// Converts an archive that is currently open for reading into one that can be appended to.
+// - stdio-backed archives are reopened in "r+b" mode using pFilename (required in that case).
+// - Memory-backed archives switch to the heap write callback, which may resize the block via m_pRealloc.
+// - Archives read through a user callback must already have m_pWrite set.
+// New data will be written starting at the old central directory offset.
+// Returns MZ_FALSE if the archive is not in reading mode, is already at the supported maximum
+// (0xFFFF files / 32-bit offsets), or the backing store cannot be made writable.
+mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename)
+{
+  mz_zip_internal_state *pState;
+  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+    return MZ_FALSE;
+  // No sense in trying to write to an archive that's already at the support max size
+  if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  pState = pZip->m_pState;
+
+  if (pState->m_pFile)
+  {
+#ifdef MINIZ_NO_STDIO
+    // Without stdio support a file-backed archive cannot be reopened; pFilename is referenced only to mark it as used.
+    pFilename; return MZ_FALSE;
+#else
+    // Archive is being read from stdio - try to reopen as writable.
+    // m_pIO_opaque must still be the archive itself, as the stdio callbacks in this file expect.
+    if (pZip->m_pIO_opaque != pZip)
+      return MZ_FALSE;
+    if (!pFilename)
+      return MZ_FALSE;
+    pZip->m_pWrite = mz_zip_file_write_func;
+    if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile)))
+    {
+      // The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it.
+      mz_zip_reader_end(pZip);
+      return MZ_FALSE;
+    }
+#endif // #ifdef MINIZ_NO_STDIO
+  }
+  else if (pState->m_pMem)
+  {
+    // Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback.
+    if (pZip->m_pIO_opaque != pZip)
+      return MZ_FALSE;
+    // Treat the block as exactly full so that the first write beyond it triggers a realloc.
+    pState->m_mem_capacity = pState->m_mem_size;
+    pZip->m_pWrite = mz_zip_heap_write_func;
+  }
+  // Archive is being read via a user provided read function - make sure the user has specified a write function too.
+  else if (!pZip->m_pWrite)
+    return MZ_FALSE;
+
+  // Start writing new files at the archive's current central directory location.
+  pZip->m_archive_size = pZip->m_central_directory_file_ofs;
+  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
+  pZip->m_central_directory_file_ofs = 0;
+
+  return MZ_TRUE;
+}
+
+// Adds the buf_size bytes at pBuf to the archive under pArchive_name.
+// Thin wrapper over mz_zip_writer_add_mem_ex() that passes NULL/0 for every optional argument.
+mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags)
+{
+  return mz_zip_writer_add_mem_ex(
+    pZip, pArchive_name,
+    pBuf, buf_size,
+    NULL, 0,
+    level_and_flags,
+    0, 0);
+}
+
+// State handed (as pUser) to mz_zip_writer_add_put_buf_callback() while output is written into the archive.
+typedef struct
+{
+  mz_zip_archive *m_pZip; // archive whose m_pWrite callback receives the data
+  mz_uint64 m_cur_archive_file_ofs; // archive offset of the next write; advanced by each successful callback
+  mz_uint64 m_comp_size; // running total of bytes written through the callback
+} mz_zip_writer_add_state;
+
+static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void *pUser)
+{ // Compressor output sink: writes len bytes at the tracked archive offset, then advances the offset and the compressed-size total.
+  mz_zip_writer_add_state *pAdd = (mz_zip_writer_add_state *)pUser;
+  mz_zip_archive *pArchive = pAdd->m_pZip;
+  int num_written = (int)pArchive->m_pWrite(pArchive->m_pIO_opaque, pAdd->m_cur_archive_file_ofs, pBuf, len);
+  if (num_written != len) return MZ_FALSE; // Short write: report failure and leave both counters untouched.
+  pAdd->m_cur_archive_file_ofs += len; pAdd->m_comp_size += len;
+  return MZ_TRUE;
+}
+
+static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) // Serializes a local directory (per-file) header into pDst, which must have room for MZ_ZIP_LOCAL_DIR_HEADER_SIZE bytes. Always returns MZ_TRUE.
+{
+  (void)pZip; // pZip is not used by this function.
+  memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); // Zero the whole header first so any byte not written below is 0.
+  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0); // Version-needed field is 20 whenever a compression method is set, 0 for stored data.
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date);
+  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32);
+  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); // Sizes arrive as 64-bit values but go through the 32-bit writer; the callers in this file reject sizes above 0xFFFFFFFF before calling.
+  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size);
+  return MZ_TRUE;
+}
+
+static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) // Serializes a central directory file header into pDst, which must have room for MZ_ZIP_CENTRAL_DIR_HEADER_SIZE bytes. Always returns MZ_TRUE.
+{
+  (void)pZip; // pZip is not used by this function.
+  memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); // Zero the whole header first so any byte not written below is 0.
+  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0); // Version-needed field is 20 whenever a compression method is set, 0 for stored data.
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date);
+  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32);
+  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); // 64-bit sizes and offset go through the 32-bit writer; mz_zip_writer_add_to_central_dir rejects local_header_ofs above 0xFFFFFFFF before calling.
+  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size);
+  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size);
+  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes);
+  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs);
+  return MZ_TRUE;
+}
+
+static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) // Appends one entry (header, filename, extra data, comment) to the in-memory central directory and records where it starts. Returns MZ_FALSE if the entry would not fit in 32 bits or an array append fails.
+{
+  mz_zip_internal_state *pState = pZip->m_pState;
+  mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; // The new entry begins at the current end of the central directory.
+  size_t orig_central_dir_size = pState->m_central_dir.m_size; // Remembered so a partial append can be undone.
+  mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
+  // Reject a local header offset or a resulting central directory size that does not fit in 32 bits.
+  // No zip64 support yet
+  if ((local_header_ofs > 0xFFFFFFFF) || (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + comment_size) > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, extra_size, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes))
+    return MZ_FALSE;
+  // The pushes run in order and stop at the first failure; the entry's start offset is appended to m_central_dir_offsets last.
+  if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) ||
+      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) ||
+      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) ||
+      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) ||
+      (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &central_dir_ofs, 1)))
+  {
+    // Try to push the central directory array back into its original state.
+    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
+    return MZ_FALSE;
+  }
+
+  return MZ_TRUE;
+}
+
+static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name)
+{
+  // Reject names a ZIP entry may not use: a leading forward slash, a ':' anywhere (drive letter), or any DOS-style backslash.
+  const char *pCur;
+  if ('/' == pArchive_name[0])
+    return MZ_FALSE;
+  for (pCur = pArchive_name; *pCur != '\0'; ++pCur)
+  {
+    if ((':' == *pCur) || ('\\' == *pCur))
+      return MZ_FALSE;
+  }
+  return MZ_TRUE;
+}
+
+static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip)
+{ // Returns how many padding bytes bring m_archive_size up to the next multiple of m_file_offset_alignment (0 when alignment is disabled). NOTE(review): the masking assumes the alignment is a power of two - confirm.
+  mz_uint32 misaligned_by;
+  if (0 == pZip->m_file_offset_alignment)
+    return 0;
+  misaligned_by = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1));
+  return (pZip->m_file_offset_alignment - misaligned_by) & (pZip->m_file_offset_alignment - 1);
+}
+
+static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n)
+{ // Writes n zero bytes to the archive starting at cur_file_ofs, in chunks of at most 4096 bytes; MZ_FALSE on a short write.
+  char zeros[4096];
+  mz_uint32 chunk;
+  memset(zeros, 0, MZ_MIN(sizeof(zeros), n));
+  for ( ; n != 0; cur_file_ofs += chunk, n -= chunk)
+  {
+    chunk = MZ_MIN(sizeof(zeros), n);
+    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, zeros, chunk) != chunk)
+      return MZ_FALSE;
+  }
+  return MZ_TRUE;
+}
+
+mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32)
+{ // Appends one entry (local header, name, data) built from pBuf and records it in the in-memory central directory. uncomp_size/uncomp_crc32 are only honoured with MZ_ZIP_FLAG_COMPRESSED_DATA. Returns MZ_FALSE on invalid arguments, sizes that need zip64, or any allocation/write failure.
+  mz_uint16 method = 0, dos_time = 0, dos_date = 0;
+  mz_uint level, ext_attributes = 0, num_alignment_padding_bytes;
+  mz_uint64 local_dir_header_ofs, cur_archive_file_ofs, comp_size = 0; // The two offsets are assigned only after pZip has been validated.
+  size_t archive_name_size;
+  mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
+  tdefl_compressor *pComp = NULL;
+  mz_bool store_data_uncompressed;
+  mz_zip_internal_state *pState;
+
+  if ((int)level_and_flags < 0)
+    level_and_flags = MZ_DEFAULT_LEVEL;
+  level = level_and_flags & 0xF;
+  store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA));
+
+  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION))
+    return MZ_FALSE;
+
+  pState = pZip->m_pState;
+  local_dir_header_ofs = cur_archive_file_ofs = pZip->m_archive_size; // New entry starts at the current end of the archive data.
+  if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size))
+    return MZ_FALSE;
+  // No zip64 support yet
+  if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF))
+    return MZ_FALSE;
+  if (!mz_zip_writer_validate_archive_name(pArchive_name))
+    return MZ_FALSE;
+
+#ifndef MINIZ_NO_TIME
+  {
+    time_t cur_time; time(&cur_time);
+    mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date);
+  }
+#endif // #ifndef MINIZ_NO_TIME
+
+  archive_name_size = strlen(pArchive_name);
+  if (archive_name_size > 0xFFFF)
+    return MZ_FALSE;
+
+  num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
+
+  // no zip64 support yet
+  if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/'))
+  {
+    // Set DOS Subdirectory attribute bit.
+    ext_attributes |= 0x10;
+    // Subdirectories cannot contain data.
+    if ((buf_size) || (uncomp_size))
+      return MZ_FALSE;
+  }
+
+  // Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.)
+  if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1)))
+    return MZ_FALSE;
+
+  if ((!store_data_uncompressed) && (buf_size))
+  {
+    if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor))))
+      return MZ_FALSE;
+  }
+  // Reserve the padding and the local header with zeros; the real header is written once sizes and CRC are known.
+  if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header)))
+  {
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+    return MZ_FALSE;
+  }
+  local_dir_header_ofs += num_alignment_padding_bytes;
+  if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); }
+  cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header);
+
+  MZ_CLEAR_OBJ(local_dir_header);
+  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size)
+  {
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+    return MZ_FALSE;
+  }
+  cur_archive_file_ofs += archive_name_size;
+  // For raw input, derive CRC and size from pBuf; inputs of 3 bytes or fewer are always stored.
+  if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
+  {
+    uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, buf_size);
+    uncomp_size = buf_size;
+    if (uncomp_size <= 3)
+    {
+      level = 0;
+      store_data_uncompressed = MZ_TRUE;
+    }
+  }
+
+  if (store_data_uncompressed)
+  {
+    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size)
+    {
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+      return MZ_FALSE;
+    }
+
+    cur_archive_file_ofs += buf_size;
+    comp_size = buf_size;
+    // Caller-supplied pre-compressed data is copied verbatim but still labelled as deflated.
+    if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)
+      method = MZ_DEFLATED;
+  }
+  else if (buf_size)
+  {
+    mz_zip_writer_add_state state;
+
+    state.m_pZip = pZip;
+    state.m_cur_archive_file_ofs = cur_archive_file_ofs;
+    state.m_comp_size = 0;
+
+    if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) ||
+        (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE))
+    {
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+      return MZ_FALSE;
+    }
+
+    comp_size = state.m_comp_size;
+    cur_archive_file_ofs = state.m_cur_archive_file_ofs;
+
+    method = MZ_DEFLATED;
+  }
+
+  pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+  pComp = NULL;
+
+  // no zip64 support yet
+  if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date))
+    return MZ_FALSE;
+  // Overwrite the zero placeholder with the finished local header.
+  if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
+    return MZ_FALSE;
+
+  if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes))
+    return MZ_FALSE;
+  // Commit: the archive size and file count change only after every step succeeded.
+  pZip->m_total_files++;
+  pZip->m_archive_size = cur_archive_file_ofs;
+
+  return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_STDIO
+mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags)
+{ // Streams the disk file pSrc_filename into the archive as pArchive_name, stored or deflated depending on the level in level_and_flags. Returns MZ_FALSE on invalid arguments, files that need zip64, or any read/allocation/write failure.
+  mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes;
+  mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0;
+  mz_uint64 local_dir_header_ofs, cur_archive_file_ofs, uncomp_size = 0, comp_size = 0; // The two offsets are assigned only after pZip has been validated.
+  size_t archive_name_size;
+  mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
+  MZ_FILE *pSrc_file = NULL;
+
+  if ((int)level_and_flags < 0)
+    level_and_flags = MZ_DEFAULT_LEVEL;
+  level = level_and_flags & 0xF;
+  // pSrc_filename is validated here too, since it is handed to the file-time lookup and to MZ_FOPEN below.
+  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || (!pSrc_filename) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION))
+    return MZ_FALSE;
+  if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)
+    return MZ_FALSE;
+  if (!mz_zip_writer_validate_archive_name(pArchive_name))
+    return MZ_FALSE;
+  local_dir_header_ofs = cur_archive_file_ofs = pZip->m_archive_size; // New entry starts at the current end of the archive data.
+  archive_name_size = strlen(pArchive_name);
+  if (archive_name_size > 0xFFFF)
+    return MZ_FALSE;
+
+  num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
+
+  // no zip64 support yet
+  if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date))
+    return MZ_FALSE;
+
+  pSrc_file = MZ_FOPEN(pSrc_filename, "rb");
+  if (!pSrc_file)
+    return MZ_FALSE;
+  MZ_FSEEK64(pSrc_file, 0, SEEK_END);
+  uncomp_size = MZ_FTELL64(pSrc_file);
+  MZ_FSEEK64(pSrc_file, 0, SEEK_SET);
+
+  if (uncomp_size > 0xFFFFFFFF)
+  {
+    // No zip64 support yet
+    MZ_FCLOSE(pSrc_file);
+    return MZ_FALSE;
+  }
+  if (uncomp_size <= 3)
+    level = 0;
+  // Reserve the padding and the local header with zeros; the real header is written once sizes and CRC are known.
+  if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header)))
+  {
+    MZ_FCLOSE(pSrc_file);
+    return MZ_FALSE;
+  }
+  local_dir_header_ofs += num_alignment_padding_bytes;
+  if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); }
+  cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header);
+
+  MZ_CLEAR_OBJ(local_dir_header);
+  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size)
+  {
+    MZ_FCLOSE(pSrc_file);
+    return MZ_FALSE;
+  }
+  cur_archive_file_ofs += archive_name_size;
+
+  if (uncomp_size)
+  {
+    mz_uint64 uncomp_remaining = uncomp_size;
+    void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE);
+    if (!pRead_buf)
+    {
+      MZ_FCLOSE(pSrc_file);
+      return MZ_FALSE;
+    }
+
+    if (!level)
+    {
+      // Stored: copy the file through the I/O buffer while accumulating its CRC.
+      while (uncomp_remaining)
+      {
+        mz_uint n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining);
+        if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n))
+        {
+          pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+          MZ_FCLOSE(pSrc_file);
+          return MZ_FALSE;
+        }
+        uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n);
+        uncomp_remaining -= n;
+        cur_archive_file_ofs += n;
+      }
+      comp_size = uncomp_size;
+    }
+    else
+    {
+      mz_bool result = MZ_FALSE;
+      mz_zip_writer_add_state state;
+      tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor));
+      if (!pComp)
+      {
+        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+        MZ_FCLOSE(pSrc_file);
+        return MZ_FALSE;
+      }
+
+      state.m_pZip = pZip;
+      state.m_cur_archive_file_ofs = cur_archive_file_ofs;
+      state.m_comp_size = 0;
+
+      if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY)
+      {
+        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+        MZ_FCLOSE(pSrc_file);
+        return MZ_FALSE;
+      }
+      // Deflate chunk by chunk; the final chunk is submitted with TDEFL_FINISH, and only TDEFL_STATUS_DONE counts as success.
+      for ( ; ; )
+      {
+        size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, MZ_ZIP_MAX_IO_BUF_SIZE);
+        tdefl_status status;
+
+        if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size)
+          break;
+
+        uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size);
+        uncomp_remaining -= in_buf_size;
+
+        status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH);
+        if (status == TDEFL_STATUS_DONE)
+        {
+          result = MZ_TRUE;
+          break;
+        }
+        else if (status != TDEFL_STATUS_OKAY)
+          break;
+      }
+
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
+
+      if (!result)
+      {
+        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+        MZ_FCLOSE(pSrc_file);
+        return MZ_FALSE;
+      }
+
+      comp_size = state.m_comp_size;
+      cur_archive_file_ofs = state.m_cur_archive_file_ofs;
+
+      method = MZ_DEFLATED;
+    }
+
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+  }
+
+  MZ_FCLOSE(pSrc_file); pSrc_file = NULL;
+
+  // no zip64 support yet
+  if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date))
+    return MZ_FALSE;
+  // Overwrite the zero placeholder with the finished local header.
+  if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
+    return MZ_FALSE;
+
+  if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes))
+    return MZ_FALSE;
+  // Commit: the archive size and file count change only after every step succeeded.
+  pZip->m_total_files++;
+  pZip->m_archive_size = cur_archive_file_ofs;
+
+  return MZ_TRUE;
+}
+#endif // #ifndef MINIZ_NO_STDIO
+
+mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index)
+{ // Copies entry file_index of pSource_zip into pZip without recompressing: local header, name/extra/data, optional data descriptor, then a central directory record pointing at the new location. Returns MZ_FALSE on any failure.
+  mz_uint n, bit_flags, num_alignment_padding_bytes;
+  mz_uint64 comp_bytes_remaining, local_dir_header_ofs;
+  mz_uint64 cur_src_file_ofs, cur_dst_file_ofs;
+  mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
+  mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
+  size_t orig_central_dir_size;
+  mz_zip_internal_state *pState;
+  void *pBuf; const mz_uint8 *pSrc_central_header;
+
+  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
+    return MZ_FALSE;
+  if (NULL == (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index)))
+    return MZ_FALSE;
+  pState = pZip->m_pState;
+
+  num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
+
+  // no zip64 support yet
+  if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
+    return MZ_FALSE;
+
+  cur_src_file_ofs = MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
+  cur_dst_file_ofs = pZip->m_archive_size;
+  // Fetch the source entry's local header and verify its signature before writing anything.
+  if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
+    return MZ_FALSE;
+  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
+    return MZ_FALSE;
+  cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
+
+  if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes))
+    return MZ_FALSE;
+  cur_dst_file_ofs += num_alignment_padding_bytes;
+  local_dir_header_ofs = cur_dst_file_ofs;
+  if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); }
+
+  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
+    return MZ_FALSE;
+  cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
+  // Bytes still to copy: filename + extra field (lengths from the local header) plus the compressed data (size from the central header).
+  n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
+  comp_bytes_remaining = n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+  // The copy buffer is at least 16 bytes so it can also hold a data descriptor below.
+  if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(sizeof(mz_uint32) * 4, MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining)))))
+    return MZ_FALSE;
+
+  while (comp_bytes_remaining)
+  {
+    n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining);
+    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n)
+    {
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
+      return MZ_FALSE;
+    }
+    cur_src_file_ofs += n;
+
+    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n)
+    {
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
+      return MZ_FALSE;
+    }
+    cur_dst_file_ofs += n;
+
+    comp_bytes_remaining -= n;
+  }
+
+  bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS);
+  if (bit_flags & 8)
+  {
+    // Copy data descriptor
+    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4)
+    {
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
+      return MZ_FALSE;
+    }
+    // The descriptor is 4 words when it starts with the 0x08074b50 signature, otherwise 3.
+    n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3);
+    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n)
+    {
+      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
+      return MZ_FALSE;
+    }
+
+    cur_src_file_ofs += n;
+    cur_dst_file_ofs += n;
+  }
+  pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
+
+  // no zip64 support yet
+  if (cur_dst_file_ofs > 0xFFFFFFFF)
+    return MZ_FALSE;
+
+  orig_central_dir_size = pState->m_central_dir.m_size;
+  // Clone the source central header, patching only the local header offset to the destination position.
+  memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
+  MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs);
+  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE))
+    return MZ_FALSE;
+
+  n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS);
+  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n))
+  {
+    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
+    return MZ_FALSE;
+  }
+  // Central directory too large for 32 bits: undo the append, as the other failure paths do, so no orphaned record is left behind.
+  if (pState->m_central_dir.m_size > 0xFFFFFFFF)
+    { mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return MZ_FALSE; }
+  n = (mz_uint32)orig_central_dir_size;
+  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1))
+  {
+    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
+    return MZ_FALSE;
+  }
+
+  pZip->m_total_files++;
+  pZip->m_archive_size = cur_dst_file_ofs;
+
+  return MZ_TRUE;
+}
+
+mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) // Writes the accumulated central directory followed by the end-of-central-directory record, then marks the archive as finalized. Returns MZ_FALSE on bad state, zip64-sized archives, or a write/flush failure.
+{
+  mz_zip_internal_state *pState;
+  mz_uint64 central_dir_ofs, central_dir_size;
+  mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE];
+  // Only an archive that is currently in writing mode can be finalized.
+  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
+    return MZ_FALSE;
+
+  pState = pZip->m_pState;
+
+  // no zip64 support yet
+  if ((pZip->m_total_files > 0xFFFF) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
+    return MZ_FALSE;
+  // An archive with no files gets an end record with offset 0 and size 0 and no central directory data.
+  central_dir_ofs = 0;
+  central_dir_size = 0;
+  if (pZip->m_total_files)
+  {
+    // Write central directory
+    central_dir_ofs = pZip->m_archive_size;
+    central_dir_size = pState->m_central_dir.m_size;
+    pZip->m_central_directory_file_ofs = central_dir_ofs;
+    if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size)
+      return MZ_FALSE;
+    pZip->m_archive_size += central_dir_size;
+  }
+
+  // Write end of central directory record
+  MZ_CLEAR_OBJ(hdr);
+  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG);
+  MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files);
+  MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files);
+  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size);
+  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs);
+
+  if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, sizeof(hdr)) != sizeof(hdr))
+    return MZ_FALSE;
+#ifndef MINIZ_NO_STDIO
+  if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) // For stdio-backed archives a failed flush fails the finalize; the size and mode updates below are then skipped.
+    return MZ_FALSE;
+#endif // #ifndef MINIZ_NO_STDIO
+
+  pZip->m_archive_size += sizeof(hdr);
+  // From here on the add functions in this file refuse the archive, since they require MZ_ZIP_MODE_WRITING.
+  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED;
+  return MZ_TRUE;
+}
+
+mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize)
+{ // Finalizes a heap-backed archive and hands its memory block and size to the caller through pBuf/pSize. Fails for NULL arguments, archives not written through mz_zip_heap_write_func, or a failed finalize.
+  mz_zip_internal_state *pHeap_state;
+  if ((NULL == pZip) || (NULL == pZip->m_pState) || (NULL == pBuf) || (NULL == pSize) || (pZip->m_pWrite != mz_zip_heap_write_func))
+    return MZ_FALSE;
+  if (!mz_zip_writer_finalize_archive(pZip))
+    return MZ_FALSE;
+
+  // Detach the block from the archive state so mz_zip_writer_end() no longer frees it; presumably the caller must release it.
+  pHeap_state = pZip->m_pState;
+  *pBuf = pHeap_state->m_pMem;
+  *pSize = pHeap_state->m_mem_size;
+  pHeap_state->m_pMem = NULL; pHeap_state->m_mem_capacity = 0; pHeap_state->m_mem_size = 0;
+  return MZ_TRUE;
+}
+
+mz_bool mz_zip_writer_end(mz_zip_archive *pZip) // Releases everything the writer owns (central directory arrays, stdio file, heap block, internal state) and marks the archive invalid. Does not finalize the archive. Returns MZ_FALSE only when pZip is not a writer (or finalized writer) with valid callbacks.
+{
+  mz_zip_internal_state *pState;
+  mz_bool status = MZ_TRUE; // Never changed below, so every teardown that passes the initial check reports success.
+  if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)))
+    return MZ_FALSE;
+  // Detach the state from the archive first, then tear it down piece by piece.
+  pState = pZip->m_pState;
+  pZip->m_pState = NULL;
+  mz_zip_array_clear(pZip, &pState->m_central_dir);
+  mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
+  mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
+
+#ifndef MINIZ_NO_STDIO
+  if (pState->m_pFile)
+  {
+    MZ_FCLOSE(pState->m_pFile); // The close result is not checked.
+    pState->m_pFile = NULL;
+  }
+#endif // #ifndef MINIZ_NO_STDIO
+  // Free the heap block only when this writer uses the heap write function and the block is still attached.
+  if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem))
+  {
+    pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem);
+    pState->m_pMem = NULL;
+  }
+
+  pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
+  pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
+  return status;
+}
+
+#ifndef MINIZ_NO_STDIO
+mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) // Adds the buffer pBuf to the zip file pZip_filename as pArchive_name, creating the archive if it cannot be stat'ed and appending to it otherwise. Returns MZ_TRUE only if the add, the finalize and the teardown all succeed.
+{
+  mz_bool status, created_new_archive = MZ_FALSE;
+  mz_zip_archive zip_archive;
+  struct MZ_FILE_STAT_STRUCT file_stat;
+  MZ_CLEAR_OBJ(zip_archive);
+  if ((int)level_and_flags < 0) // A negative value selects the default compression level.
+     level_and_flags = MZ_DEFAULT_LEVEL;
+  if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION))
+    return MZ_FALSE;
+  if (!mz_zip_writer_validate_archive_name(pArchive_name))
+    return MZ_FALSE;
+  if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) // Any stat failure is treated as "the archive does not exist yet".
+  {
+    // Create a new archive.
+    if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0))
+      return MZ_FALSE;
+    created_new_archive = MZ_TRUE;
+  }
+  else
+  {
+    // Append to an existing archive.
+    if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) // NOTE(review): level_and_flags, whose low 4 bits carry the compression level, is reused here as reader flags - confirm this is intended.
+      return MZ_FALSE;
+    if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename))
+    {
+      mz_zip_reader_end(&zip_archive);
+      return MZ_FALSE;
+    }
+  }
+  status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0);
+  // Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.)
+  if (!mz_zip_writer_finalize_archive(&zip_archive))
+    status = MZ_FALSE;
+  if (!mz_zip_writer_end(&zip_archive))
+    status = MZ_FALSE;
+  if ((!status) && (created_new_archive)) // A pre-existing archive is left in place on failure; only a file created by this call is removed.
+  {
+    // It's a new archive and something went wrong, so just delete it.
+    int ignoredStatus = MZ_DELETE_FILE(pZip_filename);
+    (void)ignoredStatus;
+  }
+  return status;
+}
+
+void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags)
+{ // Opens the zip file pZip_filename, looks up pArchive_name and returns the result of mz_zip_reader_extract_to_heap() for it. Returns NULL if an argument is NULL, the archive cannot be opened, or the entry is not found.
+  mz_zip_archive archive;
+  void *pData = NULL;
+  int index;
+
+  if (pSize != NULL)
+    *pSize = 0; // Start from zero; pSize is later handed to the extraction call.
+
+  if ((NULL == pZip_filename) || (NULL == pArchive_name))
+    return NULL;
+
+  MZ_CLEAR_OBJ(archive);
+  if (!mz_zip_reader_init_file(&archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY))
+    return NULL;
+
+  index = mz_zip_reader_locate_file(&archive, pArchive_name, NULL, flags);
+  if (index >= 0)
+    pData = mz_zip_reader_extract_to_heap(&archive, index, pSize, flags);
+  mz_zip_reader_end(&archive); // The reader is closed whether or not the entry was found.
+  return pData;
+}
+
+#endif // #ifndef MINIZ_NO_STDIO
+
+#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
+#endif // #ifndef MINIZ_NO_ARCHIVE_APIS
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+  This is free and unencumbered software released into the public domain.
+
+  Anyone is free to copy, modify, publish, use, compile, sell, or
+  distribute this software, either in source code form or as a compiled
+  binary, for any purpose, commercial or non-commercial, and by any
+  means.
+
+  In jurisdictions that recognize copyright laws, the author or authors
+  of this software dedicate any and all copyright interest in the
+  software to the public domain. We make this dedication for the benefit
+  of the public at large and to the detriment of our heirs and
+  successors. We intend this dedication to be an overt act of
+  relinquishment in perpetuity of all present and future rights to this
+  software under copyright law.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+  OTHER DEALINGS IN THE SOFTWARE.
+
+  For more information, please refer to <http://unlicense.org/>
+*/
diff --git a/Utilities/miniz.h b/Utilities/miniz.h
new file mode 100644
index 0000000..a7e550f
--- /dev/null
+++ b/Utilities/miniz.h
@@ -0,0 +1,930 @@
+/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing
+   See "unlicense" statement at the end of this file.
+   Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013
+   Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt
+
+   Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define
+   MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros).
+
+   * Change History
+     10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major release with Zip64 support (almost there!):
+       - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com) which could cause locate files to not find files. This bug
+        would only have occurred in earlier versions if you explicitly used this flag, OR if you used mz_zip_extract_archive_file_to_heap() or mz_zip_add_mem_to_archive_file_in_place()
+        (which used this flag). If you can't switch to v1.15 but want to fix this bug, just remove the uses of this flag from both helper funcs (and of course don't use the flag).
+       - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when pUser_read_buf is not NULL and compressed size is > uncompressed size
+       - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract compressed data from directory entries, to account for weird zipfiles which contain zero-size compressed data on dir entries.
+         Hopefully this fix won't cause any issues on weird zip archives, because it assumes the low 16-bits of zip external attributes are DOS attributes (which I believe they always are in practice).
+       - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the internal attributes, just the filename and external attributes
+       - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed
+       - Added cmake support for Linux builds which builds all the examples, tested with clang v3.3 and gcc v4.6.
+       - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti
+       - Merged MZ_FORCEINLINE fix from hdeanclark
+       - Fix <time.h> include before config #ifdef, thanks emil.brink
+       - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping (super useful for OpenGL apps), and explicit control over the compression level (so you can
+        set it to 1 for real-time compression).
+       - Merged in some compiler fixes from paulharris's github repro.
+       - Retested this build under Windows (VS 2010, including static analysis), tcc  0.9.26, gcc v4.6 and clang v3.3.
+       - Added example6.c, which dumps an image of the mandelbrot set to a PNG file.
+       - Modified example2 to help test the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more.
+       - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix possible src file fclose() leak if alignment bytes+local header file write failed
+       - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): Was pushing the wrong central dir header offset, appears harmless in this release, but it became a problem in the zip64 branch
+     5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include <time.h> (thanks fermtect).
+     5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit.
+       - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files.
+       - Eliminated a bunch of warnings when compiling with GCC 32-bit/64.
+       - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly
+        "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning).
+       - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64.
+       - Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test.
+       - Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives.
+       - Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.)
+       - Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself).
+     4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's.
+      level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson <bruced@valvesoftware.com> for the feedback/bug report.
+     5/28/11 v1.11 - Added statement from unlicense.org
+     5/27/11 v1.10 - Substantial compressor optimizations:
+      - Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a
+      - Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86).
+      - Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types.
+      - Refactored the compression code for better readability and maintainability.
+      - Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large
+       drop in throughput on some files).
+     5/15/11 v1.09 - Initial stable release.
+
+   * Low-level Deflate/Inflate implementation notes:
+
+     Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or
+     greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses
+     approximately as well as zlib.
+
+     Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function
+     coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory
+     block large enough to hold the entire file.
+
+     The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation.
+
+   * zlib-style API notes:
+
+     miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in
+     zlib replacement in many apps:
+        The z_stream struct, optional memory allocation callbacks
+        deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound
+        inflateInit/inflateInit2/inflate/inflateEnd
+        compress, compress2, compressBound, uncompress
+        CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines.
+        Supports raw deflate streams or standard zlib streams with adler-32 checking.
+
+     Limitations:
+      The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries.
+      I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but
+      there are no guarantees that miniz.c pulls this off perfectly.
+
+   * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by
+     Alex Evans. Supports 1-4 bytes/pixel images.
+
+   * ZIP archive API notes:
+
+     The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to
+     get the job done with minimal fuss. There are simple API's to retrieve file information, read files from
+     existing archives, create new archives, append new files to existing archives, or clone archive data from
+     one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h),
+     or you can specify custom file read/write callbacks.
+
+     - Archive reading: Just call this function to read a single file from a disk archive:
+
+      void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name,
+        size_t *pSize, mz_uint zip_flags);
+
+     For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central
+     directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files.
+
+     - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file:
+
+     int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
+
+     The locate operation can optionally check file comments too, which (as one example) can be used to identify
+     multiple versions of the same file in an archive. This function uses a simple linear search through the central
+     directory, so it's not very fast.
+
+     Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and
+     retrieve detailed info on each file by calling mz_zip_reader_file_stat().
+
+     - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data
+     to disk and builds an exact image of the central directory in memory. The central directory image is written
+     all at once at the end of the archive file when the archive is finalized.
+
+     The archive writer can optionally align each file's local header and file data to any power of 2 alignment,
+     which can be useful when the archive will be read from optical media. Also, the writer supports placing
+     arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still
+     readable by any ZIP tool.
+
+     - Archive appending: The simple way to add a single file to an archive is to call this function:
+
+      mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name,
+        const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
+
+     The archive will be created if it doesn't already exist, otherwise it'll be appended to.
+     Note the appending is done in-place and is not an atomic operation, so if something goes wrong
+     during the operation it's possible the archive could be left without a central directory (although the local
+     file headers and file data will be fine, so the archive will be recoverable).
+
+     For more complex archive modification scenarios:
+     1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to
+     preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which compiles the
+     compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and
+     you're done. This is safe but requires a bunch of temporary disk space or heap memory.
+
+     2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(),
+     append new files as needed, then finalize the archive which will write an updated central directory to the
+     original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a
+     possibility that the archive's central directory could be lost with this method if anything goes wrong, though.
+
+     - ZIP archive support limitations:
+     No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files.
+     Requires streams capable of seeking.
+
+   * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the
+     below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it.
+
+   * Important: For best perf. be sure to customize the below macros for your target platform:
+     #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
+     #define MINIZ_LITTLE_ENDIAN 1
+     #define MINIZ_HAS_64BIT_REGISTERS 1
+
+   * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz
+     uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files
+     (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes).
+*/
+
+#pragma once
+#include "stdafx.h"
+
+#ifndef MINIZ_HEADER_INCLUDED
+#define MINIZ_HEADER_INCLUDED
+
+#include <stdlib.h>
+
+// Defines to completely disable specific portions of miniz.c:
+// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl.
+
+// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O.
+//#define MINIZ_NO_STDIO
+
+// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or
+// get/set file times, and the C run-time funcs that get/set times won't be called.
+// The current downside is the times written to your archives will be from 1979.
+//#define MINIZ_NO_TIME
+
+// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's.
+//#define MINIZ_NO_ARCHIVE_APIS
+
+// Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's.
+//#define MINIZ_NO_ARCHIVE_WRITING_APIS
+
+// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's.
+//#define MINIZ_NO_ZLIB_APIS
+
+// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib names, to prevent conflicts against stock zlib.
+//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+
+// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc.
+// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc
+// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user
+// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work.
+//#define MINIZ_NO_MALLOC
+
+#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
+  // TODO: Work around the "error: include file 'sys\utime.h'" failure when compiling with tcc on Linux (time support is switched off there for now).
+  #define MINIZ_NO_TIME
+#endif
+
+#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
+  #include <time.h>
+#endif
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
+// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
+#define MINIZ_X86_OR_X64_CPU 1
+#endif
+
+#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)) || MINIZ_X86_OR_X64_CPU
+// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. The defined() guards stop compilers that lack the __BYTE_ORDER__ macros from matching 0==0 here.
+#define MINIZ_LITTLE_ENDIAN 1
+#endif
+
+#if MINIZ_X86_OR_X64_CPU
+// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
+#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
+#endif
+
+#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
+// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
+#define MINIZ_HAS_64BIT_REGISTERS 1
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ------------------- zlib-style API Definitions.
+
+// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
+typedef unsigned long mz_ulong;
+
+// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
+void mz_free(void *p);
+
+#define MZ_ADLER32_INIT (1)
+// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
+mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
+
+#define MZ_CRC32_INIT (0)
+// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
+mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
+
+// Compression strategies.
+enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
+
+// Method
+#define MZ_DEFLATED 8
+
+// Heap allocation callbacks. Declared outside the MINIZ_NO_ZLIB_APIS guard because mz_zip_archive stores them even when the zlib-style API's are disabled.
+// Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
+typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
+typedef void (*mz_free_func)(void *opaque, void *address);
+typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
+
+#ifndef MINIZ_NO_ZLIB_APIS
+
+#define MZ_VERSION          "9.1.15"
+#define MZ_VERNUM           0x91F0
+#define MZ_VER_MAJOR        9
+#define MZ_VER_MINOR        1
+#define MZ_VER_REVISION     15
+#define MZ_VER_SUBREVISION  0
+
+// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
+enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
+
+// Return status codes. MZ_PARAM_ERROR is non-standard.
+enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 };
+
+// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
+enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
+
+// Window bits
+#define MZ_DEFAULT_WINDOW_BITS 15
+
+struct mz_internal_state;
+
+// Compression/decompression stream struct.
+typedef struct mz_stream_s
+{
+  const unsigned char *next_in;     // pointer to the next input byte to read
+  unsigned int avail_in;            // number of input bytes available at next_in
+  mz_ulong total_in;                // total number of bytes consumed so far (mz_ulong may be 32 or 64 bits wide)
+
+  unsigned char *next_out;          // pointer to where the next output byte is written
+  unsigned int avail_out;           // number of bytes that can still be written at next_out
+  mz_ulong total_out;               // total number of bytes produced so far (mz_ulong may be 32 or 64 bits wide)
+
+  char *msg;                        // error message (unused)
+  struct mz_internal_state *state;  // internal state, allocated by zalloc/zfree; the type is only forward-declared here
+
+  mz_alloc_func zalloc;             // optional heap allocation function (defaults to malloc)
+  mz_free_func zfree;               // optional heap free function (defaults to free)
+  void *opaque;                     // user pointer for the heap callbacks (their first parameter is named opaque)
+
+  int data_type;                    // data_type (unused)
+  mz_ulong adler;                   // adler32 of the source or uncompressed data
+  mz_ulong reserved;                // not used
+} mz_stream;
+
+typedef mz_stream *mz_streamp;
+
+// Returns the version string of miniz.c.
+const char *mz_version(void);
+
+// mz_deflateInit() initializes a compressor with default options:
+// Parameters:
+//  pStream must point to an initialized mz_stream struct.
+//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
+//  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
+//  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
+// Return values:
+//  MZ_OK on success.
+//  MZ_STREAM_ERROR if the stream is bogus.
+//  MZ_PARAM_ERROR if the input parameters are bogus.
+//  MZ_MEM_ERROR on out of memory.
+int mz_deflateInit(mz_streamp pStream, int level);
+
+// mz_deflateInit2() is like mz_deflateInit(), except with more control:
+// Additional parameters:
+//   method must be MZ_DEFLATED
+//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
+//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
+int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
+
+// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
+int mz_deflateReset(mz_streamp pStream);
+
+// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
+// Parameters:
+//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
+//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
+// Return values:
+//   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
+//   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
+//   MZ_STREAM_ERROR if the stream is bogus.
+//   MZ_PARAM_ERROR if one of the parameters is invalid.
+//   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
+int mz_deflate(mz_streamp pStream, int flush);
+
+// mz_deflateEnd() deinitializes a compressor:
+// Return values:
+//  MZ_OK on success.
+//  MZ_STREAM_ERROR if the stream is bogus.
+int mz_deflateEnd(mz_streamp pStream);
+
+// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
+mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
+
+// Single-call compression functions mz_compress() and mz_compress2():
+// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
+int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
+int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
+
+// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
+mz_ulong mz_compressBound(mz_ulong source_len);
+
+// Initializes a decompressor.
+int mz_inflateInit(mz_streamp pStream);
+
+// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
+// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
+int mz_inflateInit2(mz_streamp pStream, int window_bits);
+
+// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
+// Parameters:
+//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
+//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
+//   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
+//   MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
+// Return values:
+//   MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
+//   MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
+//   MZ_STREAM_ERROR if the stream is bogus.
+//   MZ_DATA_ERROR if the deflate stream is invalid.
+//   MZ_PARAM_ERROR if one of the parameters is invalid.
+//   MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
+//   with more input data, or with more room in the output buffer (except when using single call decompression, described above).
+int mz_inflate(mz_streamp pStream, int flush);
+
+// Deinitializes a decompressor.
+int mz_inflateEnd(mz_streamp pStream);
+
+// Single-call decompression.
+// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
+int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
+
+// Returns a string description of the specified error code, or NULL if the error code is invalid.
+const char *mz_error(int err);
+
+// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
+// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+  typedef unsigned char Byte;
+  typedef unsigned int uInt;
+  typedef mz_ulong uLong;
+  typedef Byte Bytef;
+  typedef uInt uIntf;
+  typedef char charf;
+  typedef int intf;
+  typedef void *voidpf;
+  typedef uLong uLongf;
+  typedef void *voidp;
+  typedef void *const voidpc;
+  #define Z_NULL                0
+  #define Z_NO_FLUSH            MZ_NO_FLUSH
+  #define Z_PARTIAL_FLUSH       MZ_PARTIAL_FLUSH
+  #define Z_SYNC_FLUSH          MZ_SYNC_FLUSH
+  #define Z_FULL_FLUSH          MZ_FULL_FLUSH
+  #define Z_FINISH              MZ_FINISH
+  #define Z_BLOCK               MZ_BLOCK
+  #define Z_OK                  MZ_OK
+  #define Z_STREAM_END          MZ_STREAM_END
+  #define Z_NEED_DICT           MZ_NEED_DICT
+  #define Z_ERRNO               MZ_ERRNO
+  #define Z_STREAM_ERROR        MZ_STREAM_ERROR
+  #define Z_DATA_ERROR          MZ_DATA_ERROR
+  #define Z_MEM_ERROR           MZ_MEM_ERROR
+  #define Z_BUF_ERROR           MZ_BUF_ERROR
+  #define Z_VERSION_ERROR       MZ_VERSION_ERROR
+  #define Z_PARAM_ERROR         MZ_PARAM_ERROR
+  #define Z_NO_COMPRESSION      MZ_NO_COMPRESSION
+  #define Z_BEST_SPEED          MZ_BEST_SPEED
+  #define Z_BEST_COMPRESSION    MZ_BEST_COMPRESSION
+  #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
+  #define Z_DEFAULT_STRATEGY    MZ_DEFAULT_STRATEGY
+  #define Z_FILTERED            MZ_FILTERED
+  #define Z_HUFFMAN_ONLY        MZ_HUFFMAN_ONLY
+  #define Z_RLE                 MZ_RLE
+  #define Z_FIXED               MZ_FIXED
+  #define Z_DEFLATED            MZ_DEFLATED
+  #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
+  #define alloc_func            mz_alloc_func
+  #define free_func             mz_free_func
+  #define internal_state        mz_internal_state
+  #define z_stream              mz_stream
+  #define deflateInit           mz_deflateInit
+  #define deflateInit2          mz_deflateInit2
+  #define deflateReset          mz_deflateReset
+  #define deflate               mz_deflate
+  #define deflateEnd            mz_deflateEnd
+  #define deflateBound          mz_deflateBound
+  #define compress              mz_compress
+  #define compress2             mz_compress2
+  #define compressBound         mz_compressBound
+  #define inflateInit           mz_inflateInit
+  #define inflateInit2          mz_inflateInit2
+  #define inflate               mz_inflate
+  #define inflateEnd            mz_inflateEnd
+  #define uncompress            mz_uncompress
+  #define crc32                 mz_crc32
+  #define adler32               mz_adler32
+  #define MAX_WBITS             15
+  #define MAX_MEM_LEVEL         9
+  #define zError                mz_error
+  #define ZLIB_VERSION          MZ_VERSION
+  #define ZLIB_VERNUM           MZ_VERNUM
+  #define ZLIB_VER_MAJOR        MZ_VER_MAJOR
+  #define ZLIB_VER_MINOR        MZ_VER_MINOR
+  #define ZLIB_VER_REVISION     MZ_VER_REVISION
+  #define ZLIB_VER_SUBREVISION  MZ_VER_SUBREVISION
+  #define zlibVersion           mz_version
+  #define zlib_version          mz_version()
+#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+
+#endif // MINIZ_NO_ZLIB_APIS
+
+// ------------------- Types and macros
+
+typedef unsigned char mz_uint8;
+typedef signed short mz_int16;
+typedef unsigned short mz_uint16;
+typedef unsigned int mz_uint32;
+typedef unsigned int mz_uint;
+typedef long long mz_int64;
+typedef unsigned long long mz_uint64;
+typedef int mz_bool;
+
+#define MZ_FALSE (0)
+#define MZ_TRUE (1)
+
+// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
+#ifdef _MSC_VER
+   #define MZ_MACRO_END while (0, 0)
+#else
+   #define MZ_MACRO_END while (0)
+#endif
+
+// ------------------- ZIP archive reading/writing
+
+#ifndef MINIZ_NO_ARCHIVE_APIS
+
+enum // Size limits used by the ZIP archive API's.
+{
+  MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024, // 64 KiB; presumably the chunk size for archive I/O buffers -- confirm in miniz.c
+  MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, // size in bytes of mz_zip_archive_file_stat::m_filename
+  MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 // size in bytes of mz_zip_archive_file_stat::m_comment
+};
+
+typedef struct // Per-entry record returned through the pStat argument of mz_zip_reader_file_stat().
+{
+  mz_uint32 m_file_index; // presumably the entry's index within the archive -- confirm in miniz.c
+  mz_uint32 m_central_dir_ofs; // presumably the offset of this entry's central directory record -- confirm in miniz.c
+  mz_uint16 m_version_made_by;
+  mz_uint16 m_version_needed;
+  mz_uint16 m_bit_flag;
+  mz_uint16 m_method;
+#ifndef MINIZ_NO_TIME
+  time_t m_time; // this member only exists when MINIZ_NO_TIME is not defined, so the struct layout depends on that macro
+#endif
+  mz_uint32 m_crc32;
+  mz_uint64 m_comp_size; // stored as 64-bit although the header notes state there is no zip64 support
+  mz_uint64 m_uncomp_size; // stored as 64-bit although the header notes state there is no zip64 support
+  mz_uint16 m_internal_attr;
+  mz_uint32 m_external_attr;
+  mz_uint64 m_local_header_ofs;
+  mz_uint32 m_comment_size; // presumably the number of bytes used in m_comment -- confirm in miniz.c
+  char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; // fixed 260-byte buffer
+  char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; // fixed 256-byte buffer
+} mz_zip_archive_file_stat;
+
+typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
+typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
+
+struct mz_zip_internal_state_tag;
+typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
+
+typedef enum // State tag stored in mz_zip_archive::m_zip_mode.
+{
+  MZ_ZIP_MODE_INVALID = 0, // value 0, so a zero-filled mz_zip_archive starts out in this mode
+  MZ_ZIP_MODE_READING = 1, // presumably set by the mz_zip_reader_init*() functions -- confirm in miniz.c
+  MZ_ZIP_MODE_WRITING = 2, // presumably set by the mz_zip_writer init functions -- confirm in miniz.c
+  MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 // presumably set once the writer has finalized the archive -- confirm in miniz.c
+} mz_zip_mode;
+
+typedef struct mz_zip_archive_tag // Archive handle taken as pZip by the mz_zip_reader_*() functions declared below.
+{
+  mz_uint64 m_archive_size;
+  mz_uint64 m_central_directory_file_ofs;
+  mz_uint m_total_files; // presumably the count reported by mz_zip_reader_get_num_files() -- confirm in miniz.c
+  mz_zip_mode m_zip_mode; // one of the mz_zip_mode values
+
+  mz_uint m_file_offset_alignment; // presumably the writer's power-of-2 alignment described in the header notes -- confirm in miniz.c
+
+  mz_alloc_func m_pAlloc; // heap allocation callback
+  mz_free_func m_pFree; // heap free callback
+  mz_realloc_func m_pRealloc; // heap reallocation callback
+  void *m_pAlloc_opaque; // presumably passed as the opaque argument of the three heap callbacks -- confirm in miniz.c
+
+  mz_file_read_func m_pRead; // read callback: (pOpaque, file_ofs, pBuf, n), returns a size_t
+  mz_file_write_func m_pWrite; // write callback: (pOpaque, file_ofs, pBuf, n), returns a size_t
+  void *m_pIO_opaque; // presumably passed as the pOpaque argument of m_pRead/m_pWrite -- confirm in miniz.c
+
+  mz_zip_internal_state *m_pState; // internal state; the type is only forward-declared in this header
+
+} mz_zip_archive;
+
+typedef enum // Bit flags for the mz_uint/mz_uint32 flags parameters of the ZIP API's.
+{
+  MZ_ZIP_FLAG_CASE_SENSITIVE                = 0x0100, // listed as a valid flag for mz_zip_reader_locate_file()
+  MZ_ZIP_FLAG_IGNORE_PATH                   = 0x0200, // listed as a valid flag for mz_zip_reader_locate_file()
+  MZ_ZIP_FLAG_COMPRESSED_DATA               = 0x0400, // presumably makes the extract functions hand back the still-compressed data -- confirm in miniz.c
+  MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 // passed to the reader init functions; see the v1.15 change history note about this flag
+} mz_zip_flags;
+
+// ZIP archive reading
+
+// Inits a ZIP archive reader.
+// These functions read and validate the archive's central directory.
+mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags);
+mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags);
+
+#ifndef MINIZ_NO_STDIO
+mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
+#endif
+
+// Returns the total number of files in the archive.
+mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
+
+// Returns detailed information about an archive file entry.
+mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);
+
+// Determines if an archive file entry is a directory entry.
+mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);
+mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);
+
+// Retrieves the filename of an archive file entry.
+// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
+mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);
+
+// Attempts to locate a file in the archive's central directory.
+// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
+// Returns -1 if the file cannot be found.
+int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
+
+// Extracts an archive file to a memory buffer using no memory allocation.
+mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
+mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
+
+// Extracts an archive file to a memory buffer.
+mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags);
+mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags);
+
+// Extracts an archive file to a dynamically allocated heap buffer.
+void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags);
+void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags);
+
+// Extracts an archive file using a callback function to output the file's data.
+mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
+mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
+
+#ifndef MINIZ_NO_STDIO
+// Extracts an archive file to a disk file and sets its last accessed and modified times.
+// This function only extracts files, not archive directory records.
+mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags);
+mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags);
+#endif
+
+// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
+mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
+
+// ZIP archive writing
+
+#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
+// Inits a ZIP archive writer.
+mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
+mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);
+
+#ifndef MINIZ_NO_STDIO
+mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning);
+#endif
+
+// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
+// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
+// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
+// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
+// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
+// the archive is finalized the file's central directory will be hosed.
+mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename);
+
+// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
+// To add a directory entry, call this method with an archive name ending in a forward slash and an empty buffer.
+// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
+mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags);
+mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32);
+
+#ifndef MINIZ_NO_STDIO
+// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive.
+// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
+mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
+#endif
+
+// Adds a file to an archive by fully cloning the data from another archive.
+// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields.
+mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index);
+
+// Finalizes the archive by writing the central directory records followed by the end of central directory record.
+// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end().
+// An archive must be manually finalized by calling this function for it to be valid.
+mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
+mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize);
+
+// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
+// Note for the archive to be valid, it must have been finalized before ending.
+mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
+
+// Misc. high-level helper functions:
+
+// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
+// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
+mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
+
+// Reads a single file from an archive into a heap block.
+// Returns NULL on failure.
+void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags);
+
+#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
+#endif // #ifndef MINIZ_NO_ARCHIVE_APIS
+
+// ------------------- Low-level Decompression API Definitions
+
+// Decompression flags used by tinfl_decompress().
+// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
+// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
+// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
+// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
+enum
+{
+  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
+  TINFL_FLAG_HAS_MORE_INPUT = 2,
+  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
+  TINFL_FLAG_COMPUTE_ADLER32 = 8
+};
+
+// High level decompression functions:
+// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
+// On entry:
+//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
+// On return:
+//  Function returns a pointer to the decompressed data, or NULL on failure.
+//  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
+//  The caller must call mz_free() on the returned block when it's no longer needed.
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
+
+// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
+// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
+#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
+size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
+
+// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
+// Returns 1 on success or 0 on failure.
+typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
+int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
+
+struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
+
+// Max size of LZ dictionary.
+#define TINFL_LZ_DICT_SIZE 32768
+
+// Return status.
+typedef enum
+{
+  TINFL_STATUS_BAD_PARAM = -3,
+  TINFL_STATUS_ADLER32_MISMATCH = -2,
+  TINFL_STATUS_FAILED = -1,
+  TINFL_STATUS_DONE = 0,
+  TINFL_STATUS_NEEDS_MORE_INPUT = 1,
+  TINFL_STATUS_HAS_MORE_OUTPUT = 2
+} tinfl_status;
+
+// Initializes the decompressor to its initial state.
+#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
+#define tinfl_get_adler32(r) (r)->m_check_adler32
+
+// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
+// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
+tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
+
+// Internal/private bits follow.
+enum
+{
+  TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19,
+  TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
+};
+
+typedef struct
+{
+  mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
+  mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
+} tinfl_huff_table;
+
+#if MINIZ_HAS_64BIT_REGISTERS
+  #define TINFL_USE_64BIT_BITBUF 1
+#endif
+
+#if TINFL_USE_64BIT_BITBUF
+  typedef mz_uint64 tinfl_bit_buf_t;
+  #define TINFL_BITBUF_SIZE (64)
+#else
+  typedef mz_uint32 tinfl_bit_buf_t;
+  #define TINFL_BITBUF_SIZE (32)
+#endif
+
+struct tinfl_decompressor_tag
+{
+  mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
+  tinfl_bit_buf_t m_bit_buf;
+  size_t m_dist_from_out_buf_start;
+  tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
+  mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
+};
+
+// ------------------- Low-level Compression API Definitions
+
+// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
+#define TDEFL_LESS_MEMORY 0
+
+// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
+// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression).
+enum
+{
+  TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF
+};
+
+// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
+// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
+// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
+// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
+// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
+// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
+// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
+// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
+// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
+enum
+{
+  TDEFL_WRITE_ZLIB_HEADER             = 0x01000,
+  TDEFL_COMPUTE_ADLER32               = 0x02000,
+  TDEFL_GREEDY_PARSING_FLAG           = 0x04000,
+  TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
+  TDEFL_RLE_MATCHES                   = 0x10000,
+  TDEFL_FILTER_MATCHES                = 0x20000,
+  TDEFL_FORCE_ALL_STATIC_BLOCKS       = 0x40000,
+  TDEFL_FORCE_ALL_RAW_BLOCKS          = 0x80000
+};
+
+// High level compression functions:
+// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
+// On entry:
+//  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
+//  flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
+// On return:
+//  Function returns a pointer to the compressed data, or NULL on failure.
+//  *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
+//  The caller must free() the returned block when it's no longer needed.
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
+
+// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
+// Returns 0 on failure.
+size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
+
+// Compresses an image to a compressed PNG file in memory.
+// On entry:
+//  pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. 
+//  The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
+//  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
+//  If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
+// On return:
+//  Function returns a pointer to the compressed data, or NULL on failure.
+//  *pLen_out will be set to the size of the PNG image file.
+//  The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
+void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
+void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
+
+// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
+typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
+
+// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
+
+enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };
+
+// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
+#if TDEFL_LESS_MEMORY
+enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
+#else
+enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
+#endif
+
+// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
+typedef enum
+{
+  TDEFL_STATUS_BAD_PARAM = -2,
+  TDEFL_STATUS_PUT_BUF_FAILED = -1,
+  TDEFL_STATUS_OKAY = 0,
+  TDEFL_STATUS_DONE = 1,
+} tdefl_status;
+
+// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
+typedef enum
+{
+  TDEFL_NO_FLUSH = 0,
+  TDEFL_SYNC_FLUSH = 2,
+  TDEFL_FULL_FLUSH = 3,
+  TDEFL_FINISH = 4
+} tdefl_flush;
+
+// tdefl's compression state structure.
+typedef struct
+{
+  tdefl_put_buf_func_ptr m_pPut_buf_func;
+  void *m_pPut_buf_user;
+  mz_uint m_flags, m_max_probes[2];
+  int m_greedy_parsing;
+  mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
+  mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
+  mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
+  mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
+  tdefl_status m_prev_return_status;
+  const void *m_pIn_buf;
+  void *m_pOut_buf;
+  size_t *m_pIn_buf_size, *m_pOut_buf_size;
+  tdefl_flush m_flush;
+  const mz_uint8 *m_pSrc;
+  size_t m_src_buf_left, m_out_buf_ofs;
+  mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
+  mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+  mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+  mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+  mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
+  mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
+  mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
+  mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
+} tdefl_compressor;
+
+// Initializes the compressor.
+// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
+// pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
+// If pPut_buf_func is NULL the user should always call the tdefl_compress() API.
+// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
+
+// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
+tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
+
+// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
+// tdefl_compress_buffer() always consumes the entire input buffer.
+tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
+
+tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
+mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
+
+// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS is defined, because it uses some of its macros.
+#ifndef MINIZ_NO_ZLIB_APIS
+// Create tdefl_compress() flags given zlib-style compression parameters.
+// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
+// window_bits may be -15 (raw deflate) or 15 (zlib)
+// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
+mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
+#endif // #ifndef MINIZ_NO_ZLIB_APIS
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MINIZ_HEADER_INCLUDED
+
diff --git a/Utilities/nes_ntsc.cpp b/Utilities/nes_ntsc.cpp
new file mode 100644
index 0000000..9c0a9be
--- /dev/null
+++ b/Utilities/nes_ntsc.cpp
@@ -0,0 +1,291 @@
+#include "stdafx.h"
+
+/* nes_ntsc 0.2.2. http://www.slack.net/~ant/ */
+
+#include "nes_ntsc.h"
+
+/* Copyright (C) 2006-2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+nes_ntsc_setup_t const nes_ntsc_monochrome = { 0,-1, 0, 0,.2,  0,.2,-.2,-.2,-1, 1, 0, 0, 0, 0 }; /* positional fields of nes_ntsc_setup_t: hue, saturation, contrast, brightness, sharpness, gamma, resolution, artifacts, fringing, bleed, merge_fields, decoder_matrix, palette_out, palette, base_palette; here saturation = -1 (grayscale) and bleed = -1 */
+nes_ntsc_setup_t const nes_ntsc_composite  = { 0, 0, 0, 0, 0,  0, 0,  0,  0, 0, 1, 0, 0, 0, 0 }; /* every parameter 0 except merge_fields = 1; also the preset nes_ntsc_init() uses when setup is NULL */
+nes_ntsc_setup_t const nes_ntsc_svideo     = { 0, 0, 0, 0,.2,  0,.2, -1, -1, 0, 1, 0, 0, 0, 0 }; /* sharpness .2, resolution .2, artifacts = fringing = -1, bleed 0 */
+nes_ntsc_setup_t const nes_ntsc_rgb        = { 0, 0, 0, 0,.2,  0,.7, -1, -1,-1, 1, 0, 0, 0, 0 }; /* sharpness .2, resolution .7, artifacts = fringing = bleed = -1 */
+
+#define alignment_count 3
+#define burst_count     3
+#define rescale_in      8
+#define rescale_out     7
+
+#define artifacts_mid   1.0f
+#define fringing_mid    1.0f
+#define std_decoder_hue -15
+
+#define STD_HUE_CONDITION( setup ) !(setup->base_palette || setup->palette)
+
+#include "nes_ntsc_impl.h"
+
+/* 3 input pixels -> 8 composite samples; one table entry per alignment (alignment_count == 3). PIXEL_OFFSET and pixel_info_t are not defined in this file -- presumably in nes_ntsc_impl.h. */
+pixel_info_t const nes_ntsc_pixels [alignment_count] = {
+	{ PIXEL_OFFSET( -4, -9 ), { 1, 1, .6667f, 0 } },
+	{ PIXEL_OFFSET( -2, -7 ), {       .3333f, 1, 1, .3333f } },
+	{ PIXEL_OFFSET(  0, -5 ), {                  0, .6667f, 1, 1 } },
+};
+
+static void merge_kernel_fields( nes_ntsc_rgb_t* io ) /* in place: replaces each of three burst_size-long sub-blocks of io with the average of itself and the next one (wrapping 2 -> 0) */
+{
+	int n;
+	for ( n = burst_size; n; --n ) /* one pass per position within a sub-block; io is advanced at the bottom */
+	{
+		nes_ntsc_rgb_t p0 = io [burst_size * 0] + rgb_bias; /* snapshot all three inputs first so the stores below do not feed into each other */
+		nes_ntsc_rgb_t p1 = io [burst_size * 1] + rgb_bias;
+		nes_ntsc_rgb_t p2 = io [burst_size * 2] + rgb_bias;
+		/* merge colors without losing precision: the bits of (a ^ b) selected by nes_ntsc_rgb_builder are subtracted before the halving shift (presumably so the shift cannot carry a bit across packed channels -- confirm against nes_ntsc_impl.h) */
+		io [burst_size * 0] =
+				((p0 + p1 - ((p0 ^ p1) & nes_ntsc_rgb_builder)) >> 1) - rgb_bias;
+		io [burst_size * 1] =
+				((p1 + p2 - ((p1 ^ p2) & nes_ntsc_rgb_builder)) >> 1) - rgb_bias;
+		io [burst_size * 2] =
+				((p2 + p0 - ((p2 ^ p0) & nes_ntsc_rgb_builder)) >> 1) - rgb_bias;
+		++io;
+	}
+}
+
+static void correct_errors( nes_ntsc_rgb_t color, nes_ntsc_rgb_t* out ) /* for each burst, measures how far six kernel entries sum from color and hands that error to DISTRIBUTE_ERROR */
+{
+	int n;
+	for ( n = burst_count; n; --n ) /* burst_count is 3 in this file; out is advanced one burst per pass */
+	{
+		unsigned i;
+		for ( i = 0; i < rgb_kernel_size / 2; i++ ) /* only the first half of the kernel indices is visited; the i + 7 / i + 5 / i + 3 terms reach into the rest */
+		{
+			nes_ntsc_rgb_t error = color -
+					out [i    ] - out [(i+12)%14+14] - out [(i+10)%14+28] - /* the +14 and +28 offsets presumably select the 2nd and 3rd alignment kernels (rgb_kernel_size == 14?) -- TODO confirm */
+					out [i + 7] - out [i + 5    +14] - out [i + 3    +28];
+			DISTRIBUTE_ERROR( i+3+28, i+5+14, i+7 ); /* macro defined outside this file; presumably reads error and adjusts out[] at these three indices */
+		}
+		out += alignment_count * rgb_kernel_size; /* step to the next burst's block of alignment_count kernels */
+	}
+}
+
+void nes_ntsc_init( nes_ntsc_t* ntsc, nes_ntsc_setup_t const* setup ) /* for every palette entry: derives a YIQ color from setup, optionally writes it to setup->palette_out, and (if ntsc is non-NULL) fills ntsc->table [entry] */
+{
+	int merge_fields;
+	int entry;
+	init_t impl;
+	float gamma_factor;
+	
+	if ( !setup )
+		setup = &nes_ntsc_composite; /* NULL setup falls back to the composite preset */
+	init( &impl, setup ); /* init() and init_t are not defined in this file -- presumably nes_ntsc_impl.h */
+	
+	/* setup fast gamma */
+	{
+		float gamma = (float) setup->gamma * -0.5f;
+		if ( STD_HUE_CONDITION( setup ) ) /* true only when neither base_palette nor palette is supplied */
+			gamma += 0.1333f;
+		
+		gamma_factor = (float) pow( (float) fabs( gamma ), 0.73f );
+		if ( gamma < 0 )
+			gamma_factor = -gamma_factor; /* keep the sign of gamma on the factor */
+	}
+	
+	merge_fields = setup->merge_fields;
+	if ( setup->artifacts <= -1 && setup->fringing <= -1 )
+		merge_fields = 1; /* merging is forced when both artifacts and fringing are at or below -1 */
+	
+	for ( entry = 0; entry < nes_ntsc_palette_size; entry++ )
+	{
+		/* Base 64-color generation */
+		static float const lo_levels [4] = { -0.12f, 0.00f, 0.31f, 0.72f };
+		static float const hi_levels [4] = {  0.40f, 0.68f, 1.00f, 1.00f };
+		int level = entry >> 4 & 0x03; /* bits 4-5 of entry */
+		float lo = lo_levels [level];
+		float hi = hi_levels [level];
+		
+		int color = entry & 0x0F; /* low four bits of entry */
+		if ( color == 0 )
+			lo = hi; /* color 0: flat at the high level, so sat below is 0 */
+		if ( color == 0x0D )
+			hi = lo; /* color 0x0D: flat at the low level */
+		if ( color > 0x0D )
+			hi = lo = 0.0f; /* colors 0x0E and 0x0F: both levels zero */
+		
+		{
+			/* phases [i] = -cos( i * PI / 6 ) */
+			static float const phases [0x10 + 3] = {
+				-1.0f, -0.866025f, -0.5f, 0.0f,  0.5f,  0.866025f,
+				 1.0f,  0.866025f,  0.5f, 0.0f, -0.5f, -0.866025f,
+				-1.0f, -0.866025f, -0.5f, 0.0f,  0.5f,  0.866025f,
+				 1.0f
+			};
+			#define TO_ANGLE_SIN( color )   phases [color]
+			#define TO_ANGLE_COS( color )   phases [(color) + 3]
+			
+			/* Convert raw waveform to YIQ */
+			float sat = (hi - lo) * 0.5f;
+			float i = TO_ANGLE_SIN( color ) * sat;
+			float q = TO_ANGLE_COS( color ) * sat;
+			float y = (hi + lo) * 0.5f;
+			
+			/* Optionally use base palette instead */
+			if ( setup->base_palette )
+			{
+				unsigned char const* in = &setup->base_palette [(entry & 0x3F) * 3]; /* 3 bytes per color, indexed by the low 6 bits of entry */
+				static float const to_float = 1.0f / 0xFF;
+				float r = to_float * in [0];
+				float g = to_float * in [1];
+				float b = to_float * in [2];
+				q = RGB_TO_YIQ( r, g, b, y, i ); /* macro from outside this file; presumably also stores into y and i -- confirm */
+			}
+			
+			/* Apply color emphasis */
+			#ifdef NES_NTSC_EMPHASIS
+			{
+				int tint = entry >> 6 & 7; /* bits 6-8 of entry */
+				if ( tint && color <= 0x0D )
+				{
+					static float const atten_mul = 0.79399f;
+					static float const atten_sub = 0.0782838f;
+					
+					if ( tint == 7 )
+					{
+						y = y * (atten_mul * 1.13f) - (atten_sub * 1.13f);
+					}
+					else
+					{
+						static unsigned char const tints [8] = { 0, 6, 10, 8, 2, 4, 0, 0 };
+						int const tint_color = tints [tint];
+						float sat = hi * (0.5f - atten_mul * 0.5f) + atten_sub * 0.5f; /* shadows the outer sat within this branch */
+						y -= sat * 0.5f;
+						if ( tint >= 3 && tint != 4 )
+						{
+							/* combined tint bits */
+							sat *= 0.6f;
+							y -= sat;
+						}
+						i += TO_ANGLE_SIN( tint_color ) * sat;
+						q += TO_ANGLE_COS( tint_color ) * sat;
+					}
+				}
+			}
+			#endif
+			
+			/* Optionally use palette instead */
+			if ( setup->palette )
+			{
+				unsigned char const* in = &setup->palette [entry * 3]; /* 3 bytes per color, indexed by the full entry value */
+				static float const to_float = 1.0f / 0xFF;
+				float r = to_float * in [0];
+				float g = to_float * in [1];
+				float b = to_float * in [2];
+				q = RGB_TO_YIQ( r, g, b, y, i );
+			}
+			
+			/* Apply brightness, contrast, and gamma */
+			y *= (float) setup->contrast * 0.5f + 1;
+			/* adjustment reduces error when using input palette */
+			y += (float) setup->brightness * 0.5f - 0.5f / 256;
+			
+			{
+				float r, g, b = YIQ_TO_RGB( y, i, q, default_decoder, float, r, g ); /* macro yields b as its value; presumably assigns r and g as well -- confirm */
+				
+				/* fast approximation of n = pow( n, gamma ) */
+				r = (r * gamma_factor - gamma_factor) * r + r;
+				g = (g * gamma_factor - gamma_factor) * g + g;
+				b = (b * gamma_factor - gamma_factor) * b + b;
+				
+				q = RGB_TO_YIQ( r, g, b, y, i );
+			}
+			
+			i *= rgb_unit;
+			q *= rgb_unit;
+			y *= rgb_unit;
+			y += rgb_offset;
+			
+			/* Generate kernel */
+			{
+				int r, g, b = YIQ_TO_RGB( y, i, q, impl.to_rgb, int, r, g );
+				/* blue tends to overflow, so clamp it */
+				nes_ntsc_rgb_t rgb = PACK_RGB( r, g, (b < 0x3E0 ? b: 0x3E0) );
+				
+				if ( setup->palette_out )
+					RGB_PALETTE_OUT( rgb, &setup->palette_out [entry * 3] ); /* 3 output bytes per entry */
+				
+				if ( ntsc ) /* a NULL ntsc skips kernel generation; only palette_out (if set) is produced */
+				{
+					nes_ntsc_rgb_t* kernel = ntsc->table [entry];
+					gen_kernel( &impl, y, i, q, kernel );
+					if ( merge_fields )
+						merge_kernel_fields( kernel );
+					correct_errors( rgb, kernel );
+				}
+			}
+		}
+	}
+}
+
+#ifndef NES_NTSC_NO_BLITTERS
+
+void nes_ntsc_blit( nes_ntsc_t const* ntsc, NES_NTSC_IN_T const* input, long in_row_width,
+		int burst_phase, int in_width, int in_height, void* rgb_out, long out_pitch ) /* filters in_height rows; input advances in_row_width elements per row, rgb_out advances out_pitch bytes per row */
+{
+	int chunk_count = (in_width - 1) / nes_ntsc_in_chunk; /* the first pixel of each row is consumed by NES_NTSC_BEGIN_ROW below, hence in_width - 1 */
+	for ( ; in_height; --in_height )
+	{
+		NES_NTSC_IN_T const* line_in = input;
+		NES_NTSC_BEGIN_ROW( ntsc, burst_phase,
+				nes_ntsc_black, nes_ntsc_black, NES_NTSC_ADJ_IN( *line_in ) ); /* row is primed with two black pixels followed by the first input pixel */
+		nes_ntsc_out_t* restrict line_out = (nes_ntsc_out_t*) rgb_out;
+		int n;
+		++line_in; /* skip the pixel already passed to NES_NTSC_BEGIN_ROW */
+		
+		for ( n = chunk_count; n; --n ) /* each pass reads 3 input pixels and writes 7 output pixels */
+		{
+			/* order of input and output pixels must not be altered */
+			NES_NTSC_COLOR_IN( 0, NES_NTSC_ADJ_IN( line_in [0] ) );
+			NES_NTSC_RGB_OUT( 0, line_out [0], NES_NTSC_OUT_DEPTH );
+			NES_NTSC_RGB_OUT( 1, line_out [1], NES_NTSC_OUT_DEPTH );
+			
+			NES_NTSC_COLOR_IN( 1, NES_NTSC_ADJ_IN( line_in [1] ) );
+			NES_NTSC_RGB_OUT( 2, line_out [2], NES_NTSC_OUT_DEPTH );
+			NES_NTSC_RGB_OUT( 3, line_out [3], NES_NTSC_OUT_DEPTH );
+			
+			NES_NTSC_COLOR_IN( 2, NES_NTSC_ADJ_IN( line_in [2] ) );
+			NES_NTSC_RGB_OUT( 4, line_out [4], NES_NTSC_OUT_DEPTH );
+			NES_NTSC_RGB_OUT( 5, line_out [5], NES_NTSC_OUT_DEPTH );
+			NES_NTSC_RGB_OUT( 6, line_out [6], NES_NTSC_OUT_DEPTH );
+			
+			line_in  += 3;
+			line_out += 7;
+		}
+		
+		/* finish final pixels: three black inputs are fed in and 7 more output pixels are written */
+		NES_NTSC_COLOR_IN( 0, nes_ntsc_black );
+		NES_NTSC_RGB_OUT( 0, line_out [0], NES_NTSC_OUT_DEPTH );
+		NES_NTSC_RGB_OUT( 1, line_out [1], NES_NTSC_OUT_DEPTH );
+		
+		NES_NTSC_COLOR_IN( 1, nes_ntsc_black );
+		NES_NTSC_RGB_OUT( 2, line_out [2], NES_NTSC_OUT_DEPTH );
+		NES_NTSC_RGB_OUT( 3, line_out [3], NES_NTSC_OUT_DEPTH );
+		
+		NES_NTSC_COLOR_IN( 2, nes_ntsc_black );
+		NES_NTSC_RGB_OUT( 4, line_out [4], NES_NTSC_OUT_DEPTH );
+		NES_NTSC_RGB_OUT( 5, line_out [5], NES_NTSC_OUT_DEPTH );
+		NES_NTSC_RGB_OUT( 6, line_out [6], NES_NTSC_OUT_DEPTH );
+		
+		burst_phase = (burst_phase + 1) % nes_ntsc_burst_count; /* burst phase cycles once per row */
+		input += in_row_width;
+		rgb_out = (char*) rgb_out + out_pitch; /* out_pitch is applied in bytes (char* arithmetic) */
+	}
+}
+
+#endif
diff --git a/Utilities/nes_ntsc.h b/Utilities/nes_ntsc.h
new file mode 100644
index 0000000..dd55f0f
--- /dev/null
+++ b/Utilities/nes_ntsc.h
@@ -0,0 +1,199 @@
+#pragma once
+/* NES NTSC video filter */
+
+/* nes_ntsc 0.2.2 */
+#ifndef NES_NTSC_H
+#define NES_NTSC_H
+
+#if defined(_MSC_VER)
+    #define EXPORT __declspec(dllexport)
+#else
+    #define EXPORT 
+#endif 
+
+#include "nes_ntsc_config.h"
+
+#ifdef __cplusplus
+	extern "C" {
+#endif
+
+/* Image parameters, ranging from -1.0 to 1.0. Actual internal values are shown
+in parentheses and should remain fairly stable in future versions. */
+typedef struct nes_ntsc_setup_t
+{
+	/* Basic parameters */
+	double hue;        /* -1 = -180 degrees     +1 = +180 degrees */
+	double saturation; /* -1 = grayscale (0.0)  +1 = oversaturated colors (2.0) */
+	double contrast;   /* -1 = dark (0.5)       +1 = light (1.5) */
+	double brightness; /* -1 = dark (0.5)       +1 = light (1.5) */
+	double sharpness;  /* edge contrast enhancement/blurring */
+	
+	/* Advanced parameters */
+	double gamma;      /* -1 = dark (1.5)       +1 = light (0.5) */
+	double resolution; /* image resolution */
+	double artifacts;  /* artifacts caused by color changes */
+	double fringing;   /* color artifacts caused by brightness changes */
+	double bleed;      /* color bleed (color resolution reduction) */
+	int merge_fields;  /* if 1, merges even and odd fields together to reduce flicker */
+	float const* decoder_matrix; /* optional RGB decoder matrix, 6 elements (3 i/q pairs); NULL = built-in default */
+	
+	unsigned char* palette_out;  /* optional RGB palette out, 3 bytes per color */
+	
+	/* You can replace the standard NES color generation with an RGB palette.
+	`palette` replaces all color generation, while `base_palette` replaces only the
+	core 64-color generation and has standard color emphasis calculations applied. */
+	unsigned char const* palette;/* optional 512-entry RGB palette in, 3 bytes per color */
+	unsigned char const* base_palette;/* optional 64-entry RGB palette in, 3 bytes per color */
+} nes_ntsc_setup_t;
+
+/* Video format presets */
+extern nes_ntsc_setup_t const nes_ntsc_composite; /* color bleeding + artifacts */
+extern nes_ntsc_setup_t const nes_ntsc_svideo;    /* color bleeding only */
+extern nes_ntsc_setup_t const nes_ntsc_rgb;       /* crisp image */
+extern nes_ntsc_setup_t const nes_ntsc_monochrome;/* desaturated + artifacts */
+
+#ifdef NES_NTSC_EMPHASIS
+	enum { nes_ntsc_palette_size = 64 * 8 };
+#else
+	enum { nes_ntsc_palette_size = 64 };
+#endif
+
+/* Initializes and adjusts parameters. Can be called multiple times on the same
+nes_ntsc_t object. Can pass NULL for either parameter. */
+typedef struct nes_ntsc_t nes_ntsc_t;
+EXPORT void nes_ntsc_init( nes_ntsc_t* ntsc, nes_ntsc_setup_t const* setup );
+
+/* Filters one or more rows of pixels. Input pixels are 6/9-bit palette indices.
+In_row_width is the number of pixels to get to the next input row. Out_pitch
+is the number of *bytes* to get to the next output row. Output pixel format
+is set by NES_NTSC_OUT_DEPTH (defaults to 16-bit RGB). */
+EXPORT void nes_ntsc_blit( nes_ntsc_t const* ntsc, NES_NTSC_IN_T const* nes_in,
+		long in_row_width, int burst_phase, int in_width, int in_height,
+		void* rgb_out, long out_pitch );
+
+/* Number of output pixels written by blitter for given input width. Width might
+be rounded down slightly; use NES_NTSC_IN_WIDTH() on result to find rounded
+value. Guaranteed not to round 256 down at all. */
+#define NES_NTSC_OUT_WIDTH( in_width ) \
+	((((in_width) - 1) / nes_ntsc_in_chunk + 1) * nes_ntsc_out_chunk)
+
+/* Number of input pixels that will fit within given output width. Might be
+rounded down slightly; use NES_NTSC_OUT_WIDTH() on result to find rounded
+value. */
+#define NES_NTSC_IN_WIDTH( out_width ) \
+	(((out_width) / nes_ntsc_out_chunk - 1) * nes_ntsc_in_chunk + 1)
+
+
+/* Interface for user-defined custom blitters */
+
+enum { nes_ntsc_in_chunk    = 3  }; /* number of input pixels read per chunk */
+enum { nes_ntsc_out_chunk   = 7  }; /* number of output pixels generated per chunk */
+enum { nes_ntsc_black       = 15 }; /* palette index for black */
+enum { nes_ntsc_burst_count = 3  }; /* burst phase cycles through 0, 1, and 2 */
+
+/* Begins outputting row and starts three pixels. First pixel will be cut off a bit.
+Use nes_ntsc_black for unused pixels. Burst may be any integer expression. Declares
+variables, so must be before first statement in a block (unless you're using C++). */
+#define NES_NTSC_BEGIN_ROW( ntsc, burst, pixel0, pixel1, pixel2 ) \
+	char const* const ktable = \
+		(char const*) (ntsc)->table [0] + (burst) * (nes_ntsc_burst_size * sizeof (nes_ntsc_rgb_t));\
+	NES_NTSC_BEGIN_ROW_6_( pixel0, pixel1, pixel2, NES_NTSC_ENTRY_, ktable )
+
+/* Begins input pixel */
+#define NES_NTSC_COLOR_IN( in_index, color_in ) \
+	NES_NTSC_COLOR_IN_( in_index, color_in, NES_NTSC_ENTRY_, ktable )
+
+/* Generates output pixel. Bits can be 24, 16, 15, 32 (treated as 24), or 0:
+24:          RRRRRRRR GGGGGGGG BBBBBBBB (8-8-8 RGB)
+16:                   RRRRRGGG GGGBBBBB (5-6-5 RGB)
+15:                    RRRRRGG GGGBBBBB (5-5-5 RGB)
+ 0: xxxRRRRR RRRxxGGG GGGGGxxB BBBBBBBx (native internal format; x = junk bits) */
+#define NES_NTSC_RGB_OUT( index, rgb_out, bits ) \
+	NES_NTSC_RGB_OUT_14_( index, rgb_out, bits, 0 )
+
+
+/* private */
+enum { nes_ntsc_entry_size = 128 };
+typedef unsigned long nes_ntsc_rgb_t;
+struct nes_ntsc_t {
+	nes_ntsc_rgb_t table [nes_ntsc_palette_size] [nes_ntsc_entry_size];
+};
+enum { nes_ntsc_burst_size = nes_ntsc_entry_size / nes_ntsc_burst_count };
+
+#define NES_NTSC_ENTRY_( ktable, n ) \
+	(nes_ntsc_rgb_t const*) (ktable + (n) * (nes_ntsc_entry_size * sizeof (nes_ntsc_rgb_t)))
+
+/* deprecated */
+#define NES_NTSC_RGB24_OUT( x, out ) NES_NTSC_RGB_OUT( x, out, 24 )
+#define NES_NTSC_RGB16_OUT( x, out ) NES_NTSC_RGB_OUT( x, out, 16 )
+#define NES_NTSC_RGB15_OUT( x, out ) NES_NTSC_RGB_OUT( x, out, 15 )
+#define NES_NTSC_RAW_OUT( x, out )   NES_NTSC_RGB_OUT( x, out,  0 )
+
+enum { nes_ntsc_min_in_width  = 256 };
+enum { nes_ntsc_min_out_width = NES_NTSC_OUT_WIDTH( nes_ntsc_min_in_width ) };
+
+enum { nes_ntsc_640_in_width  = 271 };
+enum { nes_ntsc_640_out_width = NES_NTSC_OUT_WIDTH( nes_ntsc_640_in_width ) };
+enum { nes_ntsc_640_overscan_left  = 8 };
+enum { nes_ntsc_640_overscan_right = nes_ntsc_640_in_width - 256 - nes_ntsc_640_overscan_left };
+
+enum { nes_ntsc_full_in_width  = 283 };
+enum { nes_ntsc_full_out_width = NES_NTSC_OUT_WIDTH( nes_ntsc_full_in_width ) };
+enum { nes_ntsc_full_overscan_left  = 16 };
+enum { nes_ntsc_full_overscan_right = nes_ntsc_full_in_width - 256 - nes_ntsc_full_overscan_left };
+
+/* common 3->7 ntsc macros */
+#define NES_NTSC_BEGIN_ROW_6_( pixel0, pixel1, pixel2, ENTRY, table ) \
+	unsigned const nes_ntsc_pixel0_ = (pixel0);\
+	nes_ntsc_rgb_t const* kernel0  = ENTRY( table, nes_ntsc_pixel0_ );\
+	unsigned const nes_ntsc_pixel1_ = (pixel1);\
+	nes_ntsc_rgb_t const* kernel1  = ENTRY( table, nes_ntsc_pixel1_ );\
+	unsigned const nes_ntsc_pixel2_ = (pixel2);\
+	nes_ntsc_rgb_t const* kernel2  = ENTRY( table, nes_ntsc_pixel2_ );\
+	nes_ntsc_rgb_t const* kernelx0;\
+	nes_ntsc_rgb_t const* kernelx1 = kernel0;\
+	nes_ntsc_rgb_t const* kernelx2 = kernel0
+
+#define NES_NTSC_RGB_OUT_14_( x, rgb_out, bits, shift ) {\
+	nes_ntsc_rgb_t raw_ =\
+		kernel0  [x       ] + kernel1  [(x+12)%7+14] + kernel2  [(x+10)%7+28] +\
+		kernelx0 [(x+7)%14] + kernelx1 [(x+ 5)%7+21] + kernelx2 [(x+ 3)%7+35];\
+	NES_NTSC_CLAMP_( raw_, shift );\
+	NES_NTSC_RGB_OUT_( rgb_out, bits, shift );\
+}
+
+/* common ntsc macros */
+#define nes_ntsc_rgb_builder    ((1L << 21) | (1 << 11) | (1 << 1))
+#define nes_ntsc_clamp_mask     (nes_ntsc_rgb_builder * 3 / 2)
+#define nes_ntsc_clamp_add      (nes_ntsc_rgb_builder * 0x101)
+#define NES_NTSC_CLAMP_( io, shift ) {\
+	nes_ntsc_rgb_t sub = (io) >> (9-(shift)) & nes_ntsc_clamp_mask;\
+	nes_ntsc_rgb_t clamp = nes_ntsc_clamp_add - sub;\
+	io |= clamp;\
+	clamp -= sub;\
+	io &= clamp;\
+}
+
+#define NES_NTSC_COLOR_IN_( index, color, ENTRY, table ) {\
+	unsigned color_;\
+	kernelx##index = kernel##index;\
+	kernel##index = (color_ = (color), ENTRY( table, color_ ));\
+}
+
+/* x is always zero except in snes_ntsc library */
+#define NES_NTSC_RGB_OUT_( rgb_out, bits, x ) {\
+	if ( bits == 16 )\
+		rgb_out = (raw_>>(13-x)& 0xF800)|(raw_>>(8-x)&0x07E0)|(raw_>>(4-x)&0x001F);\
+	if ( bits == 32 || bits == 24 )\
+		rgb_out = (raw_>>(5-x)&0xFF0000)|(raw_>>(3-x)&0xFF00)|(raw_>>(1-x)&0xFF);\
+	if ( bits == 15 )\
+		rgb_out = (raw_>>(14-x)& 0x7C00)|(raw_>>(9-x)&0x03E0)|(raw_>>(4-x)&0x001F);\
+	if ( bits == 0 )\
+		rgb_out = raw_ << x;\
+}
+
+#ifdef __cplusplus
+	}
+#endif
+
+#endif
diff --git a/Utilities/nes_ntsc_config.h b/Utilities/nes_ntsc_config.h
new file mode 100644
index 0000000..2527822
--- /dev/null
+++ b/Utilities/nes_ntsc_config.h
@@ -0,0 +1,27 @@
+/* Configure library by modifying this file */
+
+#ifndef NES_NTSC_CONFIG_H
+#define NES_NTSC_CONFIG_H
+
+/* Emphasis support is enabled here, so a 512 color palette is used instead of
+the base 64 color palette. Comment out the define below to disable it. */
+#define NES_NTSC_EMPHASIS 1
+
+/* The following affect the built-in blitter only; a custom blitter can
+handle things however it wants. */
+
+/* Bits per pixel of output. Can be 15, 16, 32, or 24 (same as 32). */
+#define NES_NTSC_OUT_DEPTH 32
+
+/* Type of input pixel values. You'll probably use unsigned short
+if you enable emphasis above. */
+#define NES_NTSC_IN_T unsigned short
+
+/* Each raw pixel input value is passed through this. You might want to mask
+the pixel index if you use the high bits as flags, etc. */
+#define NES_NTSC_ADJ_IN( in ) in
+
+/* For each pixel, this is the basic operation:
+output_color = color_palette [NES_NTSC_ADJ_IN( NES_NTSC_IN_T )] */
+
+#endif
diff --git a/Utilities/nes_ntsc_impl.h b/Utilities/nes_ntsc_impl.h
new file mode 100644
index 0000000..de3672b
--- /dev/null
+++ b/Utilities/nes_ntsc_impl.h
@@ -0,0 +1,439 @@
+/* nes_ntsc 0.2.2. http://www.slack.net/~ant/ */
+
+/* Common implementation of NTSC filters */
+
+#include <assert.h>
+#include <math.h>
+
+/* Copyright (C) 2006 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#define DISABLE_CORRECTION 0
+
+#undef PI
+#define PI 3.14159265358979323846f
+
+#ifndef LUMA_CUTOFF
+	#define LUMA_CUTOFF 0.20
+#endif
+#ifndef gamma_size
+	#define gamma_size 1
+#endif
+#ifndef rgb_bits
+	#define rgb_bits 8
+#endif
+#ifndef artifacts_max
+	#define artifacts_max (artifacts_mid * 1.5f)
+#endif
+#ifndef fringing_max
+	#define fringing_max (fringing_mid * 2)
+#endif
+#ifndef STD_HUE_CONDITION
+	#define STD_HUE_CONDITION( setup ) 1
+#endif
+
+#define ext_decoder_hue     (std_decoder_hue + 15)
+#define rgb_unit            (1 << rgb_bits)
+#define rgb_offset          (rgb_unit * 2 + 0.5f)
+
+enum { burst_size  = nes_ntsc_entry_size / burst_count };
+enum { kernel_half = 16 };
+enum { kernel_size = kernel_half * 2 + 1 };
+
+typedef struct init_t
+{
+	float to_rgb [burst_count * 6];
+	float to_float [gamma_size];
+	float contrast;
+	float brightness;
+	float artifacts;
+	float fringing;
+	float kernel [rescale_out * kernel_size * 2];
+} init_t;
+
+#define ROTATE_IQ( i, q, sin_b, cos_b ) {\
+	float t;\
+	t = i * cos_b - q * sin_b;\
+	q = i * sin_b + q * cos_b;\
+	i = t;\
+}
+
+static void init_filters( init_t* impl, nes_ntsc_setup_t const* setup )
+{ /* fills impl->kernel using setup->sharpness, setup->resolution and setup->bleed */
+#if rescale_out > 1
+	float kernels [kernel_size * 2];
+#else
+	float* const kernels = impl->kernel;
+#endif
+	/* kernels layout: [0, kernel_size) = chroma taps, [kernel_size, kernel_size*2) = luma taps */
+	/* generate luma (y) filter using sinc kernel */
+	{
+		/* sinc with rolloff (dsf) */
+		float const rolloff = 1 + (float) setup->sharpness * (float) 0.032;
+		float const maxh = 32;
+		float const pow_a_n = (float) pow( rolloff, maxh );
+		float sum;
+		int i;
+		/* quadratic mapping to reduce negative (blurring) range */
+		float to_angle = (float) setup->resolution + 1;
+		to_angle = PI / maxh * (float) LUMA_CUTOFF * (to_angle * to_angle + 1);
+		
+		kernels [kernel_size * 3 / 2] = maxh; /* default center value; kept when the loop below skips x == 0 */
+		for ( i = 0; i < kernel_half * 2 + 1; i++ )
+		{
+			int x = i - kernel_half; /* tap position relative to the kernel center */
+			float angle = x * to_angle;
+			/* instability occurs at center point with rolloff very close to 1.0 */
+			if ( x || pow_a_n > (float) 1.056 || pow_a_n < (float) 0.981 )
+			{
+				float rolloff_cos_a = rolloff * (float) cos( angle );
+				float num = 1 - rolloff_cos_a -
+						pow_a_n * (float) cos( maxh * angle ) +
+						pow_a_n * rolloff * (float) cos( (maxh - 1) * angle );
+				float den = 1 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
+				float dsf = num / den;
+				kernels [kernel_size * 3 / 2 - kernel_half + i] = dsf - (float) 0.5;
+			}
+		}
+		
+		/* apply blackman window and find sum */
+		sum = 0;
+		for ( i = 0; i < kernel_half * 2 + 1; i++ )
+		{
+			float x = PI * 2 / (kernel_half * 2) * i;
+			float blackman = 0.42f - 0.5f * (float) cos( x ) + 0.08f * (float) cos( x * 2 );
+			sum += (kernels [kernel_size * 3 / 2 - kernel_half + i] *= blackman);
+		}
+		
+		/* normalize kernel so that the luma taps sum to 1 */
+		sum = 1.0f / sum;
+		for ( i = 0; i < kernel_half * 2 + 1; i++ )
+		{
+			int x = kernel_size * 3 / 2 - kernel_half + i;
+			kernels [x] *= sum;
+			assert( kernels [x] == kernels [x] ); /* catch numerical instability */
+		}
+	}
+
+	/* generate chroma (iq) filter using gaussian kernel */
+	{
+		float const cutoff_factor = -0.03125f;
+		float cutoff = (float) setup->bleed;
+		int i;
+		
+		if ( cutoff < 0 )
+		{
+			/* keep extreme value accessible only near upper end of scale (1.0) */
+			cutoff *= cutoff; /* three squarings raise cutoff to the 8th power */
+			cutoff *= cutoff;
+			cutoff *= cutoff;
+			cutoff *= -30.0f / 0.65f;
+		}
+		cutoff = cutoff_factor - 0.65f * cutoff_factor * cutoff;
+		
+		for ( i = -kernel_half; i <= kernel_half; i++ )
+			kernels [kernel_size / 2 + i] = (float) exp( i * i * cutoff );
+		
+		/* normalize even and odd phases separately */
+		for ( i = 0; i < 2; i++ )
+		{
+			float sum = 0;
+			int x;
+			for ( x = i; x < kernel_size; x += 2 )
+				sum += kernels [x];
+			
+			sum = 1.0f / sum;
+			for ( x = i; x < kernel_size; x += 2 )
+			{
+				kernels [x] *= sum;
+				assert( kernels [x] == kernels [x] ); /* catch numerical instability */
+			}
+		}
+	}
+	
+	/*
+	printf( "luma:\n" );
+	for ( i = kernel_size; i < kernel_size * 2; i++ )
+		printf( "%f\n", kernels [i] );
+	printf( "chroma:\n" );
+	for ( i = 0; i < kernel_size; i++ )
+		printf( "%f\n", kernels [i] );
+	*/
+	
+	/* generate linear rescale kernels: rescale_out weighted copies of kernels written to impl->kernel */
+	#if rescale_out > 1
+	{
+		float weight = 1.0f;
+		float* out = impl->kernel;
+		int n = rescale_out;
+		do
+		{
+			float remain = 0; /* part of the previous tap carried into the next output slot */
+			int i;
+			weight -= 1.0f / rescale_in;
+			for ( i = 0; i < kernel_size * 2; i++ )
+			{
+				float cur = kernels [i];
+				float m = cur * weight;
+				*out++ = m + remain;
+				remain = cur - m;
+			}
+		}
+		while ( --n );
+	}
+	#endif
+}
+
+static float const default_decoder [6] =
+	{ 0.956f, 0.621f, -0.272f, -0.647f, -1.105f, 1.702f };
+
+static void init( init_t* impl, nes_ntsc_setup_t const* setup )
+{ /* converts the user-facing setup values into the internal init_t working state */
+	impl->brightness = (float) setup->brightness * (0.5f * rgb_unit) + rgb_offset;
+	impl->contrast   = (float) setup->contrast   * (0.5f * rgb_unit) + rgb_unit;
+	#ifdef default_palette_contrast
+		if ( !setup->palette )
+			impl->contrast *= default_palette_contrast;
+	#endif
+	
+	impl->artifacts = (float) setup->artifacts;
+	if ( impl->artifacts > 0 ) /* positive values are additionally scaled by (max - mid) */
+		impl->artifacts *= artifacts_max - artifacts_mid;
+	impl->artifacts = impl->artifacts * artifacts_mid + artifacts_mid;
+
+	impl->fringing = (float) setup->fringing;
+	if ( impl->fringing > 0 ) /* same mapping as for artifacts above */
+		impl->fringing *= fringing_max - fringing_mid;
+	impl->fringing = impl->fringing * fringing_mid + fringing_mid;
+	
+	init_filters( impl, setup );
+	
+	/* generate gamma table (skipped when gamma_size is 1) */
+	if ( gamma_size > 1 )
+	{
+		float const to_float = 1.0f / (gamma_size - (gamma_size > 1));
+		float const gamma = 1.1333f - (float) setup->gamma * 0.5f;
+		/* match common PC's 2.2 gamma to TV's 2.65 gamma */
+		int i;
+		for ( i = 0; i < gamma_size; i++ )
+			impl->to_float [i] =
+					(float) pow( i * to_float, gamma ) * impl->contrast + impl->brightness;
+	}
+	
+	/* setup decoder matrices: one 6-element matrix per burst phase in impl->to_rgb */
+	{
+		float hue = (float) setup->hue * PI + PI / 180 * ext_decoder_hue;
+		float sat = (float) setup->saturation + 1;
+		float const* decoder = setup->decoder_matrix;
+		if ( !decoder )
+		{
+			decoder = default_decoder; /* no custom matrix supplied */
+			if ( STD_HUE_CONDITION( setup ) )
+				hue += PI / 180 * (std_decoder_hue - ext_decoder_hue);
+		}
+		
+		{
+			float s = (float) sin( hue ) * sat;
+			float c = (float) cos( hue ) * sat;
+			float* out = impl->to_rgb;
+			int n;
+			
+			n = burst_count;
+			do
+			{
+				float const* in = decoder;
+				int n = 3; /* shadows the outer n; counts the 3 (i, q) pairs of the decoder */
+				do
+				{
+					float i = *in++;
+					float q = *in++;
+					*out++ = i * c - q * s; /* rotate the (i, q) pair by the hue angle, scaled by saturation */
+					*out++ = i * s + q * c;
+				}
+				while ( --n );
+				if ( burst_count <= 1 )
+					break;
+				ROTATE_IQ( s, c, 0.866025f, -0.5f ); /* +120 degrees */
+			}
+			while ( --n );
+		}
+	}
+}
+
+/* kernel generation */
+
+#define RGB_TO_YIQ( r, g, b, y, i ) (\
+	(y = (r) * 0.299f + (g) * 0.587f + (b) * 0.114f),\
+	(i = (r) * 0.596f - (g) * 0.275f - (b) * 0.321f),\
+	((r) * 0.212f - (g) * 0.523f + (b) * 0.311f)\
+)
+
+#define YIQ_TO_RGB( y, i, q, to_rgb, type, r, g ) (\
+	r = (type) (y + to_rgb [0] * i + to_rgb [1] * q),\
+	g = (type) (y + to_rgb [2] * i + to_rgb [3] * q),\
+	(type) (y + to_rgb [4] * i + to_rgb [5] * q)\
+)
+
+#define PACK_RGB( r, g, b ) ((r) << 21 | (g) << 11 | (b) << 1)
+
+enum { rgb_kernel_size = burst_size / alignment_count };
+enum { rgb_bias = rgb_unit * 2 * nes_ntsc_rgb_builder };
+
+typedef struct pixel_info_t
+{
+	int offset;
+	float negate;
+	float kernel [4];
+} pixel_info_t;
+
+#if rescale_in > 1
+	#define PIXEL_OFFSET_( ntsc, scaled ) \
+		(kernel_size / 2 + ntsc + (scaled != 0) + (rescale_out - scaled) % rescale_out + \
+				(kernel_size * 2 * scaled))
+
+	#define PIXEL_OFFSET( ntsc, scaled ) \
+		PIXEL_OFFSET_( ((ntsc) - (scaled) / rescale_out * rescale_in),\
+				(((scaled) + rescale_out * 10) % rescale_out) ),\
+		(1.0f - (((ntsc) + 100) & 2))
+#else
+	#define PIXEL_OFFSET( ntsc, scaled ) \
+		(kernel_size / 2 + (ntsc) - (scaled)),\
+		(1.0f - (((ntsc) + 100) & 2))
+#endif
+
+extern pixel_info_t const nes_ntsc_pixels [alignment_count];
+
+/* Generate pixel at all burst phases and column alignments */
+static void gen_kernel( init_t* impl, float y, float i, float q, nes_ntsc_rgb_t* out )
+{ /* writes burst_count * alignment_count * rgb_kernel_size packed entries to out (fewer if a count is 1) */
+	/* generate for each scanline burst phase */
+	float const* to_rgb = impl->to_rgb;
+	int burst_remain = burst_count;
+	y -= rgb_offset; /* offset is added back for every tap in the inner loop */
+	do
+	{
+		/* Encode yiq into *two* composite signals (to allow control over artifacting).
+		Convolve these with kernels which: filter respective components, apply
+		sharpening, and rescale horizontally. Convert resulting yiq to rgb and pack
+		into integer. Based on algorithm by NewRisingSun. */
+		pixel_info_t const* pixel = nes_ntsc_pixels;
+		int alignment_remain = alignment_count;
+		do
+		{
+			/* negate is -1 when composite starts at odd multiple of 2 */
+			float const yy = y * impl->fringing * pixel->negate;
+			float const ic0 = (i + yy) * pixel->kernel [0];
+			float const qc1 = (q + yy) * pixel->kernel [1];
+			float const ic2 = (i - yy) * pixel->kernel [2];
+			float const qc3 = (q - yy) * pixel->kernel [3];
+			
+			float const factor = impl->artifacts * pixel->negate;
+			float const ii = i * factor;
+			float const yc0 = (y + ii) * pixel->kernel [0];
+			float const yc2 = (y - ii) * pixel->kernel [2];
+			
+			float const qq = q * factor;
+			float const yc1 = (y + qq) * pixel->kernel [1];
+			float const yc3 = (y - qq) * pixel->kernel [3];
+			
+			float const* k = &impl->kernel [pixel->offset];
+			int n;
+			++pixel; /* advance to the next alignment's pixel info */
+			for ( n = rgb_kernel_size; n; --n )
+			{
+				float i = k[0]*ic0 + k[2]*ic2; /* i, q and y here shadow the function parameters */
+				float q = k[1]*qc1 + k[3]*qc3;
+				float y = k[kernel_size+0]*yc0 + k[kernel_size+1]*yc1 +
+				          k[kernel_size+2]*yc2 + k[kernel_size+3]*yc3 + rgb_offset;
+				if ( rescale_out <= 1 )
+					k--; /* single kernel set: step back one tap */
+				else if ( k < &impl->kernel [kernel_size * 2 * (rescale_out - 1)] )
+					k += kernel_size * 2 - 1; /* move into the next rescale kernel set */
+				else
+					k -= kernel_size * 2 * (rescale_out - 1) + 2; /* wrap back to the first kernel set */
+				{
+					int r, g, b = YIQ_TO_RGB( y, i, q, to_rgb, int, r, g );
+					*out++ = PACK_RGB( r, g, b ) - rgb_bias;
+				}
+			}
+		}
+		while ( alignment_count > 1 && --alignment_remain );
+		
+		if ( burst_count <= 1 )
+			break;
+		
+		to_rgb += 6; /* next burst phase's 6-element decoder matrix */
+		
+		ROTATE_IQ( i, q, -0.866025f, -0.5f ); /* -120 degrees */
+	}
+	while ( --burst_remain );
+}
+
+static void correct_errors( nes_ntsc_rgb_t color, nes_ntsc_rgb_t* out );
+
+#if DISABLE_CORRECTION
+	#define CORRECT_ERROR( a ) { out [i] += rgb_bias; }
+	#define DISTRIBUTE_ERROR( a, b, c ) { out [i] += rgb_bias; }
+#else
+	#define CORRECT_ERROR( a ) { out [a] += error; }
+	#define DISTRIBUTE_ERROR( a, b, c ) {\
+		nes_ntsc_rgb_t fourth = (error + 2 * nes_ntsc_rgb_builder) >> 2;\
+		fourth &= (rgb_bias >> 1) - nes_ntsc_rgb_builder;\
+		fourth -= rgb_bias >> 2;\
+		out [a] += fourth;\
+		out [b] += fourth;\
+		out [c] += fourth;\
+		out [i] += error - (fourth * 3);\
+	}
+#endif
+
+#define RGB_PALETTE_OUT( rgb, out_ )\
+{\
+	unsigned char* out = (out_);\
+	nes_ntsc_rgb_t clamped = (rgb);\
+	NES_NTSC_CLAMP_( clamped, (8 - rgb_bits) );\
+	out [0] = (unsigned char) (clamped >> 21);\
+	out [1] = (unsigned char) (clamped >> 11);\
+	out [2] = (unsigned char) (clamped >>  1);\
+}
+
+/* blitter related */
+
+#ifndef restrict
+	#if defined (__GNUC__)
+		#define restrict __restrict__
+	#elif defined (_MSC_VER) && _MSC_VER > 1300
+		#define restrict __restrict
+	#else
+		/* no support for restricted pointers */
+		#define restrict
+	#endif
+#endif
+
+#include <limits.h>
+
+#if NES_NTSC_OUT_DEPTH <= 16
+	#if USHRT_MAX == 0xFFFF
+		typedef unsigned short nes_ntsc_out_t;
+	#else
+		#error "Need 16-bit int type"
+	#endif
+
+#else
+	#if UINT_MAX == 0xFFFFFFFF
+		typedef unsigned int  nes_ntsc_out_t;
+	#elif ULONG_MAX == 0xFFFFFFFF
+		typedef unsigned long nes_ntsc_out_t;
+	#else
+		#error "Need 32-bit int type"
+	#endif
+
+#endif
diff --git a/Utilities/orfanidis_eq.h b/Utilities/orfanidis_eq.h
new file mode 100644
index 0000000..012f448
--- /dev/null
+++ b/Utilities/orfanidis_eq.h
@@ -0,0 +1,1170 @@
+#ifndef ORFANIDIS_EQ_H_
+#define ORFANIDIS_EQ_H_
+
+#include <math.h>
+#include <vector>
+
+using namespace std;
+
+namespace orfanidis_eq {
+	//Eq data types.
+	typedef double eq_single_t;
+	typedef double eq_double_t;
+	//NOTE: both types default to double because
+	//float can lack the precision needed here
+
+	//Eq types
+	typedef enum
+	{
+		none,
+		butterworth,
+		chebyshev1,
+		chebyshev2
+	} filter_type;
+
+	//Returns a printable name for the given filter type: "not initialized"
+	//for `none`, and "none" for any value outside the enum.
+	static const char *get_eq_text(filter_type type) {
+		const char *text = "none";
+		if(type == none)
+			text = "not initialized";
+		else if(type == butterworth)
+			text = "butterworth";
+		else if(type == chebyshev1)
+			text = "chebyshev1";
+		else if(type == chebyshev2)
+			text = "chebyshev2";
+		return text;
+	}
+
+	//Eq errors
+	typedef enum
+	{
+		no_error,
+		invalid_input_data_error,
+		processing_error
+	} eq_error_t;
+
+	//Constants
+	static const eq_double_t pi = 3.1415926535897932384626433832795;
+	static const unsigned int fo_section_order = 4;
+
+	//Default gains
+	static const int max_base_gain_db = 0;
+	static const int min_base_gain_db = -60;
+	static const int butterworth_band_gain_db = -3;
+	static const int chebyshev1_band_base_gain_db = -6;
+	static const int chebyshev2_band_base_gain_db = -40;
+	static const int eq_min_max_gain_db = 46;
+
+	//Default freq's
+	static const eq_double_t lowest_grid_center_freq_hz = 31.25;
+	static const eq_double_t bands_grid_center_freq_hz = 1000;
+	static const eq_double_t lowest_audio_freq_hz = 20;
+	static const eq_double_t highest_audio_freq_hz = 20000;
+
+	//Eq config constants
+	static const unsigned int default_eq_band_filters_order = 4; //>2
+	static const eq_double_t default_sample_freq_hz = 48000;
+
+	//Precomputed Eq (eq2) config constants
+	static const eq_double_t p_eq_min_max_gain_db = 40;
+	static const eq_double_t p_eq_gain_step_db = 1;
+	static const eq_double_t common_base_gain_db = 3;
+	static const eq_double_t p_eq_default_gain_db = 0;
+
+	//Version
+	static const char* eq_version = "0.01";
+
+
+	//------------ Conversion functions class ------------
+	class conversions
+	{
+		int db_min_max;                     //table covers [-db_min_max, +db_min_max] dB
+		std::vector<eq_double_t> lin_gains; //linear gain for every integer dB step
+
+		//Maps an integer dB value to its table index; values outside the table map to the 0 dB entry.
+		int lin_gains_index(eq_double_t x) {
+			int int_x = (int)x;
+			if((x >= -db_min_max) && (x <= db_min_max)) //inclusive: the table holds 2*db_min_max+1 entries
+				return db_min_max + int_x;
+			return db_min_max;
+		}
+
+		conversions() : db_min_max(0) {}
+
+	public:
+		//Builds the dB -> linear table (vector elements) for fast conversions, one entry per integer dB.
+		conversions(int min_max) {
+			db_min_max = min_max;
+			int step = -min_max;
+			while(step <= min_max)
+				lin_gains.push_back(db_2_lin(step++));
+		}
+		//Table-based dB -> linear conversion, interpolating linearly between integer dB steps.
+		inline eq_double_t fast_db_2_lin(eq_double_t x) {
+			int int_part = (int)floor(x); //floor keeps frac_part in [0, 1) for negative x as well
+			eq_double_t frac_part = x - int_part;
+			return lin_gains[lin_gains_index(int_part)] * (1 - frac_part) +
+				(lin_gains[lin_gains_index(int_part + 1)])*frac_part;
+		}
+		//Table-based linear -> dB conversion (inverse of fast_db_2_lin); returns 0 outside the table range.
+		inline eq_double_t fast_lin_2_db(eq_double_t x) {
+			if((x >= lin_gains[0]) && (x < lin_gains[lin_gains.size() - 1])) {
+				for(unsigned int i = 0; i + 1 < lin_gains.size(); i++) //visits every interval, including the last
+					if((x >= lin_gains[i]) && (x < lin_gains[i + 1])) {
+						int int_part = (int)i - db_min_max;
+						eq_double_t frac_part = (x - lin_gains[i]) / (lin_gains[i + 1] - lin_gains[i]);
+						return int_part + frac_part;
+					}
+			}
+			return 0;
+		}
+		//Exact dB -> linear gain: 10^(x/20).
+		inline static eq_double_t db_2_lin(eq_double_t x) {
+			return pow(10, x / 20);
+		}
+		//Exact linear gain -> dB: 20*log10(x).
+		inline static eq_double_t lin_2_db(eq_double_t x) {
+			return 20 * log10(x);
+		}
+		//Normalized angular frequency (rad/sample) -> Hz; inverse of hz_2_rad.
+		inline static eq_double_t rad_2_hz(eq_double_t x, eq_double_t fs) {
+			return x * fs / (2 * pi);
+		}
+		//Hz -> normalized angular frequency (rad/sample) at sample rate fs.
+		inline static eq_double_t hz_2_rad(eq_double_t x, eq_double_t fs) {
+			return 2 * pi*x / fs;
+		}
+	};
+
+	//------------ Band freq's structure ------------
+	struct band_freqs
+	{
+	private:
+		band_freqs(); //private and given no body here, so a band cannot be default-constructed
+
+	public:
+		eq_double_t min_freq;    //lower edge of the band (presumably Hz - confirm against callers)
+		eq_double_t center_freq; //center of the band
+		eq_double_t max_freq;    //upper edge of the band
+
+		band_freqs(eq_double_t f1, eq_double_t f0, eq_double_t f2) : //argument order: min, center, max
+			min_freq(f1), center_freq(f0), max_freq(f2) {}
+
+		~band_freqs() {}
+	};
+
+	//------------ Frequency grid class ------------
+	class freq_grid
+	{
+	private:
+		std::vector<band_freqs> freqs_; //bands in the order they were added
+
+	public:
+		freq_grid() {}
+		freq_grid(const freq_grid& fg) : freqs_(fg.freqs_) {}
+		~freq_grid() {}
+		//Clears the grid, then adds the given band; the grid stays empty if the band is invalid.
+		eq_error_t set_band(eq_double_t fmin, eq_double_t f0, eq_double_t fmax) {
+			freqs_.clear();
+			return add_band(fmin, f0, fmax);
+		}
+
+		//fmin, f0, fmax: the band is appended only when fmin < f0 < fmax
+		eq_error_t add_band(eq_double_t fmin, eq_double_t f0, eq_double_t fmax) {
+			if(fmin < f0 && f0 < fmax)
+				freqs_.push_back(band_freqs(fmin, f0, fmax));
+			else
+				return invalid_input_data_error;
+			return no_error;
+		}
+
+		//f0, deltaf = fmax - fmin; deltaf must be positive so that fmin < f0 < fmax holds
+		eq_error_t add_band(eq_double_t f0, eq_double_t df) {
+			if(df > 0 && f0 >= df / 2) {
+				eq_double_t fmin = f0 - df / 2;
+				eq_double_t fmax = f0 + df / 2;
+				freqs_.push_back(band_freqs(fmin, f0, fmax));
+			} else
+				return invalid_input_data_error;
+			return no_error;
+		}
+		//Clears the grid, then builds 5 bands with centers a factor of 4 apart and edges at f0/2 and f0*2.
+		eq_error_t set_5_bands(eq_double_t center_freq = bands_grid_center_freq_hz) {
+			freqs_.clear();
+			if(lowest_audio_freq_hz < center_freq &&
+				center_freq < highest_audio_freq_hz) {
+
+				//Find lowest center frequency in band
+				eq_double_t lowest_center_freq = center_freq;
+				while(lowest_center_freq > lowest_grid_center_freq_hz)
+					lowest_center_freq /= 4.0;
+				if(lowest_center_freq < lowest_grid_center_freq_hz)
+					lowest_center_freq *= 4.0;
+
+				//Calculate freq's
+				eq_double_t f0 = lowest_center_freq;
+				for(unsigned int i = 0; i < 5; i++) {
+					freqs_.push_back(band_freqs(f0 / 2, f0, f0 * 2));
+					f0 *= 4;
+				}
+			} else
+				return invalid_input_data_error;
+			return no_error;
+		}
+		//Clears the grid, then builds 10 bands with centers a factor of 2 apart and edges at f0/2^0.5 and f0*2^0.5.
+		eq_error_t set_10_bands(eq_double_t center_freq = bands_grid_center_freq_hz) {
+			freqs_.clear();
+			if(lowest_audio_freq_hz < center_freq &&
+				center_freq < highest_audio_freq_hz) {
+
+				//Find lowest center frequency in band
+				eq_double_t lowest_center_freq = center_freq;
+				while(lowest_center_freq > lowest_grid_center_freq_hz)
+					lowest_center_freq /= 2;
+				if(lowest_center_freq < lowest_grid_center_freq_hz)
+					lowest_center_freq *= 2;
+
+				//Calculate freq's
+				eq_double_t f0 = lowest_center_freq;
+				for(unsigned int i = 0; i < 10; i++) {
+					freqs_.push_back(band_freqs(f0 / pow(2, 0.5), f0, f0*pow(2, 0.5)));
+					f0 *= 2;
+				}
+			} else
+				return invalid_input_data_error;
+			return no_error;
+		}
+		//Clears the grid, then builds 20 bands with centers a factor of 2^0.5 apart and edges at f0/2^0.25 and f0*2^0.25.
+		eq_error_t set_20_bands(eq_double_t center_freq = bands_grid_center_freq_hz) {
+			freqs_.clear();
+			if(lowest_audio_freq_hz < center_freq &&
+				center_freq < highest_audio_freq_hz) {
+
+				//Find lowest center frequency in band
+				eq_double_t lowest_center_freq = center_freq;
+				while(lowest_center_freq > lowest_audio_freq_hz)
+					lowest_center_freq /= pow(2, 0.5);
+				if(lowest_center_freq < lowest_audio_freq_hz)
+					lowest_center_freq *= pow(2, 0.5);
+
+				//Calculate freq's
+				eq_double_t f0 = lowest_center_freq;
+				for(unsigned int i = 0; i < 20; i++) {
+					freqs_.push_back(band_freqs(f0 / pow(2, 0.25),
+						f0, f0*pow(2, 0.25)));
+					f0 *= pow(2, 0.5);
+				}
+			} else
+				return invalid_input_data_error;
+			return no_error;
+		}
+		//Clears the grid, then builds 30 bands with centers a factor of 2^(1/3) apart and edges at f0/2^(1/6) and f0*2^(1/6).
+		eq_error_t set_30_bands(eq_double_t center_freq = bands_grid_center_freq_hz) {
+			freqs_.clear();
+			if(lowest_audio_freq_hz < center_freq &&
+				center_freq < highest_audio_freq_hz) {
+
+				//Find lowest center frequency in band
+				eq_double_t lowest_center_freq = center_freq;
+				while(lowest_center_freq > lowest_audio_freq_hz)
+					lowest_center_freq /= pow(2.0, 1.0 / 3.0);
+				if(lowest_center_freq < lowest_audio_freq_hz)
+					lowest_center_freq *= pow(2.0, 1.0 / 3.0);
+
+				//Calculate freq's
+				eq_double_t f0 = lowest_center_freq;
+				for(unsigned int i = 0; i < 30; i++) {
+					freqs_.push_back(band_freqs(f0 / pow(2.0, 1.0 / 6.0),
+						f0, f0*pow(2.0, 1.0 / 6.0)));
+					f0 *= pow(2, 1.0 / 3.0);
+				}
+			} else
+				return invalid_input_data_error;
+			return no_error;
+		}
+
+		unsigned int get_number_of_bands() const { return (unsigned int)freqs_.size(); }
+
+		std::vector<band_freqs> get_freqs() const { return freqs_; } //returns a copy of the band list
+		//Center frequency of band `number`, truncated to an integer; 0 when `number` is out of range.
+		unsigned int get_freq(unsigned int number) const {
+			if(number < freqs_.size())
+				return (unsigned int)freqs_[number].center_freq;
+			else
+				return 0;
+		}
+		//Center frequency rounded to the nearest 10 (100..999), 100 (1000..9999) or 1000 (>= 10000); 0 if out of range.
+		unsigned int get_rounded_freq(unsigned int number) const {
+			if(number < freqs_.size()) {
+				unsigned int freq = (unsigned int)freqs_[number].center_freq;
+				if(freq < 100)
+					return freq;
+				else if(freq >= 100 && freq < 1000) {
+					unsigned int rest = freq % 10;
+					if(rest < 5)
+						return freq - rest;
+					else
+						return freq - rest + 10;
+				} else if(freq >= 1000 && freq < 10000) {
+					unsigned int rest = freq % 100;
+					if(rest < 50)
+						return freq - rest;
+					else
+						return freq - rest + 100;
+				} else if(freq >= 10000) {
+					unsigned int rest = freq % 1000;
+					if(rest < 500)
+						return freq - rest;
+					else
+						return freq - rest + 1000;
+				}
+			}
+			return 0;
+		}
+	};
+
+	//------------ Fourth order sections ------------
+	class fo_section
+	{
+	protected:
+		//Numerator (b) and denominator (a) coefficients of the section.
+		eq_single_t b0, b1, b2, b3, b4, a0, a1, a2, a3, a4;
+
+		eq_single_t numBuf[fo_section_order];   //previous inputs, newest first
+		eq_single_t denumBuf[fo_section_order]; //previous outputs, newest first
+
+		//Runs one sample through the section and updates both history buffers.
+		inline eq_single_t df1_fo_process(eq_single_t in) {
+			eq_single_t acc = 0;
+			acc += b0*in;
+			acc += (b1*numBuf[0] - denumBuf[0] * a1);
+			acc += (b2*numBuf[1] - denumBuf[1] * a2);
+			acc += (b3*numBuf[2] - denumBuf[2] * a3);
+			acc += (b4*numBuf[3] - denumBuf[3] * a4);
+
+			//Values this close to zero are stored as exact zero to prevent
+			//denormalized values (causes extreme performance loss)
+			const bool tiny_in = in < 0.000000000001 && in > -0.000000000001;
+			numBuf[3] = numBuf[2];
+			numBuf[2] = numBuf[1];
+			numBuf[1] = numBuf[0];
+			numBuf[0] = tiny_in ? 0 : in;
+
+			if(acc < 0.000000000001 && acc > -0.000000000001)
+				acc = 0;
+			denumBuf[3] = denumBuf[2];
+			denumBuf[2] = denumBuf[1];
+			denumBuf[1] = denumBuf[0];
+			denumBuf[0] = acc;
+
+			return acc;
+		}
+
+	public:
+		//Starts with b0 = a0 = 1, every other coefficient 0, and cleared history buffers.
+		fo_section() {
+			b0 = a0 = 1;
+			b1 = b2 = b3 = b4 = 0;
+			a1 = a2 = a3 = a4 = 0;
+
+			for(unsigned int i = 0; i < fo_section_order; i++)
+				numBuf[i] = denumBuf[i] = 0;
+		}
+
+		virtual ~fo_section() {}
+
+		//Public entry point: filters a single sample.
+		eq_single_t process(eq_single_t in) {
+			return df1_fo_process(in);
+		}
+
+		//Returns a copy of this object as a plain fo_section.
+		virtual fo_section get() {
+			return *this;
+		}
+	};
+
+	class butterworth_fo_section : public fo_section // Fourth-order section whose taps are computed from Butterworth design values.
+	{
+		butterworth_fo_section() {} // Private: a section is only built from design values.
+		butterworth_fo_section(butterworth_fo_section&) {} // Private; the empty body copies no coefficients.
+	public:
+		butterworth_fo_section(eq_double_t beta,
+			eq_double_t s, eq_double_t g, eq_double_t g0,
+			eq_double_t D, eq_double_t c0) { // D is the common divisor of every computed tap.
+			b0 = (g*g*beta*beta + 2 * g*g0*s*beta + g0*g0) / D;
+			b1 = -4 * c0*(g0*g0 + g*g0*s*beta) / D;
+			b2 = 2 * (g0*g0*(1 + 2 * c0*c0) - g*g*beta*beta) / D;
+			b3 = -4 * c0*(g0*g0 - g*g0*s*beta) / D;
+			b4 = (g*g*beta*beta - 2 * g*g0*s*beta + g0*g0) / D;
+
+			a0 = 1; // Leading feedback tap is fixed at 1.
+			a1 = -4 * c0*(1 + s*beta) / D;
+			a2 = 2 * (1 + 2 * c0*c0 - beta*beta) / D;
+			a3 = -4 * c0*(1 - s*beta) / D;
+			a4 = (beta*beta - 2 * s*beta + 1) / D;
+		}
+
+		fo_section get() { return *this; } // Returns a copy sliced down to the fo_section base.
+	};
+
+	class chebyshev_type1_fo_section : public fo_section // Fourth-order section whose taps are computed from Chebyshev type 1 design values.
+	{
+		chebyshev_type1_fo_section() {} // Private: a section is only built from design values.
+		chebyshev_type1_fo_section(chebyshev_type1_fo_section&) {} // Private; the empty body copies no coefficients.
+	public:
+		chebyshev_type1_fo_section(eq_double_t a,
+			eq_double_t c, eq_double_t tetta_b,
+			eq_double_t g0, eq_double_t s, eq_double_t b,
+			eq_double_t D, eq_double_t c0) { // D is the common divisor of every computed tap.
+			b0 = ((b*b + g0*g0*c*c)*tetta_b*tetta_b + 2 * g0*b*s*tetta_b + g0*g0) / D;
+			b1 = -4 * c0*(g0*g0 + g0*b*s*tetta_b) / D;
+			b2 = 2 * (g0*g0*(1 + 2 * c0*c0) - (b*b + g0*g0*c*c)*tetta_b*tetta_b) / D;
+			b3 = -4 * c0*(g0*g0 - g0*b*s*tetta_b) / D;
+			b4 = ((b*b + g0*g0*c*c)*tetta_b*tetta_b - 2 * g0*b*s*tetta_b + g0*g0) / D;
+
+			a0 = 1; // Leading feedback tap is fixed at 1.
+			a1 = -4 * c0*(1 + a*s*tetta_b) / D;
+			a2 = 2 * (1 + 2 * c0*c0 - (a*a + c*c)*tetta_b*tetta_b) / D;
+			a3 = -4 * c0*(1 - a*s*tetta_b) / D;
+			a4 = ((a*a + c*c)*tetta_b*tetta_b - 2 * a*s*tetta_b + 1) / D;
+		}
+
+		fo_section get() { return *this; } // Returns a copy sliced down to the fo_section base.
+	};
+
+	class chebyshev_type2_fo_section : public fo_section // Fourth-order section whose taps are computed from Chebyshev type 2 design values.
+	{
+		chebyshev_type2_fo_section() {} // Private: a section is only built from design values.
+		chebyshev_type2_fo_section(chebyshev_type2_fo_section&) {} // Private; the empty body copies no coefficients.
+	public:
+		chebyshev_type2_fo_section(eq_double_t a,
+			eq_double_t c, eq_double_t tetta_b,
+			eq_double_t g, eq_double_t s, eq_double_t b,
+			eq_double_t D, eq_double_t c0) { // D is the common divisor of every computed tap.
+			b0 = (g*g*tetta_b*tetta_b + 2 * g*b*s*tetta_b + b*b + g*g*c*c) / D;
+			b1 = -4 * c0*(b*b + g*g*c*c + g*b*s*tetta_b) / D;
+			b2 = 2 * ((b*b + g*g*c*c)*(1 + 2 * c0*c0) - g*g*tetta_b*tetta_b) / D;
+			b3 = -4 * c0*(b*b + g*g*c*c - g*b*s*tetta_b) / D;
+			b4 = (g*g*tetta_b*tetta_b - 2 * g*b*s*tetta_b + b*b + g*g*c*c) / D;
+
+			a0 = 1; // Leading feedback tap is fixed at 1.
+			a1 = -4 * c0*(a*a + c*c + a*s*tetta_b) / D;
+			a2 = 2 * ((a*a + c*c)*(1 + 2 * c0*c0) - tetta_b*tetta_b) / D;
+			a3 = -4 * c0*(a*a + c*c - a*s*tetta_b) / D;
+			a4 = (tetta_b*tetta_b - 2 * a*s*tetta_b + a*a + c*c) / D;
+		}
+
+		fo_section get() { return *this; } // Returns a copy sliced down to the fo_section base.
+	};
+
+	//------------ Bandpass filters ------------
+	class bp_filter // Abstract interface implemented by the band filters below.
+	{
+	public:
+		bp_filter() {}
+		virtual ~bp_filter() {} // Virtual so a filter can be deleted through a bp_filter pointer.
+
+		virtual eq_single_t process(eq_single_t in) = 0; // Filters one sample and returns the result.
+	};
+
+	// Band filter realised as a cascade of fourth-order Butterworth sections.
+	class butterworth_bp_filter : public bp_filter
+	{
+	private:
+		std::vector<fo_section> sections_;
+
+		butterworth_bp_filter() {}
+
+		//cos(w0), with the exact value substituted at w0 = 0, pi/2 and pi
+		static eq_double_t center_cos(eq_double_t w0) {
+			eq_double_t c0 = cos(w0);
+			if(w0 == 0) c0 = 1;
+			if(w0 == pi / 2) c0 = 0;
+			if(w0 == pi) c0 = -1;
+			return c0;
+		}
+	public:
+		butterworth_bp_filter(butterworth_bp_filter& f) : sections_(f.sections_) {}
+
+		// N is the filter order, w0/wb are passed to cos()/tan() as angles and
+		// G, Gb, G0 are gains in dB (converted to linear scale below).
+		butterworth_bp_filter(unsigned int N,
+			eq_double_t w0, eq_double_t wb,
+			eq_double_t G, eq_double_t Gb, eq_double_t G0) {
+			//G and G0 both at 0 dB: a single default-constructed section is used
+			if(G == 0 && G0 == 0) {
+				sections_.push_back(fo_section());
+				return;
+			}
+
+			//Number of fourth-order sections: N / 2, rounded down
+			const unsigned int section_count = N / 2;
+
+			//Convert gains to linear scale
+			G = conversions::db_2_lin(G);
+			Gb = conversions::db_2_lin(Gb);
+			G0 = conversions::db_2_lin(G0);
+
+			const eq_double_t epsilon = pow(((eq_double_t)(G*G - Gb*Gb)) /
+				(Gb*Gb - G0*G0), 0.5);
+			const eq_double_t g = pow(((eq_double_t)G), 1.0 / ((eq_double_t)N));
+			const eq_double_t g0 = pow(((eq_double_t)G0), 1.0 / ((eq_double_t)N));
+			const eq_double_t beta = pow(((eq_double_t)epsilon), -1.0 / ((eq_double_t)N))*
+				tan(wb / 2.0);
+			const eq_double_t c0 = center_cos(w0);
+
+			//Calculate every section
+			for(unsigned int k = 1; k <= section_count; k++) {
+				const eq_double_t uk = (2.0*k - 1) / N;
+				const eq_double_t sk = sin(pi*uk / 2.0);
+				const eq_double_t Dk = beta*beta + 2 * sk*beta + 1;
+
+				sections_.push_back(butterworth_fo_section(beta, sk, g, g0, Dk, c0));
+			}
+		}
+
+		~butterworth_bp_filter() {}
+
+		// Bandwidth gain (dB) paired with a given peak gain (dB).
+		static eq_single_t compute_bw_gain_db(eq_single_t gain) {
+			if(gain <= -6)
+				return gain + common_base_gain_db;
+			if(gain > -6 && gain < 6)
+				return gain*0.5;
+			if(gain >= 6)
+				return gain - common_base_gain_db;
+
+			//Only reached when no comparison holds (NaN)
+			return 0;
+		}
+
+		// Runs the sample through every section in series; with no sections
+		// the result is 0.
+		virtual eq_single_t process(eq_single_t in) {
+			const size_t count = sections_.size();
+			if(count == 0)
+				return 0;
+
+			eq_single_t sample = in;
+			for(size_t k = 0; k < count; k++)
+				sample = sections_[k].process(sample);
+
+			return sample;
+		}
+	};
+
+	// Band filter realised as a cascade of fourth-order Chebyshev type 1 sections.
+	class chebyshev_type1_bp_filter : public bp_filter
+	{
+	private:
+		std::vector<fo_section> sections_;
+
+		chebyshev_type1_bp_filter() {}
+
+		//cos(w0), with the exact value substituted at w0 = 0, pi/2 and pi
+		static eq_double_t center_cos(eq_double_t w0) {
+			eq_double_t c0 = cos(w0);
+			if(w0 == 0) c0 = 1;
+			if(w0 == pi / 2) c0 = 0;
+			if(w0 == pi) c0 = -1;
+			return c0;
+		}
+	public:
+		// N is the filter order, w0/wb are passed to cos()/tan() as angles and
+		// G, Gb, G0 are gains in dB (converted to linear scale below).
+		chebyshev_type1_bp_filter(unsigned int N,
+			eq_double_t w0, eq_double_t wb,
+			eq_double_t G, eq_double_t Gb, eq_double_t G0) {
+			//G and G0 both at 0 dB: a single default-constructed section is used
+			if(G == 0 && G0 == 0) {
+				sections_.push_back(fo_section());
+				return;
+			}
+
+			//Number of fourth-order sections: N / 2, rounded down
+			const unsigned int section_count = N / 2;
+
+			//Convert gains to linear scale
+			G = conversions::db_2_lin(G);
+			Gb = conversions::db_2_lin(Gb);
+			G0 = conversions::db_2_lin(G0);
+
+			const eq_double_t epsilon = pow((eq_double_t)(G*G - Gb*Gb) /
+				(Gb*Gb - G0*G0), 0.5);
+			const eq_double_t g0 = pow((eq_double_t)(G0), 1.0 / N);
+			const eq_double_t alfa =
+				pow(1.0 / epsilon + pow(1 + pow(epsilon, -2.0), 0.5), 1.0 / N);
+			const eq_double_t beta =
+				pow(G / epsilon + Gb*pow(1 + pow(epsilon, -2.0), 0.5), 1.0 / N);
+			const eq_double_t a = 0.5*(alfa - 1.0 / alfa);
+			const eq_double_t b = 0.5*(beta - g0*g0*(1 / beta));
+			const eq_double_t tetta_b = tan(wb / 2);
+			const eq_double_t c0 = center_cos(w0);
+
+			//Calculate every section
+			for(unsigned int k = 1; k <= section_count; k++) {
+				const eq_double_t uk = (2.0*k - 1.0) / N;
+				const eq_double_t ck = cos(pi*uk / 2.0);
+				const eq_double_t sk = sin(pi*uk / 2.0);
+
+				const eq_double_t Dk = (a*a + ck*ck)*tetta_b*tetta_b +
+					2.0*a*sk*tetta_b + 1;
+				sections_.push_back(
+					chebyshev_type1_fo_section(a, ck, tetta_b, g0, sk, b, Dk, c0));
+			}
+		}
+
+
+		~chebyshev_type1_bp_filter() {}
+
+		// Bandwidth gain (dB) paired with a given peak gain (dB).
+		static eq_single_t compute_bw_gain_db(eq_single_t gain) {
+			if(gain <= -6)
+				return gain + 1;
+			if(gain > -6 && gain < 6)
+				return gain*0.9;
+			if(gain >= 6)
+				return gain - 1;
+
+			//Only reached when no comparison holds (NaN)
+			return 0;
+		}
+
+		// Runs the sample through every section in series; with no sections
+		// the result is 0.
+		eq_single_t process(eq_single_t in) {
+			const size_t count = sections_.size();
+			if(count == 0)
+				return 0;
+
+			eq_single_t sample = in;
+			for(size_t k = 0; k < count; k++)
+				sample = sections_[k].process(sample);
+
+			return sample;
+		}
+	};
+
+	// Band filter realised as a cascade of fourth-order Chebyshev type 2 sections.
+	class chebyshev_type2_bp_filter : public bp_filter
+	{
+	private:
+		std::vector<fo_section> sections_;
+
+		chebyshev_type2_bp_filter() {}
+
+		//cos(w0), with the exact value substituted at w0 = 0, pi/2 and pi
+		static eq_double_t center_cos(eq_double_t w0) {
+			eq_double_t c0 = cos(w0);
+			if(w0 == 0) c0 = 1;
+			if(w0 == pi / 2) c0 = 0;
+			if(w0 == pi) c0 = -1;
+			return c0;
+		}
+	public:
+		// N is the filter order, w0/wb are passed to cos()/tan() as angles and
+		// G, Gb, G0 are gains in dB (converted to linear scale below).
+		chebyshev_type2_bp_filter(unsigned int N,
+			eq_double_t w0, eq_double_t wb,
+			eq_double_t G, eq_double_t Gb, eq_double_t G0) {
+			//G and G0 both at 0 dB: a single default-constructed section is used
+			if(G == 0 && G0 == 0) {
+				sections_.push_back(fo_section());
+				return;
+			}
+
+			//Number of fourth-order sections: N / 2, rounded down
+			const unsigned int section_count = N / 2;
+
+			//Convert gains to linear scale
+			G = conversions::db_2_lin(G);
+			Gb = conversions::db_2_lin(Gb);
+			G0 = conversions::db_2_lin(G0);
+
+			const eq_double_t epsilon = pow((eq_double_t)((G*G - Gb*Gb) /
+				(Gb*Gb - G0*G0)), 0.5);
+			const eq_double_t g = pow((eq_double_t)(G), 1.0 / N);
+			const eq_double_t eu = pow(epsilon + sqrt(1 + epsilon*epsilon), 1.0 / N);
+			const eq_double_t ew = pow(G0*epsilon + Gb*sqrt(1 + epsilon*epsilon), 1.0 / N);
+			const eq_double_t a = (eu - 1.0 / eu) / 2.0;
+			const eq_double_t b = (ew - g*g / ew) / 2.0;
+			const eq_double_t tetta_b = tan(wb / 2);
+			const eq_double_t c0 = center_cos(w0);
+
+			//Calculate every section
+			for(unsigned int k = 1; k <= section_count; k++) {
+				const eq_double_t uk = (2.0*k - 1.0) / N;
+				const eq_double_t ck = cos(pi*uk / 2.0);
+				const eq_double_t sk = sin(pi*uk / 2.0);
+				const eq_double_t Dk = tetta_b*tetta_b + 2 * a*sk*tetta_b + a*a + ck*ck;
+
+				sections_.push_back(
+					chebyshev_type2_fo_section(a, ck, tetta_b, g, sk, b, Dk, c0));
+			}
+		}
+
+		~chebyshev_type2_bp_filter() {}
+
+		// Bandwidth gain (dB) paired with a given peak gain (dB).
+		static eq_single_t compute_bw_gain_db(eq_single_t gain) {
+			if(gain <= -6)
+				return -common_base_gain_db;
+			if(gain > -6 && gain < 6)
+				return gain*0.3;
+			if(gain >= 6)
+				return common_base_gain_db;
+
+			//Only reached when no comparison holds (NaN)
+			return 0;
+		}
+
+		// Runs the sample through every section in series; with no sections
+		// the result is 0.
+		eq_single_t process(eq_single_t in) {
+			const size_t count = sections_.size();
+			if(count == 0)
+				return 0;
+
+			eq_single_t sample = in;
+			for(size_t k = 0; k < count; k++)
+				sample = sections_[k].process(sample);
+
+			return sample;
+		}
+	};
+
+	// ------------ eq1 ------------
+	// Equalizer with single precomputed filter for every band
+	// Equalizer that keeps one band filter per band of the frequency grid.
+	// Every band filter receives the same input sample; the filter outputs are
+	// multiplied by the per-band gains and summed.
+	//
+	// filters_ and band_gains_ always have the same length. That length equals
+	// the number of grid bands after a successful set_eq() and is 0 after a
+	// failed one, so everything that indexes those arrays is bounded by their
+	// own size rather than by the grid size.
+	class eq1
+	{
+	private:
+		conversions conv_;
+		eq_double_t sampling_frequency_;
+		freq_grid freq_grid_;
+		std::vector<eq_single_t> band_gains_;
+		std::vector<bp_filter*> filters_;	//owned, released by cleanup_filters_array()
+		filter_type current_eq_type_;
+
+		//Private: an equalizer is always built from a grid and is not copy-constructible
+		eq1() :conv_(eq_min_max_gain_db) {}
+		eq1(const eq1&) :conv_(eq_min_max_gain_db) {}
+
+		//Deletes every owned filter and empties the array so no dangling
+		//pointer stays behind
+		void cleanup_filters_array() {
+			for(size_t j = 0; j < filters_.size(); j++)
+				delete filters_[j];
+			filters_.clear();
+		}
+
+	public:
+		// Builds the equalizer for grid *fg (must not be null) using filter
+		// type eq_t at the default sample rate.
+		eq1(const freq_grid *fg, filter_type eq_t) : conv_(eq_min_max_gain_db) {
+			sampling_frequency_ = default_sample_freq_hz;
+			freq_grid_ = *fg;
+			current_eq_type_ = eq_t;
+			set_eq(freq_grid_, eq_t);
+		}
+		~eq1() { cleanup_filters_array(); }
+
+		// Rebuilds all band filters for grid `fg` and filter type `eqt`.
+		// Returns invalid_input_data_error (leaving the equalizer without
+		// filters and with type `none`) when `eqt` is not a known type.
+		eq_error_t set_eq(freq_grid& fg, filter_type eqt) {
+			band_gains_.clear();
+			cleanup_filters_array();
+			freq_grid_ = fg;
+
+			//get_freqs() returns the band list by value: fetch it once
+			//instead of copying the whole vector several times per band
+			const std::vector<band_freqs> bands = freq_grid_.get_freqs();
+
+			for(size_t i = 0; i < bands.size(); i++) {
+				eq_double_t wb = conversions::hz_2_rad(
+					bands[i].max_freq - bands[i].min_freq,
+					sampling_frequency_);
+
+				eq_double_t w0 = conversions::hz_2_rad(
+					bands[i].center_freq,
+					sampling_frequency_);
+
+				bp_filter* filter = 0;
+				switch(eqt) {
+					case (butterworth):
+						filter = new butterworth_bp_filter(
+							default_eq_band_filters_order,
+							w0,
+							wb,
+							max_base_gain_db,
+							butterworth_band_gain_db,
+							min_base_gain_db
+						);
+						break;
+
+					case (chebyshev1):
+						filter = new chebyshev_type1_bp_filter(
+							default_eq_band_filters_order,
+							w0,
+							wb,
+							max_base_gain_db,
+							chebyshev1_band_base_gain_db,
+							min_base_gain_db
+						);
+						break;
+
+					case (chebyshev2):
+						filter = new chebyshev_type2_bp_filter(
+							default_eq_band_filters_order,
+							w0,
+							wb,
+							max_base_gain_db,
+							chebyshev2_band_base_gain_db,
+							min_base_gain_db
+						);
+						break;
+
+					default:
+						current_eq_type_ = none;
+						return invalid_input_data_error;
+				}
+
+				filters_.push_back(filter);
+				band_gains_.push_back(max_base_gain_db);
+			}
+
+			current_eq_type_ = eqt;
+			return no_error;
+		}
+
+		// Rebuilds the filters for the current grid with another filter type.
+		eq_error_t set_eq(filter_type eqt)
+		{
+			return set_eq(freq_grid_, eqt);
+		}
+
+		// Stores the new sample rate and rebuilds the filters with it.
+		eq_error_t set_sample_rate(eq_double_t sr) {
+			sampling_frequency_ = sr;
+			return set_eq(freq_grid_, current_eq_type_);
+		}
+
+		// Replaces all band gains (values are stored as given). The vector
+		// must hold exactly one value per band.
+		eq_error_t change_gains(std::vector<eq_single_t> band_gains) {
+			if(band_gains_.size() != band_gains.size())
+				return invalid_input_data_error;
+
+			band_gains_ = band_gains;
+			return no_error;
+		}
+
+		// Replaces all band gains from dB values (converted with
+		// fast_db_2_lin). The vector must hold exactly one value per band.
+		eq_error_t change_gains_db(std::vector<eq_single_t> band_gains) {
+			if(band_gains_.size() != band_gains.size())
+				return invalid_input_data_error;
+
+			for(size_t j = 0; j < band_gains_.size(); j++)
+				band_gains_[j] = conv_.fast_db_2_lin(band_gains[j]);
+
+			return no_error;
+		}
+
+		// Sets the gain of one band (value is stored as given).
+		eq_error_t change_band_gain(unsigned int band_number,
+			eq_single_t band_gain) {
+			if(band_number >= band_gains_.size())
+				return invalid_input_data_error;
+
+			band_gains_[band_number] = band_gain;
+			return no_error;
+		}
+
+		// Sets the gain of one band from a dB value (converted with fast_db_2_lin).
+		eq_error_t change_band_gain_db(unsigned int band_number, eq_single_t band_gain) {
+			if(band_number >= band_gains_.size())
+				return invalid_input_data_error;
+
+			band_gains_[band_number] = conv_.fast_db_2_lin(band_gain);
+			return no_error;
+		}
+
+		// Filters *in with one band and writes the gain-weighted result to *out.
+		// The band index is NOT range-checked here (the check was disabled in
+		// the original code); callers must pass an index of an existing filter.
+		eq_error_t sbs_process_band(unsigned int band_number,	eq_single_t *in, eq_single_t *out) {
+			*out = band_gains_[band_number] *
+				filters_[band_number]->process(*in);
+
+			return no_error;
+		}
+
+		// Filters *in with every band and writes the sum of the band outputs
+		// to *out. Writes 0 when no filters exist.
+		eq_error_t sbs_process(eq_single_t *in, eq_single_t *out) {
+			eq_error_t err = no_error;
+			eq_single_t acc_out = 0;
+			//Bounded by the filters that really exist, not by the grid size
+			const unsigned int len = (unsigned int)filters_.size();
+			for(unsigned int j = 0; j < len; j++) {
+				eq_single_t band_out = 0;
+				err = sbs_process_band(j, in, &band_out);
+				acc_out += band_out;
+			}
+			*out = acc_out;
+
+			return err;
+		}
+
+		filter_type get_eq_type() { return current_eq_type_; }
+		const char* get_string_eq_type() { return get_eq_text(current_eq_type_); }
+		//Number of bands in the frequency grid
+		unsigned int get_number_of_bands() {
+			return freq_grid_.get_number_of_bands();
+		}
+		const char* get_version() { return eq_version; }
+	};
+
+	//!!! New functionality
+
+	// ------------ eq_channel ------------
+	// Precomputed equalizer channel, 
+	// consists of vector of filters for every gain value
+	// One equalizer band with a precomputed filter for every gain step from
+	// -min_max_gain_db to +min_max_gain_db. Changing the gain only selects
+	// another filter from the table.
+	class eq_channel
+	{
+		eq_single_t f0_;
+		eq_single_t fb_;
+		eq_single_t sampling_frequency_;
+		eq_single_t min_max_gain_db_;
+		eq_single_t gain_step_db_;
+
+		unsigned int current_filter_index_;
+		eq_single_t current_gain_db_;
+
+		std::vector<bp_filter*> filters_;	//owned, released by cleanup_filters_array()
+		filter_type current_channel_type_;
+
+		eq_channel() {}
+
+		//Maps a gain in dB onto an index into filters_: the middle of the
+		//table for 0 dB, moving towards either end in proportion to the gain
+		unsigned int get_flt_index(eq_single_t gain_db) {
+			unsigned int number_of_filters = (unsigned int)filters_.size();
+			eq_single_t scale_coef = gain_db / min_max_gain_db_;
+			return (unsigned int)((number_of_filters / 2) + (number_of_filters / 2)*scale_coef);
+		}
+
+		//Deletes every owned filter and empties the table so no dangling
+		//pointer stays behind
+		void cleanup_filters_array() {
+			for(size_t j = 0; j < filters_.size(); j++)
+				delete filters_[j];
+			filters_.clear();
+		}
+
+	public:
+		// ft: filter type, fs: sample rate, f0: center frequency,
+		// fb: bandwidth, min_max_gain_db/step_db: range and resolution of
+		// the precomputed gain table.
+		eq_channel(filter_type ft,
+			eq_single_t fs, eq_single_t f0, eq_single_t fb,
+			eq_single_t min_max_gain_db = p_eq_min_max_gain_db,
+			eq_single_t step_db = p_eq_gain_step_db) {
+
+			//Init data fields
+			sampling_frequency_ = fs;
+			f0_ = f0;
+			fb_ = fb;
+			min_max_gain_db_ = min_max_gain_db;
+			gain_step_db_ = step_db;
+
+			current_gain_db_ = 0;
+			current_filter_index_ = 0;
+
+			current_channel_type_ = ft;
+
+			set_channel(current_channel_type_, sampling_frequency_);
+		}
+
+		~eq_channel() { cleanup_filters_array(); }
+
+		// (Re)builds the filter table for filter type `ft` at sample rate `fs`
+		// and selects the 0 dB entry. Filters from an earlier call are released
+		// first. Returns invalid_input_data_error (leaving the table empty and
+		// the type `none`) when `ft` is not a known type.
+		eq_error_t set_channel(filter_type ft, eq_single_t fs) {
+			//Drop the previous table so repeated calls neither leak nor
+			//leave stale filters in front of the new ones
+			cleanup_filters_array();
+			current_filter_index_ = 0;
+
+			//Honor the requested sample rate
+			sampling_frequency_ = fs;
+
+			eq_double_t wb = conversions::hz_2_rad(fb_, sampling_frequency_);
+			eq_double_t w0 = conversions::hz_2_rad(f0_, sampling_frequency_);
+
+			for(eq_single_t gain = -min_max_gain_db_; gain <= min_max_gain_db_;
+				gain += gain_step_db_) {
+
+				bp_filter* filter = 0;
+				switch(ft) {
+					case (butterworth): {
+						eq_single_t bw_gain =
+							butterworth_bp_filter::compute_bw_gain_db(gain);
+
+						filter = new butterworth_bp_filter(
+							default_eq_band_filters_order,
+							w0,
+							wb,
+							gain,
+							bw_gain,
+							p_eq_default_gain_db
+						);
+						break;
+					}
+					case (chebyshev1): {
+						eq_single_t bw_gain =
+							chebyshev_type1_bp_filter::compute_bw_gain_db(gain);
+
+						filter = new chebyshev_type1_bp_filter(
+							default_eq_band_filters_order,
+							w0,
+							wb,
+							gain,
+							bw_gain,
+							p_eq_default_gain_db
+						);
+						break;
+					}
+					case (chebyshev2): {
+						eq_single_t bw_gain =
+							chebyshev_type2_bp_filter::compute_bw_gain_db(gain);
+
+						filter = new chebyshev_type2_bp_filter(
+							default_eq_band_filters_order,
+							w0,
+							wb,
+							gain,
+							bw_gain,
+							p_eq_default_gain_db
+						);
+						break;
+					}
+					default: {
+						current_channel_type_ = none;
+						return invalid_input_data_error;
+					}
+				}
+
+				filters_.push_back(filter);
+			}
+
+			current_channel_type_ = ft;
+
+			//Get current filter index
+			current_gain_db_ = 0;
+			current_filter_index_ = get_flt_index(current_gain_db_);
+
+			return no_error;
+		}
+
+		// Selects the precomputed filter for gain `db`. Only values strictly
+		// inside (-min_max_gain_db, +min_max_gain_db) are accepted.
+		eq_error_t set_gain_db(eq_single_t db) {
+			if(db > -min_max_gain_db_ && db < min_max_gain_db_) {
+				current_gain_db_ = db;
+				current_filter_index_ = get_flt_index(db);
+			} else
+				return invalid_input_data_error;
+
+			return no_error;
+		}
+
+		// Filters *in with the currently selected filter and writes the result
+		// to *out. When no filter is available at the selected index (e.g.
+		// after a failed set_channel()), the sample is passed through
+		// unchanged and invalid_input_data_error is returned.
+		eq_error_t sbs_process(eq_single_t *in, eq_single_t *out) {
+			if(current_filter_index_ >= filters_.size()) {
+				*out = *in;
+				return invalid_input_data_error;
+			}
+
+			*out = filters_[current_filter_index_]->process(*in);
+			return no_error;
+		}
+	};
+
+	// ------------ eq2 ------------
+	// Precomputed equalizer 
+
+	// Equalizer built from one eq_channel per band of the frequency grid.
+	// The channels are applied in series: each one filters the output of the
+	// previous one.
+	class eq2
+	{
+		conversions conv_;
+		eq_double_t sampling_frequency_;
+		freq_grid freq_grid_;
+		std::vector<eq_channel*> channels_;	//owned, released by cleanup_channels_array()
+		filter_type current_eq_type_;
+
+		//Deletes every owned channel and empties the array so no dangling
+		//pointer stays behind
+		void cleanup_channels_array() {
+			for(size_t j = 0; j < channels_.size(); j++)
+				delete channels_[j];
+			channels_.clear();
+		}
+
+	public:
+		// Builds the equalizer for grid `fg` using filter type eq_t at the
+		// default sample rate.
+		eq2(freq_grid &fg, filter_type eq_t) : conv_(eq_min_max_gain_db) {
+			sampling_frequency_ = default_sample_freq_hz;
+			freq_grid_ = fg;
+			current_eq_type_ = eq_t;
+			set_eq(freq_grid_, eq_t);
+		}
+		~eq2() { cleanup_channels_array(); }
+
+		// Rebuilds one channel per band of `fg` with filter type `ft` and
+		// sets every channel to the default gain.
+		eq_error_t set_eq(const freq_grid& fg, filter_type ft) {
+			cleanup_channels_array();
+			freq_grid_ = fg;
+
+			//get_freqs() returns the band list by value: fetch it once
+			//instead of copying the whole vector for every band
+			const std::vector<band_freqs> bands = freq_grid_.get_freqs();
+
+			for(size_t i = 0; i < bands.size(); i++) {
+				const band_freqs& b_fres = bands[i];
+
+				eq_channel* eq_ch = new eq_channel(ft, sampling_frequency_,
+					b_fres.center_freq, b_fres.max_freq - b_fres.min_freq);
+
+				channels_.push_back(eq_ch);
+				channels_[i]->set_gain_db(p_eq_default_gain_db);
+			}
+
+			current_eq_type_ = ft;
+			return no_error;
+		}
+
+		// Rebuilds the channels for the current grid with another filter type.
+		eq_error_t set_eq(filter_type ft) {
+			eq_error_t err = set_eq(freq_grid_, ft);
+			return err;
+		}
+
+		// Stores the new sample rate and rebuilds the channels with it.
+		eq_error_t set_sample_rate(eq_double_t sr) {
+			sampling_frequency_ = sr;
+			eq_error_t err = set_eq(current_eq_type_);
+			return err;
+		}
+
+		// Sets all band gains from linear values (converted with
+		// fast_lin_2_db). The vector must hold exactly one value per channel.
+		eq_error_t change_gains(std::vector<eq_single_t> band_gains) {
+			if(channels_.size() == band_gains.size())
+				for(unsigned int j = 0; j < channels_.size(); j++)
+					channels_[j]->set_gain_db(conv_.fast_lin_2_db(band_gains[j]));
+			else
+				return invalid_input_data_error;
+
+			return no_error;
+		}
+
+		// Sets all band gains from dB values. The vector must hold exactly
+		// one value per channel.
+		eq_error_t change_gains_db(std::vector<eq_single_t> band_gains) {
+			if(channels_.size() == band_gains.size())
+				for(unsigned int j = 0; j < channels_.size(); j++)
+					channels_[j]->set_gain_db(band_gains[j]);
+			else
+				return invalid_input_data_error;
+
+			return no_error;
+		}
+
+		// Sets the gain of one band from a linear value (converted with fast_lin_2_db).
+		eq_error_t change_band_gain(unsigned int band_number,
+			eq_single_t band_gain) {
+			if(band_number < channels_.size())
+				channels_[band_number]->set_gain_db(conv_.fast_lin_2_db(band_gain));
+			else
+				return invalid_input_data_error;
+
+			return no_error;
+		}
+
+		// Sets the gain of one band from a dB value.
+		eq_error_t change_band_gain_db(unsigned int band_number,
+			eq_single_t band_gain) {
+			if(band_number < channels_.size())
+				channels_[band_number]->set_gain_db(band_gain);
+			else
+				return invalid_input_data_error;
+
+			return no_error;
+		}
+
+		// Filters *in with a single band and writes the result to *out.
+		eq_error_t sbs_process_band(unsigned int band_number,
+			eq_single_t *in, eq_single_t *out) {
+			if(band_number < get_number_of_bands())
+				channels_[band_number]->sbs_process(in, out);
+			else
+				return invalid_input_data_error;
+
+			return no_error;
+		}
+
+		// Runs *in through every band in series and writes the result to *out.
+		eq_error_t sbs_process(eq_single_t *in, eq_single_t *out) {
+			eq_error_t err = no_error;
+			eq_single_t in_out = *in;
+			for(unsigned int i = 0; i < get_number_of_bands(); i++)
+				err = sbs_process_band(i, &in_out, &in_out);
+
+			*out = in_out;
+
+			return err;
+		}
+
+		filter_type get_eq_type() { return current_eq_type_; }
+		const char* get_string_eq_type() { return get_eq_text(current_eq_type_); }
+		//Number of bands in the frequency grid
+		unsigned int get_number_of_bands() {
+			return freq_grid_.get_number_of_bands();
+		}
+		const char* get_version() { return eq_version; }
+	};
+
+} //namespace orfanidis_eq
+#endif //ORFANIDIS_EQ_H_
diff --git a/Utilities/sha1.cpp b/Utilities/sha1.cpp
new file mode 100644
index 0000000..dbae07b
--- /dev/null
+++ b/Utilities/sha1.cpp
@@ -0,0 +1,319 @@
+/*
+	 sha1.cpp - source code of
+
+	 ============
+	 SHA-1 in C++
+	 ============
+
+	 100% Public Domain.
+
+	 Original C Code
+		  -- Steve Reid <steve@edmweb.com>
+	 Small changes to fit into bglibs
+		  -- Bruce Guenter <bruce@untroubled.org>
+	 Translation to simpler C++ Code
+		  -- Volker Grabsch <vog@notjusthosting.com>
+	 Safety fixes
+		  -- Eugene Hopkinson <slowriot at voxelstorm dot com>
+*/
+
+#include "stdafx.h"
+#include "sha1.h"
+#include <sstream>
+#include <iomanip>
+#include <fstream>
+
+
+static const size_t BLOCK_INTS = 16;  /* number of 32bit integers per SHA1 block */
+static const size_t BLOCK_BYTES = BLOCK_INTS * 4;  /* the same block size expressed in bytes (16 * 4 = 64) */
+
+
+/*
+ * Puts the hashing state back to its starting point: loads the five SHA1
+ * initialization words into digest[0..4], empties the pending-byte buffer
+ * and zeroes the count of processed blocks.
+ */
+static void reset(uint32_t digest[], std::string &buffer, uint64_t &transforms)
+{
+	/* SHA1 initialization constants */
+	static const uint32_t initial_state[5] = {
+		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
+	};
+	for(size_t i = 0; i < 5; i++) {
+		digest[i] = initial_state[i];
+	}
+
+	/* Reset counters */
+	buffer.clear();
+	transforms = 0;
+}
+
+
+/*
+ * Rotates a 32-bit value left by `bits` positions. The count is reduced
+ * modulo 32, so a count of 0 (or 32) returns the value unchanged instead of
+ * performing an undefined shift by the full width of the type.
+ */
+static uint32_t rol(const uint32_t value, const size_t bits)
+{
+	const size_t shift = bits & 31;
+	return (value << shift) | (value >> ((32 - shift) & 31));
+}
+
+
+/*
+ * XORs the block words at positions i+13, i+8, i+2 (each taken modulo 16)
+ * and i, then rotates the result left by one bit.
+ */
+static uint32_t blk(const uint32_t block[BLOCK_INTS], const size_t i)
+{
+	const uint32_t mixed = block[(i + 13) & 15]
+		^ block[(i + 8) & 15]
+		^ block[(i + 2) & 15]
+		^ block[i];
+	return rol(mixed, 1);
+}
+
+
+/*
+ * (R0+R1), R2, R3, R4 are the different operations used in SHA1
+ */
+
+/* Operation with constant 0x5a827999 that reads block[i] as supplied (block is not modified). */
+static void R0(const uint32_t block[BLOCK_INTS], const uint32_t v, uint32_t &w, const uint32_t x, const uint32_t y, uint32_t &z, const size_t i)
+{
+	const uint32_t selected = (w & (x ^ y)) ^ y;
+	z += selected + block[i] + 0x5a827999 + rol(v, 5);
+	w = rol(w, 30);
+}
+
+
+/* Same mixing as R0, but block[i] is first replaced by blk(block, i). */
+static void R1(uint32_t block[BLOCK_INTS], const uint32_t v, uint32_t &w, const uint32_t x, const uint32_t y, uint32_t &z, const size_t i)
+{
+	const uint32_t word = blk(block, i);
+	block[i] = word;
+
+	const uint32_t selected = (w & (x ^ y)) ^ y;
+	z += selected + word + 0x5a827999 + rol(v, 5);
+	w = rol(w, 30);
+}
+
+
+/* Operation with constant 0x6ed9eba1: replaces block[i] by blk(block, i) and mixes w^x^y into z. */
+static void R2(uint32_t block[BLOCK_INTS], const uint32_t v, uint32_t &w, const uint32_t x, const uint32_t y, uint32_t &z, const size_t i)
+{
+	const uint32_t word = blk(block, i);
+	block[i] = word;
+
+	const uint32_t parity = w ^ x ^ y;
+	z += parity + word + 0x6ed9eba1 + rol(v, 5);
+	w = rol(w, 30);
+}
+
+
+/* Operation with constant 0x8f1bbcdc: replaces block[i] by blk(block, i) and mixes ((w|x)&y)|(w&x) into z. */
+static void R3(uint32_t block[BLOCK_INTS], const uint32_t v, uint32_t &w, const uint32_t x, const uint32_t y, uint32_t &z, const size_t i)
+{
+	const uint32_t word = blk(block, i);
+	block[i] = word;
+
+	const uint32_t combined = ((w | x) & y) | (w & x);
+	z += combined + word + 0x8f1bbcdc + rol(v, 5);
+	w = rol(w, 30);
+}
+
+
+/* Operation with constant 0xca62c1d6: replaces block[i] by blk(block, i) and mixes w^x^y into z. */
+static void R4(uint32_t block[BLOCK_INTS], const uint32_t v, uint32_t &w, const uint32_t x, const uint32_t y, uint32_t &z, const size_t i)
+{
+	const uint32_t word = blk(block, i);
+	block[i] = word;
+
+	const uint32_t parity = w ^ x ^ y;
+	z += parity + word + 0xca62c1d6 + rol(v, 5);
+	w = rol(w, 30);
+}
+
+
+/*
+ * Hash a single 512-bit block. This is the core of the algorithm.
+ */
+
+static void transform(uint32_t digest[], uint32_t block[BLOCK_INTS], uint64_t &transforms) // Mixes one 16-word block into digest[0..4]; block[] is overwritten as scratch by R1..R4.
+{
+	/* Copy digest[] to working vars */
+	uint32_t a = digest[0];
+	uint32_t b = digest[1];
+	uint32_t c = digest[2];
+	uint32_t d = digest[3];
+	uint32_t e = digest[4];
+
+	/* 4 rounds of 20 operations each. Loop unrolled.
+	   The five working variables are passed in rotated order from one call to the next. */
+	R0(block, a, b, c, d, e, 0); // operations 0-15: block words used as supplied
+	R0(block, e, a, b, c, d, 1);
+	R0(block, d, e, a, b, c, 2);
+	R0(block, c, d, e, a, b, 3);
+	R0(block, b, c, d, e, a, 4);
+	R0(block, a, b, c, d, e, 5);
+	R0(block, e, a, b, c, d, 6);
+	R0(block, d, e, a, b, c, 7);
+	R0(block, c, d, e, a, b, 8);
+	R0(block, b, c, d, e, a, 9);
+	R0(block, a, b, c, d, e, 10);
+	R0(block, e, a, b, c, d, 11);
+	R0(block, d, e, a, b, c, 12);
+	R0(block, c, d, e, a, b, 13);
+	R0(block, b, c, d, e, a, 14);
+	R0(block, a, b, c, d, e, 15);
+	R1(block, e, a, b, c, d, 0); // operations 16-19: same mixing, block words regenerated in place
+	R1(block, d, e, a, b, c, 1);
+	R1(block, c, d, e, a, b, 2);
+	R1(block, b, c, d, e, a, 3);
+	R2(block, a, b, c, d, e, 4); // operations 20-39
+	R2(block, e, a, b, c, d, 5);
+	R2(block, d, e, a, b, c, 6);
+	R2(block, c, d, e, a, b, 7);
+	R2(block, b, c, d, e, a, 8);
+	R2(block, a, b, c, d, e, 9);
+	R2(block, e, a, b, c, d, 10);
+	R2(block, d, e, a, b, c, 11);
+	R2(block, c, d, e, a, b, 12);
+	R2(block, b, c, d, e, a, 13);
+	R2(block, a, b, c, d, e, 14);
+	R2(block, e, a, b, c, d, 15);
+	R2(block, d, e, a, b, c, 0);
+	R2(block, c, d, e, a, b, 1);
+	R2(block, b, c, d, e, a, 2);
+	R2(block, a, b, c, d, e, 3);
+	R2(block, e, a, b, c, d, 4);
+	R2(block, d, e, a, b, c, 5);
+	R2(block, c, d, e, a, b, 6);
+	R2(block, b, c, d, e, a, 7);
+	R3(block, a, b, c, d, e, 8); // operations 40-59
+	R3(block, e, a, b, c, d, 9);
+	R3(block, d, e, a, b, c, 10);
+	R3(block, c, d, e, a, b, 11);
+	R3(block, b, c, d, e, a, 12);
+	R3(block, a, b, c, d, e, 13);
+	R3(block, e, a, b, c, d, 14);
+	R3(block, d, e, a, b, c, 15);
+	R3(block, c, d, e, a, b, 0);
+	R3(block, b, c, d, e, a, 1);
+	R3(block, a, b, c, d, e, 2);
+	R3(block, e, a, b, c, d, 3);
+	R3(block, d, e, a, b, c, 4);
+	R3(block, c, d, e, a, b, 5);
+	R3(block, b, c, d, e, a, 6);
+	R3(block, a, b, c, d, e, 7);
+	R3(block, e, a, b, c, d, 8);
+	R3(block, d, e, a, b, c, 9);
+	R3(block, c, d, e, a, b, 10);
+	R3(block, b, c, d, e, a, 11);
+	R4(block, a, b, c, d, e, 12); // operations 60-79
+	R4(block, e, a, b, c, d, 13);
+	R4(block, d, e, a, b, c, 14);
+	R4(block, c, d, e, a, b, 15);
+	R4(block, b, c, d, e, a, 0);
+	R4(block, a, b, c, d, e, 1);
+	R4(block, e, a, b, c, d, 2);
+	R4(block, d, e, a, b, c, 3);
+	R4(block, c, d, e, a, b, 4);
+	R4(block, b, c, d, e, a, 5);
+	R4(block, a, b, c, d, e, 6);
+	R4(block, e, a, b, c, d, 7);
+	R4(block, d, e, a, b, c, 8);
+	R4(block, c, d, e, a, b, 9);
+	R4(block, b, c, d, e, a, 10);
+	R4(block, a, b, c, d, e, 11);
+	R4(block, e, a, b, c, d, 12);
+	R4(block, d, e, a, b, c, 13);
+	R4(block, c, d, e, a, b, 14);
+	R4(block, b, c, d, e, a, 15);
+
+	/* Add the working vars back into digest[] */
+	digest[0] += a;
+	digest[1] += b;
+	digest[2] += c;
+	digest[3] += d;
+	digest[4] += e;
+
+	/* Count the number of transformations (final() derives the message length from it) */
+	transforms++;
+}
+
+
+/*
+ * Packs the first BLOCK_BYTES bytes of `buffer` into BLOCK_INTS 32-bit
+ * words, most significant byte first. `buffer` must hold at least
+ * BLOCK_BYTES bytes.
+ *
+ * Each byte is widened to uint32_t before it is shifted, so a byte of 0x80
+ * or above is never shifted into the sign bit of an int.
+ */
+static void buffer_to_block(const std::string &buffer, uint32_t block[BLOCK_INTS])
+{
+	/* Convert the std::string (byte buffer) to a uint32_t array (MSB) */
+	for(size_t i = 0; i < BLOCK_INTS; i++) {
+		const size_t offset = 4 * i;
+		block[i] = (uint32_t)(buffer[offset + 3] & 0xff)
+			| (uint32_t)(buffer[offset + 2] & 0xff) << 8
+			| (uint32_t)(buffer[offset + 1] & 0xff) << 16
+			| (uint32_t)(buffer[offset + 0] & 0xff) << 24;
+	}
+}
+
+
+SHA1::SHA1()
+{
+	reset(digest, buffer, transforms); // start from the SHA1 initialization constants with nothing buffered and no blocks counted
+}
+
+
+/* Feeds the bytes of `s` to the hasher by reading them through a string stream. */
+void SHA1::update(const std::string &s)
+{
+	std::istringstream input(s);
+	update(input);
+}
+
+
+/*
+ * Reads `is` until it stops delivering data. Every completed block of
+ * BLOCK_BYTES bytes is hashed immediately; an incomplete trailing block
+ * stays in `buffer` for the next update() or for final().
+ */
+void SHA1::update(std::istream &is)
+{
+	char chunk[BLOCK_BYTES];
+	uint32_t words[BLOCK_INTS];
+
+	for(;;) {
+		/* Top the pending buffer up to one full block */
+		is.read(chunk, BLOCK_BYTES - buffer.size());
+		buffer.append(chunk, (size_t)is.gcount());
+		if(buffer.size() != BLOCK_BYTES) {
+			/* The stream ran short: keep what was read for later */
+			break;
+		}
+
+		buffer_to_block(buffer, words);
+		transform(digest, words, transforms);
+		buffer.clear();
+	}
+}
+
+
+/*
+ * Add padding and return the message digest.
+ */
+
+std::string SHA1::final()
+{
+	/* Message length in bits, taken before any padding is appended */
+	const uint64_t total_bits = (transforms*BLOCK_BYTES + buffer.size()) * 8;
+
+	/* Padding: one 0x80 byte, then zero bytes up to the block boundary */
+	buffer.push_back((char)0x80);
+	const size_t orig_size = buffer.size();
+	if(buffer.size() < BLOCK_BYTES) {
+		buffer.append(BLOCK_BYTES - buffer.size(), (char)0x00);
+	}
+
+	uint32_t block[BLOCK_INTS];
+	buffer_to_block(buffer, block);
+
+	/* If the last 8 bytes are not free for the length, hash this block
+	   first and put the length into an extra, otherwise zero, block */
+	const bool needs_extra_block = orig_size > BLOCK_BYTES - 8;
+	if(needs_extra_block) {
+		transform(digest, block, transforms);
+		for(size_t i = 0; i + 2 < BLOCK_INTS; i++) {
+			block[i] = 0;
+		}
+	}
+
+	/* Append total_bits, split this uint64_t into two uint32_t */
+	block[BLOCK_INTS - 2] = (uint32_t)(total_bits >> 32);
+	block[BLOCK_INTS - 1] = (uint32_t)total_bits;
+	transform(digest, block, transforms);
+
+	/* Hex std::string: upper case, eight zero-padded digits per digest word */
+	std::ostringstream result;
+	result << std::uppercase << std::hex << std::setfill('0');
+	const size_t digest_words = sizeof(digest) / sizeof(digest[0]);
+	for(size_t i = 0; i < digest_words; i++) {
+		result << std::setw(8) << digest[i];
+	}
+	const std::string hex_digest = result.str();
+
+	/* Reset for next run */
+	reset(digest, buffer, transforms);
+
+	return hex_digest;
+}
+
+/* Hashes the bytes of `data` and returns the digest as a hex string. */
+std::string SHA1::GetHash(vector<uint8_t> &data)
+{
+	/* Copy the bytes into a stream so the stream overload can hash them */
+	std::stringstream byte_stream;
+	byte_stream.write((char*)data.data(), data.size());
+
+	return GetHash(byte_stream);
+}
+
+/* Hashes everything that can be read from `stream` and returns the digest as a hex string. */
+std::string SHA1::GetHash(std::istream &stream)
+{
+	SHA1 hasher;
+	hasher.update(stream);
+	return hasher.final();
+}
+
+/*
+ * Hashes the contents of the file `filename` (opened in binary mode) and
+ * returns the digest as a hex string.
+ * NOTE(review): the stream is not checked after opening; presumably a file
+ * that cannot be opened hashes like empty input - confirm.
+ */
+std::string SHA1::GetHash(const std::string &filename)
+{
+	std::ifstream file(filename.c_str(), std::ios::binary);
+	return GetHash(file);
+}
diff --git a/Utilities/sha1.h b/Utilities/sha1.h
new file mode 100644
index 0000000..22b968f
--- /dev/null
+++ b/Utilities/sha1.h
@@ -0,0 +1,41 @@
+/*
+    sha1.h - header of
+
+    ============
+    SHA-1 in C++
+    ============
+
+    100% Public Domain.
+
+    Original C Code
+        -- Steve Reid <steve@edmweb.com>
+    Small changes to fit into bglibs
+        -- Bruce Guenter <bruce@untroubled.org>
+    Translation to simpler C++ Code
+        -- Volker Grabsch <vog@notjusthosting.com>
+    Safety fixes
+        -- Eugene Hopkinson <slowriot at voxelstorm dot com>
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include <vector>
+
+/*
+ * Incremental SHA-1 hasher: feed data with update(), then call final() to
+ * get the digest as a hex string. The static GetHash() helpers hash a whole
+ * file, stream or byte vector in a single call.
+ */
+class SHA1
+{
+public:
+    SHA1();
+    void update(const std::string &s);
+    void update(std::istream &is);
+    std::string final();
+    static std::string GetHash(const std::string &filename);
+    static std::string GetHash(std::istream &stream);
+    /* std:: is spelled out so the header does not depend on a
+       using-declaration made by whoever includes it */
+    static std::string GetHash(std::vector<uint8_t> &data);
+
+private:
+    uint32_t digest[5];     /* running hash state */
+    std::string buffer;     /* bytes received but not yet hashed */
+    uint64_t transforms;    /* number of blocks hashed so far */
+};
diff --git a/Utilities/stb_vorbis.cpp b/Utilities/stb_vorbis.cpp
new file mode 100644
index 0000000..4064326
--- /dev/null
+++ b/Utilities/stb_vorbis.cpp
@@ -0,0 +1,5112 @@
+#include "stdafx.h"
+#include "stb_vorbis.h"
+// Ogg Vorbis audio decoder - v1.11 - public domain
+// http://nothings.org/stb_vorbis/
+//
+// Original version written by Sean Barrett in 2007.
+//
+// Originally sponsored by RAD Game Tools. Seeking sponsored
+// by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
+// Aras Pranckevicius, and Sean Barrett.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// Limitations:
+//
+//   - floor 0 not supported (used in old ogg vorbis files pre-2004)
+//   - lossless sample-truncation at beginning ignored
+//   - cannot concatenate multiple vorbis streams
+//   - sample positions are 32-bit, limiting seekable 192Khz
+//       files to around 6 hours (Ogg supports 64-bit)
+//
+// Feature contributors:
+//    Dougall Johnson (sample-exact seeking)
+//
+// Bugfix/warning contributors:
+//    Terje Mathisen     Niklas Frykholm     Andy Hill
+//    Casey Muratori     John Bolton         Gargaj
+//    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
+//    Bernhard Wodo      Evan Balster        alxprd@github
+//    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
+//    Phillip Bennefall  Rohit               Thiago Goulart
+//    manxorist@github   saga musix          github:infatum
+//
+// Partial history:
+//    1.11    - 2017/07/23 - fix MinGW compilation 
+//    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
+//    1.09    - 2016/04/04 - back out 'truncation of last frame' fix from previous version
+//    1.08    - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
+//    1.07    - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
+//    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
+//                           some crash fixes when out of memory or with corrupt files
+//                           fix some inappropriately signed shifts
+//    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
+//    1.04    - 2014/08/27 - fix missing const-correct case in API
+//    1.03    - 2014/08/07 - warning fixes
+//    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
+//    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
+//    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
+//                           (API change) report sample rate for decode-full-file funcs
+//
+// See end of file for full version history.
+
+
+#ifndef STB_VORBIS_HEADER_ONLY
+
+// global configuration settings (e.g. set these in the project/makefile),
+// or just set them in this file at the top (although ideally the first few
+// should be visible when the header file is compiled too, although it's not
+// crucial)
+
+// STB_VORBIS_NO_PUSHDATA_API
+//     does not compile the code for the various stb_vorbis_*_pushdata()
+//     functions
+// #define STB_VORBIS_NO_PUSHDATA_API
+
+// STB_VORBIS_NO_PULLDATA_API
+//     does not compile the code for the non-pushdata APIs
+// #define STB_VORBIS_NO_PULLDATA_API
+
+// STB_VORBIS_NO_STDIO
+//     does not compile the code for the APIs that use FILE *s internally
+//     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
+// #define STB_VORBIS_NO_STDIO
+
+// STB_VORBIS_NO_INTEGER_CONVERSION
+//     does not compile the code for converting audio sample data from
+//     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
+// #define STB_VORBIS_NO_INTEGER_CONVERSION
+
+// STB_VORBIS_NO_FAST_SCALED_FLOAT
+//      does not use a fast float-to-int trick to accelerate float-to-int on
+//      most platforms which requires endianness be defined correctly.
+//#define STB_VORBIS_NO_FAST_SCALED_FLOAT
+
+
+// STB_VORBIS_MAX_CHANNELS [number]
+//     globally define this to the maximum number of channels you need.
+//     The spec does not put a restriction on channels except that
+//     the count is stored in a byte, so 255 is the hard limit.
+//     Reducing this saves about 16 bytes per value, so using 16 saves
+//     (255-16)*16 or around 4KB. Plus any other memory usage
+//     I forgot to account for. Can probably go as low as 8 (7.1 audio),
+//     6 (5.1 audio), or 2 (stereo only).
+#ifndef STB_VORBIS_MAX_CHANNELS
+#define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
+#endif
+
+// STB_VORBIS_PUSHDATA_CRC_COUNT [number]
+//     after a flush_pushdata(), stb_vorbis begins scanning for the
+//     next valid page, without backtracking. when it finds something
+//     that looks like a page, it streams through it and verifies its
+//     CRC32. Should that validation fail, it keeps scanning. But it's
+//     possible that _while_ streaming through to check the CRC32 of
+//     one candidate page, it sees another candidate page. This #define
+//     determines how many "overlapping" candidate pages it can search
+//     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
+//     garbage pages could be as big as 64KB, but probably average ~16KB.
+//     So don't hose ourselves by scanning an apparent 64KB page and
+//     missing a ton of real ones in the interim; so minimum of 2
+#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
+#define STB_VORBIS_PUSHDATA_CRC_COUNT  4
+#endif
+
+// STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
+//     sets the log size of the huffman-acceleration table.  Maximum
+//     supported value is 24. with larger numbers, more decodings are O(1),
+//     but the table size is larger so worse cache missing, so you'll have
+//     to probe (and try multiple ogg vorbis files) to find the sweet spot.
+#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
+#define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
+#endif
+
+// STB_VORBIS_FAST_BINARY_LENGTH [number]
+//     sets the log size of the binary-search acceleration table. this
+//     is used in similar fashion to the fast-huffman size to set initial
+//     parameters for the binary search
+
+// STB_VORBIS_FAST_HUFFMAN_INT
+//     The fast huffman tables are much more efficient if they can be
+//     stored as 16-bit results instead of 32-bit results. This restricts
+//     the codebooks to having only 65535 possible outcomes, though.
+//     (At least, accelerated by the huffman table.)
+#ifndef STB_VORBIS_FAST_HUFFMAN_INT
+#define STB_VORBIS_FAST_HUFFMAN_SHORT
+#endif
+
+// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+//     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
+//     back on binary searching for the correct one. This requires storing
+//     extra tables with the huffman codes in sorted order. Defining this
+//     symbol trades off space for speed by forcing a linear search in the
+//     non-fast case, except for "sparse" codebooks.
+// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+
+// STB_VORBIS_DIVIDES_IN_RESIDUE
+//     stb_vorbis precomputes the result of the scalar residue decoding
+//     that would otherwise require a divide per chunk. you can trade off
+//     space for time by defining this symbol.
+// #define STB_VORBIS_DIVIDES_IN_RESIDUE
+
+// STB_VORBIS_DIVIDES_IN_CODEBOOK
+//     vorbis VQ codebooks can be encoded two ways: with every case explicitly
+//     stored, or with all elements being chosen from a small range of values,
+//     and all values possible in all elements. By default, stb_vorbis expands
+//     this latter kind out to look like the former kind for ease of decoding,
+//     because otherwise an integer divide-per-vector-element is required to
+//     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
+//     trade off storage for speed.
+//#define STB_VORBIS_DIVIDES_IN_CODEBOOK
+
+#ifdef STB_VORBIS_CODEBOOK_SHORTS
+#error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
+#endif
+
+// STB_VORBIS_DIVIDE_TABLE
+//     this replaces small integer divides in the floor decode loop with
+//     table lookups. made less than 1% difference, so disabled by default.
+
+// STB_VORBIS_NO_INLINE_DECODE
+//     disables the inlining of the scalar codebook fast-huffman decode.
+//     might save a little codespace; useful for debugging
+// #define STB_VORBIS_NO_INLINE_DECODE
+
+// STB_VORBIS_NO_DEFER_FLOOR
+//     Normally we only decode the floor without synthesizing the actual
+//     full curve. We can instead synthesize the curve immediately. This
+//     requires more memory and is very likely slower, so I don't think
+//     you'd ever want to do it except for debugging.
+// #define STB_VORBIS_NO_DEFER_FLOOR
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef STB_VORBIS_NO_PULLDATA_API
+   #define STB_VORBIS_NO_INTEGER_CONVERSION
+   #define STB_VORBIS_NO_STDIO
+#endif
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+   #define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
+
+   // only need endianness for fast-float-to-int, which we don't
+   // use for pushdata
+
+   #ifndef STB_VORBIS_BIG_ENDIAN
+     #define STB_VORBIS_ENDIAN  0
+   #else
+     #define STB_VORBIS_ENDIAN  1
+   #endif
+
+#endif
+#endif
+
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STB_VORBIS_NO_CRT
+   #include <stdlib.h>
+   #include <string.h>
+   #include <assert.h>
+   #include <math.h>
+
+   // find definition of alloca if it's not in stdlib.h:
+   #if defined(_MSC_VER) || defined(__MINGW32__)
+      #include <malloc.h>
+   #endif
+   #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
+      #include <alloca.h>
+   #endif
+#else // STB_VORBIS_NO_CRT
+   #define NULL 0
+   #define malloc(s)   0
+   #define free(s)     ((void) 0)
+   #define realloc(s)  0
+#endif // STB_VORBIS_NO_CRT
+
+#include <limits.h>
+
+#ifdef __MINGW32__
+   // eff you mingw:
+   //     "fixed":
+   //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
+   //     "no that broke the build, reverted, who cares about C":
+   //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
+   #ifdef __forceinline
+   #undef __forceinline
+   #endif
+   #define __forceinline
+   #define alloca __builtin_alloca
+#elif !defined(_MSC_VER)
+   #if __GNUC__
+      #define __forceinline inline
+   #else
+      #define __forceinline
+   #endif
+#endif
+
+#if STB_VORBIS_MAX_CHANNELS > 256
+#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
+#endif
+
+#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
+#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
+#endif
+
+
+#if 0
+#include <crtdbg.h>
+#define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
+#else
+#define CHECK(f)   ((void) 0)
+#endif
+
+#define MAX_BLOCKSIZE_LOG  13   // from specification
+#define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
+
+
+typedef unsigned char  uint8;
+typedef   signed char   int8;
+typedef unsigned short uint16;
+typedef   signed short  int16;
+typedef unsigned int   uint32;
+typedef   signed int    int32;
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+typedef float codetype;
+
+// @NOTE
+//
+// Some arrays below are tagged "//varies", which means it's actually
+// a variable-sized piece of data, but rather than malloc I assume it's
+// small enough it's better to just allocate it all together with the
+// main thing
+//
+// Most of the variables are specified with the smallest size I could pack
+// them into. It might give better performance to make them all full-sized
+// integers. It should be safe to freely rearrange the structures or change
+// the sizes larger--nothing relies on silently truncating etc., nor the
+// order of variables.
+
+#define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
+#define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
+
+typedef struct
+{
+   int dimensions, entries;
+   uint8 *codeword_lengths;   // huffman code length of each stored entry
+   float  minimum_value;
+   float  delta_value;
+   uint8  value_bits;
+   uint8  lookup_type;
+   uint8  sequence_p;
+   uint8  sparse;             // if set, add_entry() packs codes by insertion order instead of by symbol
+   uint32 lookup_values;
+   codetype *multiplicands;
+   uint32 *codewords;         // huffman codes as stored by add_entry()
+   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
+    int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];   // filled by compute_accelerated_huffman(); -1 = no entry
+   #else
+    int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];   // filled by compute_accelerated_huffman(); -1 = no entry
+   #endif
+   uint32 *sorted_codewords;  // bit-reversed codes in ascending order, followed by a 0xffffffff terminator
+   int    *sorted_values;     // entry recorded for the code at the same position in sorted_codewords
+   int     sorted_entries;    // number of codes held in the sorted arrays
+} Codebook;
+
+typedef struct   // floor type 0 parameters (NOTE: the file header lists floor 0 as not supported)
+{
+   uint8 order;
+   uint16 rate;
+   uint16 bark_map_size;
+   uint8 amplitude_bits;
+   uint8 amplitude_offset;
+   uint8 number_of_books;
+   uint8 book_list[16]; // varies
+} Floor0;
+
+typedef struct   // floor type 1 parameters; arrays tagged "varies" are fixed-size upper bounds (see @NOTE above)
+{
+   uint8 partitions;
+   uint8 partition_class_list[32]; // varies
+   uint8 class_dimensions[16]; // varies
+   uint8 class_subclasses[16]; // varies
+   uint8 class_masterbooks[16]; // varies
+   int16 subclass_books[16][8]; // varies
+   uint16 Xlist[31*8+2]; // varies
+   uint8 sorted_order[31*8+2];   // NOTE(review): presumably indices of Xlist in sorted order - confirm
+   uint8 neighbors[31*8+2][2];   // NOTE(review): presumably the low/high pair found by neighbors() - confirm
+   uint8 floor1_multiplier;
+   uint8 rangebits;
+   int values;
+} Floor1;
+
+typedef union   // storage for one floor configuration of either type (the two members overlap)
+{
+   Floor0 floor0;
+   Floor1 floor1;
+} Floor;
+
+typedef struct   // per-residue configuration
+{
+   uint32 begin, end;
+   uint32 part_size;
+   uint8 classifications;
+   uint8 classbook;
+   uint8 **classdata;
+   int16 (*residue_books)[8];   // pointer to rows of 8 int16 values each
+} Residue;
+
+typedef struct   // element type of Mapping::chan
+{
+   uint8 magnitude;
+   uint8 angle;
+   uint8 mux;
+} MappingChannel;
+
+typedef struct   // per-mapping configuration
+{
+   uint16 coupling_steps;
+   MappingChannel *chan;      // NOTE(review): presumably one element per channel - confirm
+   uint8  submaps;
+   uint8  submap_floor[15]; // varies
+   uint8  submap_residue[15]; // varies
+} Mapping;
+
+typedef struct   // element type of stb_vorbis::mode_config
+{
+   uint8 blockflag;
+   uint8 mapping;
+   uint16 windowtype;
+   uint16 transformtype;
+} Mode;
+
+typedef struct   // state of one candidate-page CRC check (stb_vorbis::scan holds STB_VORBIS_PUSHDATA_CRC_COUNT of these)
+{
+   uint32  goal_crc;    // expected crc if match
+   int     bytes_left;  // bytes left in packet
+   uint32  crc_so_far;  // running crc
+   int     bytes_done;  // bytes processed in _current_ chunk
+   uint32  sample_loc;  // granule pos encoded in page
+} CRCscan;
+
+typedef struct   // record kept for stb_vorbis::p_first and stb_vorbis::p_last
+{
+   uint32 page_start, page_end;
+   uint32 last_decoded_sample;
+} ProbedPage;
+
+struct stb_vorbis
+{
+  // user-accessible info
+   unsigned int sample_rate;
+   int channels;
+
+   unsigned int setup_memory_required;   // running total of the rounded sizes passed through setup_malloc()
+   unsigned int temp_memory_required;
+   unsigned int setup_temp_memory_required;
+
+  // input config
+#ifndef STB_VORBIS_NO_STDIO
+   FILE *f;            // read by get8()/getn()/skip() when 'stream' is null
+   uint32 f_start;     // added to the requested location in set_file_offset()
+   int close_on_free;
+#endif
+
+   uint8 *stream;         // current read pointer for in-memory input; non-null selects the memory path
+   uint8 *stream_start;   // base that set_file_offset() locations are measured from
+   uint8 *stream_end;     // reads at or beyond this pointer set 'eof'
+
+   uint32 stream_len;
+
+   uint8  push_mode;      // when set, set_file_offset() refuses to seek
+
+   uint32 first_audio_page_offset;
+
+   ProbedPage p_first, p_last;
+
+  // memory management
+   stb_vorbis_alloc alloc;
+   int setup_offset;      // user-buffer offset where the next setup_malloc() block starts (grows upward)
+   int temp_offset;       // user-buffer offset of the most recent temp block (grows downward)
+
+  // run-time results
+   int eof;
+   enum STBVorbisError error;   // last code stored by error()
+
+  // user-useful data
+
+  // header info
+   int blocksize[2];
+   int blocksize_0, blocksize_1;
+   int codebook_count;
+   Codebook *codebooks;
+   int floor_count;
+   uint16 floor_types[64]; // varies
+   Floor *floor_config;
+   int residue_count;
+   uint16 residue_types[64]; // varies
+   Residue *residue_config;
+   int mapping_count;
+   Mapping *mapping;
+   int mode_count;
+   Mode mode_config[64];  // varies
+
+   uint32 total_samples;
+
+  // decode buffer
+   float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
+   float *outputs        [STB_VORBIS_MAX_CHANNELS];
+
+   float *previous_window[STB_VORBIS_MAX_CHANNELS];
+   int previous_length;
+
+   #ifndef STB_VORBIS_NO_DEFER_FLOOR
+   int16 *finalY[STB_VORBIS_MAX_CHANNELS];
+   #else
+   float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
+   #endif
+
+   uint32 current_loc; // sample location of next frame to decode
+   int    current_loc_valid;
+
+  // per-blocksize precomputed data
+   
+   // twiddle factors
+   float *A[2],*B[2],*C[2];   // allocated and filled per blocksize index by init_blocksize()
+   float *window[2];          // filled by compute_window() via init_blocksize()
+   uint16 *bit_reverse[2];    // filled by compute_bitreverse() via init_blocksize()
+
+  // current page/packet/segment streaming info
+   uint32 serial; // stream serial number for verification
+   int last_page;
+   int segment_count;
+   uint8 segments[255];
+   uint8 page_flag;
+   uint8 bytes_in_seg;
+   uint8 first_decode;
+   int next_seg;
+   int last_seg;  // flag that we're on the last segment
+   int last_seg_which; // what was the segment number of the last seg?
+   uint32 acc;
+   int valid_bits;
+   int packet_bytes;
+   int end_seg_with_known_loc;
+   uint32 known_loc_for_packet;
+   int discard_samples_deferred;
+   uint32 samples_output;
+
+  // push mode scanning
+   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+   CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
+#endif
+
+  // sample-access
+   int channel_buffer_start;
+   int channel_buffer_end;
+};
+
+#if defined(STB_VORBIS_NO_PUSHDATA_API)
+   #define IS_PUSH_MODE(f)   FALSE
+#elif defined(STB_VORBIS_NO_PULLDATA_API)
+   #define IS_PUSH_MODE(f)   TRUE
+#else
+   #define IS_PUSH_MODE(f)   ((f)->push_mode)
+#endif
+
+typedef struct stb_vorbis vorb;
+
+static int error(vorb *f, enum STBVorbisError e)
+{
+   f->error = e;   // latch the code; always returns 0 so callers can write 'return error(f, code);'
+   // Redundant store: a breakpoint hook reached only for errors other than EOF / need-more-data.
+   if (e != VORBIS_need_more_data && !f->eof)
+      f->error = e;
+   return 0;
+}
+
+
+// these functions are used for allocating temporary memory
+// while decoding. if you can afford the stack space, use
+// alloca(); otherwise, provide a temp buffer and it will
+// allocate out of those.
+
+#define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
+
+#define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
+#define temp_alloc_save(f)              ((f)->temp_offset)
+#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
+
+#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
+
+// Carve 'mem' into a table of 'count' pointers followed by 'count' subblocks of 'size'
+// bytes each; entry k of the table addresses subblock k. Returns the table (== mem).
+static void *make_block_array(void *mem, int count, int size)
+{
+   void **table = (void **) mem;
+   char *block = (char *) (table + count);   // first subblock sits right after the table
+   int k;
+
+   for (k = 0; k < count; ++k, block += size)
+      table[k] = block;
+   return table;
+}
+
+static void *setup_malloc(vorb *f, int sz)
+{
+   int rounded = (sz+3) & ~3;                  // request rounded up to a multiple of 4
+   f->setup_memory_required += rounded;        // counted even when the allocation below fails
+
+   if (!f->alloc.alloc_buffer)                 // no user buffer: heap (NULL for a 0-byte request)
+      return rounded ? malloc(rounded) : NULL;
+   // user buffer: setup data grows upward from setup_offset and may not pass temp_offset
+   if (f->setup_offset + rounded > f->temp_offset) return NULL;
+   f->setup_offset += rounded;
+   return (char *) f->alloc.alloc_buffer + (f->setup_offset - rounded);
+}
+
+static void setup_free(vorb *f, void *p)
+{
+   // heap allocations are released; user-buffer setup memory is a stack, so nothing to do there
+   if (!f->alloc.alloc_buffer) free(p);
+}
+
+static void *setup_temp_malloc(vorb *f, int sz)
+{
+   int rounded = (sz+3) & ~3;   // request rounded up to a multiple of 4
+   if (!f->alloc.alloc_buffer)
+      return malloc(rounded);
+   // user buffer: temp space is carved downward from temp_offset and may not cross setup_offset
+   if (f->temp_offset - rounded < f->setup_offset) return NULL;
+   f->temp_offset -= rounded;
+   return (char *) f->alloc.alloc_buffer + f->temp_offset;
+}
+
+static void setup_temp_free(vorb *f, void *p, int sz)
+{
+   if (!f->alloc.alloc_buffer) {
+      free(p);                     // heap path: 'sz' is not needed
+      return;
+   }
+   f->temp_offset += (sz+3)&~3;    // user-buffer path: give back the rounded size; 'p' is unused
+}
+
+#define CRC32_POLY    0x04c11db7   // from spec
+
+static uint32 crc_table[256];
+static void crc32_init(void)   // fills crc_table[] using CRC32_POLY, most-significant bit first
+{
+   int byte, bit;
+   for (byte = 0; byte < 256; ++byte) {
+      uint32 reg = (uint32) byte << 24;        // the byte enters at the top of the register
+      for (bit = 0; bit < 8; ++bit)            // 8 shifts; xor the polynomial whenever the top bit was set
+         reg = (reg & 0x80000000U) ? (reg << 1) ^ CRC32_POLY : (reg << 1);
+      crc_table[byte] = reg;
+   }
+}
+
+static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
+{  // fold one byte in: index crc_table by (top CRC byte xor input), xor with the CRC shifted up 8
+   return crc_table[(crc >> 24) ^ byte] ^ (crc << 8);
+}
+
+
+// reverses the 32 bits of n; used in setup, and for huffman that doesn't go fast path
+static unsigned int bit_reverse(unsigned int n)
+{
+  n = ((n >>  1) & 0x55555555) | ((n & 0x55555555) <<  1);   // swap adjacent bits
+  n = ((n >>  2) & 0x33333333) | ((n & 0x33333333) <<  2);   // swap bit pairs
+  n = ((n >>  4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) <<  4);   // swap nibbles
+  n = ((n >>  8) & 0x00FF00FF) | ((n & 0x00FF00FF) <<  8);   // swap bytes
+  return (n << 16) | (n >> 16);                              // swap half-words
+}
+
+static float square(float x)
+{
+   return x*x;   // x multiplied by itself
+}
+
+// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
+// (i.e. the bit length of n), as required by the specification. fast(?) implementation from stb.h
+// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
+static int ilog(int32 n)
+{
+   static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };   // log2_4[v] = bit length of v, for v in 0..15
+
+   if (n < 0) return 0; // negative n returns 0
+
+   // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
+   // each branch shifts n down into the 0..15 table range and adds the shift amount back
+   if (n < (1 << 14))
+        if (n < (1 <<  4))            return  0 + log2_4[n      ];
+        else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
+             else                     return 10 + log2_4[n >> 10];
+   else if (n < (1 << 24))
+             if (n < (1 << 19))       return 15 + log2_4[n >> 15];
+             else                     return 20 + log2_4[n >> 20];
+        else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
+             else                     return 30 + log2_4[n >> 30];
+}
+
+#ifndef M_PI
+  #define M_PI  3.14159265358979323846264f  // from CRC
+#endif
+
+// code length assigned to a value with no huffman encoding
+#define NO_CODE   255
+
+/////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
+//
+// these functions are only called at setup, and only a few times
+// per file
+
+static float float32_unpack(uint32 x)
+{
+   // from the specification: bit 31 sign, bits 21-30 exponent (offset by 788), bits 0-20 mantissa
+   uint32 mantissa = x & 0x1fffff;
+   uint32 sign = x & 0x80000000;
+   int exp = (int) ((x & 0x7fe00000) >> 21);   // kept signed so that exp-788 cannot wrap around
+   double res = sign ? -(double)mantissa : (double)mantissa;
+   return (float) ldexp((float)res, exp-788);
+}
+
+
+// zlib & jpeg huffman tables assume that the output symbols
+// can either be arbitrarily arranged, or have monotonically
+// increasing frequencies--they rely on the lengths being sorted;
+// this makes for a very simple generation algorithm.
+// vorbis allows a huffman table with non-sorted lengths. This
+// requires a more sophisticated construction, since symbols in
+// order do not map to huffman codes "in order".
+static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
+{
+   if (c->sparse) {   // sparse: pack by insertion order 'count', remembering symbol and length
+      values             [count] = symbol;
+      c->codeword_lengths[count] = len;
+      c->codewords       [count] = huff_code;
+   } else {           // dense: the code is indexed directly by symbol; 'len' and 'values' unused
+      c->codewords      [symbol] = huff_code;
+   }
+}
+
+static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
+{
+   // Assigns a huffman code to every entry whose length is not NO_CODE, recording each via
+   // add_entry(). Returns FALSE if no leaf is available for an entry or a length exceeds 31.
+   int i,k,m=0;
+   uint32 available[32];   // available[d] = free leaf at depth d (0 = none); usable d is 1..31
+
+   memset(available, 0, sizeof(available));
+   // find the first entry
+   for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
+   if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
+   // a length of 32 or more would index past the end of available[]; reject it
+   if (len[k] >= 32) return FALSE;
+   // add to the list
+   add_entry(c, 0, k, m++, len[k], values);
+   // add all available leaves
+   for (i=1; i <= len[k]; ++i)
+      available[i] = 1U << (32-i);
+   // note that the above code treats the first case specially, but it's really the same as
+   // the following code, so they could probably be combined (except the initial code is 0,
+   // and I use 0 in available[] to mean 'empty')
+   for (i=k+1; i < n; ++i) {
+      uint32 res;
+      int z = len[i], y;
+      if (z == NO_CODE) continue;
+      if (z >= 32) return FALSE;   // same bound as above: keeps every available[] access in range
+      // find lowest available leaf (should always be earliest, which is what the
+      // specification calls for). note that this property, and the fact we can never have
+      // more than one free leaf at a given level, isn't totally trivial to prove, but it
+      // seems true and the assert never fires, so!
+      while (z > 0 && !available[z]) --z;
+      if (z == 0) { return FALSE; }
+      res = available[z];
+      available[z] = 0;
+      add_entry(c, bit_reverse(res), i, m++, len[i], values);
+      // propagate availability up the tree
+      if (z != len[i]) {
+         for (y=len[i]; y > z; --y) {
+            assert(available[y] == 0);
+            available[y] = res + (1 << (32-y));
+         }
+      }
+   }
+   return TRUE;
+}
+
+// accelerated huffman table allows fast O(1) match of all symbols
+// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
+static void compute_accelerated_huffman(Codebook *c)
+{
+   int entry, limit, slot;
+   // start with every slot marked "no fast decode" (-1)
+   for (slot = 0; slot < FAST_HUFFMAN_TABLE_SIZE; ++slot)
+      c->fast_huffman[slot] = -1;
+
+   limit = c->sparse ? c->sorted_entries : c->entries;
+   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
+   if (limit > 32767) limit = 32767; // largest possible value we can encode!
+   #endif
+   for (entry = 0; entry < limit; ++entry) {
+      uint32 code, stride;
+      if (c->codeword_lengths[entry] > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;   // too long for the table
+      code   = c->sparse ? bit_reverse(c->sorted_codewords[entry]) : c->codewords[entry];
+      stride = 1 << c->codeword_lengths[entry];
+      // the code occupies the low bits; replicate it for every pattern of the higher bits
+      for (; code < FAST_HUFFMAN_TABLE_SIZE; code += stride)
+         c->fast_huffman[code] = entry;
+   }
+}
+
+#ifdef _MSC_VER
+#define STBV_CDECL __cdecl
+#else
+#define STBV_CDECL
+#endif
+
+static int STBV_CDECL uint32_compare(const void *p, const void *q)
+{
+   const uint32 lhs = *(const uint32 *) p, rhs = *(const uint32 *) q;
+   if (lhs < rhs) return -1;      // comparator result: -1, 0 or 1 for ascending order
+   return lhs > rhs ? 1 : 0;
+}
+
+static int include_in_sort(Codebook *c, uint8 len)
+{
+   if (!c->sparse)   // dense book: only real codes that are too long for the fast table
+      return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH) ? TRUE : FALSE;
+   assert(len != NO_CODE);   // a sparse book is expected to contain no unused entries
+   return TRUE;
+}
+
+// if the fast table above doesn't work, we want to binary search them... need to reverse the bits.
+// fills c->sorted_codewords (bit-reversed, ascending, 0xffffffff-terminated) and c->sorted_values
+static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
+{
+   int i, len;
+   // build a list of all the entries
+   // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
+   // this is kind of a frivolous optimization--I don't see any performance improvement,
+   // but it's like 4 extra lines of code, so.
+   if (!c->sparse) {
+      int k = 0;
+      for (i=0; i < c->entries; ++i)
+         if (include_in_sort(c, lengths[i])) 
+            c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
+      assert(k == c->sorted_entries);
+   } else {
+      for (i=0; i < c->sorted_entries; ++i)
+         c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
+   }
+
+   qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
+   c->sorted_codewords[c->sorted_entries] = 0xffffffff;   // terminator stored one slot past the sorted codes
+
+   len = c->sparse ? c->sorted_entries : c->entries;
+   // now we need to indicate how they correspond; we could either
+   //   #1: sort a different data structure that says who they correspond to
+   //   #2: for each sorted entry, search the original list to find who corresponds
+   //   #3: for each original entry, find the sorted entry
+   // #1 requires extra storage, #2 is slow, #3 can use binary search!
+   for (i=0; i < len; ++i) {
+      int huff_len = c->sparse ? lengths[values[i]] : lengths[i];   // sparse: look the length up through the symbol
+      if (include_in_sort(c,huff_len)) {
+         uint32 code = bit_reverse(c->codewords[i]);
+         int x=0, n=c->sorted_entries;
+         while (n > 1) {
+            // invariant: sc[x] <= code < sc[x+n]
+            int m = x + (n >> 1);
+            if (c->sorted_codewords[m] <= code) {
+               x = m;
+               n -= (n>>1);
+            } else {
+               n >>= 1;
+            }
+         }
+         assert(c->sorted_codewords[x] == code);
+         if (c->sparse) {
+            c->sorted_values[x] = values[i];
+            c->codeword_lengths[x] = huff_len;   // sparse: lengths are re-stored in sorted order as well
+         } else {
+            c->sorted_values[x] = i;
+         }
+      }
+   }
+}
+
+// only run while parsing the header (3 times)
+static int vorbis_validate(uint8 *data)
+{
+   // nonzero iff the first six bytes at 'data' are 'v','o','r','b','i','s'
+   return memcmp(data, "vorbis", 6) == 0;
+}
+
+// called from setup only, once per code book (formula implied by specification)
+// intended result, as checked by the asserts below: r with r^dim <= entries < (r+1)^dim
+static int lookup1_values(int entries, int dim)
+{
+   int r = (int) floor(exp((float) log((float) entries) / dim));   // first estimate: entries^(1/dim), rounded down
+   if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
+      ++r;                                              // floor() to avoid _ftol() when non-CRT
+   assert(pow((float) r+1, dim) > entries);
+   assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
+   return r;
+}
+
+// called twice per file
+static void compute_twiddle_factors(int n, float *A, float *B, float *C)
+{
+   int quarter = n >> 2, eighth = n >> 3;
+   int k, pos;   // pos tracks 2*k: every iteration fills one (even, odd) slot pair
+
+   for (k = 0, pos = 0; k < quarter; ++k, pos += 2) {
+      A[pos  ] = (float)  cos(4*k*M_PI/n);
+      A[pos+1] = (float) -sin(4*k*M_PI/n);
+      B[pos  ] = (float)  cos((pos+1)*M_PI/n/2) * 0.5f;
+      B[pos+1] = (float)  sin((pos+1)*M_PI/n/2) * 0.5f;
+   }
+   for (k = 0, pos = 0; k < eighth; ++k, pos += 2) {
+      C[pos  ] = (float)  cos(2*(pos+1)*M_PI/n);
+      C[pos+1] = (float) -sin(2*(pos+1)*M_PI/n);
+   }
+}
+
+static void compute_window(int n, float *window)
+{
+   int half = n >> 1, k;   // only the first n/2 entries of 'window' are written
+   for (k = 0; k < half; ++k)
+      window[k] = (float) sin(0.5 * M_PI * square((float) sin((k - 0 + 0.5) / half * 0.5 * M_PI)));
+}
+
+static void compute_bitreverse(int n, uint16 *rev)
+{
+   int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+   int k, count = n >> 3, shift = 32-ld+3;   // shift keeps the top ld-3 bits of a reversed index
+
+   for (k = 0; k < count; ++k)
+      rev[k] = (bit_reverse(k) >> shift) << 2;
+}
+
+static int init_blocksize(vorb *f, int b, int n)
+{
+   int half = n >> 1, quarter = n >> 2, eighth = n >> 3;
+   f->A[b] = (float *) setup_malloc(f, sizeof(float) * half);      // all three twiddle tables are
+   f->B[b] = (float *) setup_malloc(f, sizeof(float) * half);      // requested before any of the
+   f->C[b] = (float *) setup_malloc(f, sizeof(float) * quarter);   // results is checked
+   if (f->A[b] == NULL || f->B[b] == NULL || f->C[b] == NULL) return error(f, VORBIS_outofmem);
+   compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
+   f->window[b] = (float *) setup_malloc(f, sizeof(float) * half);
+   if (f->window[b] == NULL) return error(f, VORBIS_outofmem);
+   compute_window(n, f->window[b]);
+   f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * eighth);
+   if (f->bit_reverse[b] == NULL) return error(f, VORBIS_outofmem);
+   compute_bitreverse(n, f->bit_reverse[b]);
+   return TRUE;   // every table for blocksize slot b is allocated and filled
+}
+
+static void neighbors(uint16 *x, int n, int *plow, int *phigh)
+{
+   // Among x[0..n-1]: *plow = index of the largest value below x[n], *phigh = index of the
+   // smallest value above x[n]; an output is left untouched when no such value exists.
+   int best_below = -1, best_above = 65536, k;
+   for (k = 0; k < n; ++k) {
+      if (x[k] < x[n] && x[k] > best_below) { best_below = x[k]; *plow  = k; }
+      if (x[k] > x[n] && x[k] < best_above) { best_above = x[k]; *phigh = k; }
+   }
+}
+
+// this has been repurposed: the second field is now the point's original index ('id') instead of its y value
+typedef struct
+{
+   uint16 x,id;
+} stbv__floor_ordering;
+
+static int STBV_CDECL point_compare(const void *p, const void *q)
+{
+   const int lhs = ((const stbv__floor_ordering *) p)->x;   // only the x field takes part in the ordering
+   const int rhs = ((const stbv__floor_ordering *) q)->x;
+   return (lhs > rhs) - (lhs < rhs);                        // -1, 0 or 1 for ascending x
+}
+
+//
+/////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
+
+
+#if defined(STB_VORBIS_NO_STDIO)
+   #define USE_MEMORY(z)    TRUE
+#else
+   #define USE_MEMORY(z)    ((z)->stream)
+#endif
+
+static uint8 get8(vorb *z)
+{
+   if (USE_MEMORY(z)) {   // in-memory source: hand out the next byte while any remain
+      if (z->stream < z->stream_end) return *z->stream++;
+      z->eof = TRUE;      // exhausted: flag eof and yield 0
+      return 0;
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   {
+      int ch = fgetc(z->f);
+      if (ch == EOF) { z->eof = TRUE; return 0; }   // same eof convention for the FILE source
+      return ch;
+   }
+   #endif
+}
+
+static uint32 get32(vorb *f)
+{
+   // assemble four successive get8() bytes into a 32-bit value, least significant byte first
+   uint32 value = 0;
+   int shift;
+   for (shift = 0; shift < 32; shift += 8)
+      value |= (uint32) get8(f) << shift;
+   return value;
+}
+
+static int getn(vorb *z, uint8 *data, int n)
+{
+   // copy exactly n bytes into 'data': 1 on success, otherwise z->eof is set and 0 returned
+   if (USE_MEMORY(z)) {
+      uint8 *past = z->stream + n;
+      if (past > z->stream_end) { z->eof = 1; return 0; }
+      memcpy(data, z->stream, n);
+      z->stream = past;
+      return 1;
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   if (fread(data, n, 1, z->f) != 1) {
+      z->eof = 1;
+      return 0;
+   }
+   return 1;
+   #endif
+}
+
+static void skip(vorb *z, int n)
+{
+   // advance the read position by n bytes
+   if (!USE_MEMORY(z)) {
+      #ifndef STB_VORBIS_NO_STDIO
+      long here = ftell(z->f);
+      fseek(z->f, here+n, SEEK_SET);   // absolute seek to (current position + n)
+      #endif
+      return;
+   }
+   z->stream += n;
+   // landing exactly on stream_end also sets eof, not only overshooting it
+   if (z->stream >= z->stream_end) z->eof = 1;
+}
+
+static int set_file_offset(stb_vorbis *f, unsigned int loc)
+{  // move the input position to offset loc from the stream start; returns 1 on success, 0 on failure
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (f->push_mode) return 0;   // refused in push mode
+   #endif
+   f->eof = 0;
+   if (USE_MEMORY(f)) {
+      if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {   // at/past the end, or the pointer sum wrapped
+         f->stream = f->stream_end;
+         f->eof = 1;
+         return 0;
+      } else {
+         f->stream = f->stream_start + loc;
+         return 1;
+      }
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   if (loc + f->f_start < loc || loc >= 0x80000000) {   // sum wrapped around, or loc has its top bit set
+      loc = 0x7fffffff;
+      f->eof = 1;
+   } else {
+      loc += f->f_start;
+   }
+   if (!fseek(f->f, loc, SEEK_SET))
+      return 1;
+   f->eof = 1;
+   fseek(f->f, f->f_start, SEEK_END);   // seek failed: reposition relative to the end of the file
+   return 0;
+   #endif
+}
+
+
+static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };   // ASCII "OggS"
+
+static int capture_pattern(vorb *f)
+{  // consume up to four input bytes; TRUE only if they are 0x4f 0x67 0x67 0x53
+   if (0x4f != get8(f)) return FALSE;   // reading stops at the first mismatching byte
+   if (0x67 != get8(f)) return FALSE;
+   if (0x67 != get8(f)) return FALSE;
+   if (0x53 != get8(f)) return FALSE;
+   return TRUE;
+}
+
+#define PAGEFLAG_continued_packet   1   // bit 0 of the page header flag byte (f->page_flag)
+#define PAGEFLAG_first_page         2   // bit 1
+#define PAGEFLAG_last_page          4   // bit 2
+
+static int start_page_no_capturepattern(vorb *f)
+{  // parse the page header fields that follow the capture pattern into f; TRUE on success
+   uint32 loc0,loc1,n;
+   // stream structure version
+   if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
+   // header flag
+   f->page_flag = get8(f);
+   // absolute granule position
+   loc0 = get32(f); 
+   loc1 = get32(f);
+   // @TODO: validate loc0,loc1 as valid positions?
+   // stream serial number -- vorbis doesn't interleave, so discard
+   get32(f);
+   //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
+   // page sequence number
+   n = get32(f);
+   f->last_page = n;
+   // CRC32
+   get32(f);
+   // page_segments
+   f->segment_count = get8(f);
+   if (!getn(f, f->segments, f->segment_count))
+      return error(f, VORBIS_unexpected_eof);
+   // assume we _don't_ know the sample position of any segments
+   f->end_seg_with_known_loc = -2;
+   if (loc0 != ~0U || loc1 != ~0U) {   // granule position is not the all-ones value
+      int i;
+      // determine which packet is the last one that will complete
+      for (i=f->segment_count-1; i >= 0; --i)
+         if (f->segments[i] < 255)
+            break;
+      // 'i' is now the index of the _last_ segment of a packet that ends
+      if (i >= 0) {
+         f->end_seg_with_known_loc = i;
+         f->known_loc_for_packet   = loc0;   // only the low 32 bits of the position are kept
+      }
+   }
+   if (f->first_decode) {   // record the extent of this page in f->p_first
+      int i,len;
+      ProbedPage p;
+      len = 0;
+      for (i=0; i < f->segment_count; ++i)
+         len += f->segments[i];
+      len += 27 + f->segment_count;   // 27 = 4 capture bytes + 23 header bytes read above; plus the segment table
+      p.page_start = f->first_audio_page_offset;
+      p.page_end = p.page_start + len;
+      p.last_decoded_sample = loc0;
+      f->p_first = p;
+   }
+   f->next_seg = 0;
+   return TRUE;
+}
+
+static int start_page(vorb *f)
+{  // read a complete page header: capture pattern first, then the remaining fields
+   if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
+   return start_page_no_capturepattern(f);
+}
+
+static int start_packet(vorb *f)
+{  // reset per-packet read state, first loading a new page if no segment is pending
+   while (f->next_seg == -1) {   // -1: the current page has no segments left
+      if (!start_page(f)) return FALSE;
+      if (f->page_flag & PAGEFLAG_continued_packet)   // a new packet must not begin on a continuation page
+         return error(f, VORBIS_continued_packet_flag_invalid);
+   }
+   f->last_seg = FALSE;
+   f->valid_bits = 0;
+   f->packet_bytes = 0;
+   f->bytes_in_seg = 0;
+   // f->next_seg is now valid
+   return TRUE;
+}
+
+static int maybe_start_packet(vorb *f)
+{  // like start_packet, but end of input exactly at a page boundary returns FALSE without raising an error
+   if (f->next_seg == -1) {
+      int x = get8(f);   // first capture byte, read separately so EOF can be told apart from a mismatch
+      if (f->eof) return FALSE; // EOF at page boundary is not an error!
+      if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
+      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (!start_page_no_capturepattern(f)) return FALSE;
+      if (f->page_flag & PAGEFLAG_continued_packet) {
+         // set up enough state that we can read this packet if we want,
+         // e.g. during recovery
+         f->last_seg = FALSE;
+         f->bytes_in_seg = 0;
+         return error(f, VORBIS_continued_packet_flag_invalid);
+      }
+   }
+   return start_packet(f);
+}
+
+static int next_segment(vorb *f)
+{  // advance to the next segment of the current packet; returns its length (0 if there is none)
+   int len;
+   if (f->last_seg) return 0;   // the packet's final segment was already consumed
+   if (f->next_seg == -1) {     // page exhausted: the packet must continue on the next page
+      f->last_seg_which = f->segment_count-1; // in case start_page fails
+      if (!start_page(f)) { f->last_seg = 1; return 0; }
+      if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
+   }
+   len = f->segments[f->next_seg++];
+   if (len < 255) {   // a segment shorter than 255 bytes ends the packet
+      f->last_seg = TRUE;
+      f->last_seg_which = f->next_seg-1;
+   }
+   if (f->next_seg >= f->segment_count)
+      f->next_seg = -1;   // mark the page as exhausted
+   assert(f->bytes_in_seg == 0);
+   f->bytes_in_seg = len;
+   return len;
+}
+
+#define EOP    (-1)            // returned by the packet byte readers at end of packet
+#define INVALID_BITS  (-1)     // stored in valid_bits by get_bits once end of packet was hit
+
+static int get8_packet_raw(vorb *f)
+{  // next byte of the current packet, or EOP; does not touch the bit accumulator
+   if (!f->bytes_in_seg) {  // CLANG!
+      if (f->last_seg) return EOP;
+      else if (!next_segment(f)) return EOP;   // a zero-length next segment also ends the read
+   }
+   assert(f->bytes_in_seg > 0);
+   --f->bytes_in_seg;
+   ++f->packet_bytes;
+   return get8(f);
+}
+
+static int get8_packet(vorb *f)
+{  // next byte of the current packet (or EOP), discarding any buffered bits
+   int x = get8_packet_raw(f);
+   f->valid_bits = 0;   // byte reads invalidate whatever is left in the bit accumulator
+   return x;
+}
+
+static void flush_packet(vorb *f)
+{  // discard the remainder of the current packet
+   while (get8_packet_raw(f) != EOP);   // empty body: just consume bytes until end of packet
+}
+
+// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
+// as the huffman decoder?  Returns the next n bits of the packet, low bits first; 0 once the packet has ended.
+static uint32 get_bits(vorb *f, int n)
+{
+   uint32 z;
+
+   if (f->valid_bits < 0) return 0;   // a previous call already ran off the end of the packet
+   if (f->valid_bits < n) {
+      if (n > 24) {
+         // the accumulator technique below would not work correctly in this case
+         z = get_bits(f, 24);
+         z += get_bits(f, n-24) << 24;
+         return z;
+      }
+      if (f->valid_bits == 0) f->acc = 0;
+      while (f->valid_bits < n) {
+         int z = get8_packet_raw(f);   // note: shadows the outer z inside this loop
+         if (z == EOP) {
+            f->valid_bits = INVALID_BITS;   // remember the failure for later calls
+            return 0;
+         }
+         f->acc += (unsigned) z << f->valid_bits;   // unsigned shift, same as prep_huffman
+         f->valid_bits += 8;
+      }
+   }
+   if (f->valid_bits < 0) return 0;
+   z = f->acc & ((1 << n)-1);   // extract the low n bits
+   f->acc >>= n;
+   f->valid_bits -= n;
+   return z;
+}
+
+// @OPTIMIZE: primary accumulator for huffman
+// expand the buffer to as many bits as possible without reading off end of packet
+// it might be nice to allow f->valid_bits and f->acc to be stored in registers,
+// e.g. cache them locally and decode locally
+static __forceinline void prep_huffman(vorb *f)
+{
+   if (f->valid_bits <= 24) {   // room for at least one more byte in the accumulator
+      if (f->valid_bits == 0) f->acc = 0;
+      do {
+         int z;
+         if (f->last_seg && !f->bytes_in_seg) return;   // packet fully consumed: stop without reading
+         z = get8_packet_raw(f);
+         if (z == EOP) return;
+         f->acc += (unsigned) z << f->valid_bits;   // append the new byte above the bits already held
+         f->valid_bits += 8;
+      } while (f->valid_bits <= 24);
+   }
+}
+
+enum
+{  // packet type codes; presumably the leading byte of the three Vorbis header packets -- confirm at the use sites
+   VORBIS_packet_id = 1,
+   VORBIS_packet_comment = 3,
+   VORBIS_packet_setup = 5
+};
+
+static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
+{  // slow-path decode of one codeword from the bit accumulator; returns an index, or -1 on failure
+   int i;
+   prep_huffman(f);
+
+   if (c->codewords == NULL && c->sorted_codewords == NULL)
+      return -1;   // no decode tables available at all
+
+   // cases to use binary search: sorted_codewords && !c->codewords
+   //                             sorted_codewords && c->entries > 8
+   if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
+      // binary search
+      uint32 code = bit_reverse(f->acc);
+      int x=0, n=c->sorted_entries, len;
+
+      while (n > 1) {
+         // invariant: sc[x] <= code < sc[x+n]
+         int m = x + (n >> 1);
+         if (c->sorted_codewords[m] <= code) {
+            x = m;
+            n -= (n>>1);
+         } else {
+            n >>= 1;
+         }
+      }
+      // x is now the sorted index
+      if (!c->sparse) x = c->sorted_values[x];
+      // x is now sorted index if sparse, or symbol otherwise
+      len = c->codeword_lengths[x];
+      if (f->valid_bits >= len) {   // enough bits buffered: consume the codeword
+         f->acc >>= len;
+         f->valid_bits -= len;
+         return x;
+      }
+
+      f->valid_bits = 0;   // codeword runs past the available bits
+      return -1;
+   }
+
+   // if small, linear search
+   assert(!c->sparse);
+   for (i=0; i < c->entries; ++i) {
+      if (c->codeword_lengths[i] == NO_CODE) continue;
+      if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {   // low bits of acc match entry i
+         if (f->valid_bits >= c->codeword_lengths[i]) {
+            f->acc >>= c->codeword_lengths[i];
+            f->valid_bits -= c->codeword_lengths[i];
+            return i;
+         }
+         f->valid_bits = 0;
+         return -1;
+      }
+   }
+
+   error(f, VORBIS_invalid_stream);   // no entry matched the buffered bits
+   f->valid_bits = 0;
+   return -1;
+}
+
+#ifndef STB_VORBIS_NO_INLINE_DECODE   // default: the fast-table decode is expanded inline as a macro
+
+#define DECODE_RAW(var, f,c)                                  \
+   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
+      prep_huffman(f);                                        \
+   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
+   var = c->fast_huffman[var];                                \
+   if (var >= 0) {                                            \
+      int n = c->codeword_lengths[var];                       \
+      f->acc >>= n;                                           \
+      f->valid_bits -= n;                                     \
+      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
+   } else {                                                   \
+      var = codebook_decode_scalar_raw(f,c);                  \
+   }
+
+#else   // STB_VORBIS_NO_INLINE_DECODE: same logic as the macro above, as a function
+
+static int codebook_decode_scalar(vorb *f, Codebook *c)
+{  // decode one codeword; returns an index, or -1 on failure
+   int i;
+   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
+      prep_huffman(f);
+   // fast huffman table lookup
+   i = f->acc & FAST_HUFFMAN_TABLE_MASK;
+   i = c->fast_huffman[i];
+   if (i >= 0) {   // table hit: consume the codeword's bits
+      f->acc >>= c->codeword_lengths[i];
+      f->valid_bits -= c->codeword_lengths[i];
+      if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }   // codeword ran past the buffered bits
+      return i;
+   }
+   return codebook_decode_scalar_raw(f,c);   // table miss: take the slow path
+}
+
+#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
+
+#endif
+
+#define DECODE(var,f,c)                                       \
+   DECODE_RAW(var,f,c)                                        \
+   if (c->sparse) var = c->sorted_values[var];
+
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)   // keeps the raw index (no sorted_values remap)
+#else
+  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)       // remaps through sorted_values for sparse books
+#endif
+
+
+
+
+
+
+// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
+// where we avoid one addition (as defined here, both accessors expand identically)
+#define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
+#define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
+#define CODEBOOK_ELEMENT_BASE(c)         (0)
+
+static int codebook_decode_start(vorb *f, Codebook *c)
+{  // decode one VQ codeword index from c; returns the index, or a negative value on failure / end of packet
+   int z = -1;
+
+   // type 0 is only legal in a scalar context
+   if (c->lookup_type == 0)
+      error(f, VORBIS_invalid_stream);
+   else {
+      DECODE_VQ(z,f,c);
+      if (c->sparse) assert(z < c->sorted_entries);
+      if (z < 0) {  // check for EOP
+         if (!f->bytes_in_seg)
+            if (f->last_seg)
+               return z;   // clean end of packet: no error is recorded
+         error(f, VORBIS_invalid_stream);
+      }
+   }
+   return z;
+}
+
+static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
+{  // decode one vector from c and add up to len of its elements into output[0..]; FALSE if no codeword was decoded
+   int i,z = codebook_decode_start(f,c);
+   if (z < 0) return FALSE;
+   if (len > c->dimensions) len = c->dimensions;   // never write more than one vector's worth
+
+#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+   if (c->lookup_type == 1) {   // element offsets are derived from z by repeated div/mod
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      int div = 1;
+      for (i=0; i < len; ++i) {
+         int off = (z / div) % c->lookup_values;
+         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+         output[i] += val;
+         if (c->sequence_p) last = val + c->minimum_value;
+         div *= c->lookup_values;
+      }
+      return TRUE;
+   }
+#endif
+
+   z *= c->dimensions;   // z becomes the offset of the vector's first element in multiplicands
+   if (c->sequence_p) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      for (i=0; i < len; ++i) {
+         float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+         output[i] += val;
+         last = val + c->minimum_value;   // each element builds on the previous one
+      }
+   } else {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      for (i=0; i < len; ++i) {
+         output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+      }
+   }
+
+   return TRUE;
+}
+
+static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
+{  // like codebook_decode, but element i is added into output[i*step]
+   int i,z = codebook_decode_start(f,c);
+   float last = CODEBOOK_ELEMENT_BASE(c);
+   if (z < 0) return FALSE;
+   if (len > c->dimensions) len = c->dimensions;
+
+#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+   if (c->lookup_type == 1) {
+      int div = 1;
+      for (i=0; i < len; ++i) {
+         int off = (z / div) % c->lookup_values;
+         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+         output[i*step] += val;
+         if (c->sequence_p) last = val;
+         div *= c->lookup_values;
+      }
+      return TRUE;
+   }
+#endif
+
+   z *= c->dimensions;   // offset of the vector's first element in multiplicands
+   for (i=0; i < len; ++i) {
+      float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+      output[i*step] += val;
+      if (c->sequence_p) last = val;   // note: no minimum_value term here, unlike codebook_decode
+   }
+
+   return TRUE;
+}
+
+static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
+{  // decode vectors from c until total_decode values are consumed, adding value k into outputs[channel][position] round-robin over ch channels
+   int c_inter = *c_inter_p;   // current channel
+   int p_inter = *p_inter_p;   // current position within each channel buffer
+   int i,z, effective = c->dimensions;
+
+   // type 0 is only legal in a scalar context
+   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
+
+   while (total_decode > 0) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      DECODE_VQ(z,f,c);
+      #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+      assert(!c->sparse || z < c->sorted_entries);
+      #endif
+      if (z < 0) {
+         if (!f->bytes_in_seg)
+            if (f->last_seg) return FALSE;   // clean end of packet: no error is recorded
+         return error(f, VORBIS_invalid_stream);
+      }
+
+      // if this will take us off the end of the buffers, stop short!
+      // we check by computing the length of the virtual interleaved
+      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
+      // and the length we'll be using (effective)
+      if (c_inter + p_inter*ch + effective > len * ch) {
+         effective = len*ch - (p_inter*ch + c_inter);   // remaining room = buffer length minus current offset
+      }
+
+   #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+      if (c->lookup_type == 1) {
+         int div = 1;
+         for (i=0; i < effective; ++i) {
+            int off = (z / div) % c->lookup_values;
+            float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+            if (outputs[c_inter])
+               outputs[c_inter][p_inter] += val;
+            if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+            if (c->sequence_p) last = val;
+            div *= c->lookup_values;
+         }
+      } else
+   #endif
+      {
+         z *= c->dimensions;   // offset of the vector's first element in multiplicands
+         if (c->sequence_p) {
+            for (i=0; i < effective; ++i) {
+               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+               if (outputs[c_inter])   // a NULL channel buffer is skipped but still advances the cursor
+                  outputs[c_inter][p_inter] += val;
+               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+               last = val;
+            }
+         } else {
+            for (i=0; i < effective; ++i) {
+               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+               if (outputs[c_inter])
+                  outputs[c_inter][p_inter] += val;
+               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+            }
+         }
+      }
+
+      total_decode -= effective;
+   }
+   *c_inter_p = c_inter;   // hand the updated cursor back to the caller
+   *p_inter_p = p_inter;
+   return TRUE;
+}
+
+static int predict_point(int x, int x0, int x1, int y0, int y1)
+{  // integer y value at x on the line through (x0,y0) and (x1,y1); the offset from y0 is truncated toward zero
+   int dy = y1 - y0;
+   int adx = x1 - x0;   // NOTE(review): used as a divisor below; assumes x1 != x0 -- confirm at the call sites
+   // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
+   int err = abs(dy) * (x - x0);
+   int off = err / adx;
+   return dy < 0 ? y0 - off : y0 + off;   // apply the magnitude in the direction of dy
+}
+
+// inverse dB lookup table (256 entries, indexed by y in draw_line), block-copied from the specification
+static float inverse_db_table[256] =
+{
+  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 
+  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 
+  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 
+  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, 
+  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, 
+  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, 
+  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, 
+  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, 
+  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, 
+  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, 
+  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, 
+  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, 
+  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, 
+  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, 
+  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, 
+  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, 
+  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, 
+  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, 
+  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, 
+  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, 
+  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, 
+  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, 
+  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, 
+  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, 
+  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, 
+  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, 
+  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, 
+  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, 
+  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, 
+  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, 
+  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, 
+  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, 
+  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, 
+  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, 
+  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, 
+  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, 
+  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f, 
+  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f, 
+  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f, 
+  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f, 
+  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f, 
+  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f, 
+  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f, 
+  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f, 
+  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f, 
+  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f, 
+  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f, 
+  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f, 
+  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f, 
+  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f, 
+  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f, 
+  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f, 
+  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f, 
+  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f, 
+  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f, 
+  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f, 
+  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f, 
+  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f, 
+  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f, 
+  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f, 
+  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f, 
+  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f, 
+  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f, 
+  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
+};
+
+
+// @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
+// note that you must produce bit-identical output to decode correctly;
+// this specific sequence of operations is specified in the spec (it's
+// drawing integer-quantized frequency-space lines that the encoder
+// expects to be exactly the same)
+//     ... also, isn't the whole point of Bresenham's algorithm to NOT
+// have to divide in the setup? sigh.
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+#define LINE_OP(a,b)   a *= b   // default: multiply the floor value into the existing output
+#else
+#define LINE_OP(a,b)   a = b    // STB_VORBIS_NO_DEFER_FLOOR: overwrite the output with the floor value
+#endif
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+#define DIVTAB_NUMER   32
+#define DIVTAB_DENOM   64
+int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
+#endif
+
+static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
+{  // apply LINE_OP with inverse_db_table values along the integer line (x0,y0)-(x1,y1), for x in [x0, min(x1,n))
+   int dy = y1 - y0;
+   int adx = x1 - x0;   // NOTE(review): used as a divisor below; assumes x1 != x0 -- confirm at the call sites
+   int ady = abs(dy);
+   int base;            // whole-number part of the slope, applied on every step
+   int x=x0,y=y0;
+   int err = 0;
+   int sy;              // step used instead of base when the error term overflows
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+   if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
+      if (dy < 0) {
+         base = -integer_divide_table[ady][adx];
+         sy = base-1;
+      } else {
+         base =  integer_divide_table[ady][adx];
+         sy = base+1;
+      }
+   } else {
+      base = dy / adx;
+      if (dy < 0)
+         sy = base - 1;
+      else
+         sy = base+1;
+   }
+#else
+   base = dy / adx;
+   if (dy < 0)
+      sy = base - 1;
+   else
+      sy = base+1;
+#endif
+   ady -= abs(base) * adx;   // remainder of the slope, accumulated in err
+   if (x1 > n) x1 = n;       // never write past the first n outputs
+   if (x < x1) {
+      LINE_OP(output[x], inverse_db_table[y&255]);   // mask keeps the index inside the 256-entry table
+      for (++x; x < x1; ++x) {
+         err += ady;
+         if (err >= adx) {
+            err -= adx;
+            y += sy;
+         } else
+            y += base;
+         LINE_OP(output[x], inverse_db_table[y&255]);   // mask: corrupt floor data can push y outside 0..255
+      }
+   }
+}
+
+static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
+{  // decode n residue values into target starting at offset, using the layout selected by rtype; FALSE if a codebook decode fails
+   int k;
+   if (rtype == 0) {   // strided: vector k contributes to target[offset+k], target[offset+k+step], ...
+      int step = n / book->dimensions;
+      for (k=0; k < step; ++k)
+         if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
+            return FALSE;
+   } else {            // contiguous: each vector fills the next book->dimensions values
+      for (k=0; k < n; ) {
+         if (!codebook_decode(f, book, target+offset, n-k))
+            return FALSE;
+         k += book->dimensions;
+         offset += book->dimensions;
+      }
+   }
+   return TRUE;
+}
+
+static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
+{  // decode residue configuration rn into residue_buffers[0..ch-1] (n floats each); channels flagged in do_not_decode are left alone
+   int i,j,pass;
+   Residue *r = f->residue_config + rn;
+   int rtype = f->residue_types[rn];
+   int c = r->classbook;
+   int classwords = f->codebooks[c].dimensions;   // partitions classified per classbook codeword
+   int n_read = r->end - r->begin;
+   int part_read = n_read / r->part_size;         // number of partitions to decode
+   int temp_alloc_point = temp_alloc_save(f);     // restored at 'done'
+   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+   uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
+   #else
+   int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
+   #endif
+
+   CHECK(f);
+
+   for (i=0; i < ch; ++i)
+      if (!do_not_decode[i])
+         memset(residue_buffers[i], 0, sizeof(float) * n);   // decoded values are accumulated with +=, so start from zero
+
+   if (rtype == 2 && ch != 1) {   // type 2 with several channels: values are interleaved across channels
+      for (j=0; j < ch; ++j)
+         if (!do_not_decode[j])
+            break;
+      if (j == ch)
+         goto done;   // every channel is flagged do-not-decode
+
+      for (pass=0; pass < 8; ++pass) {
+         int pcount = 0, class_set = 0;
+         if (ch == 2) {
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = (z & 1), p_inter = z>>1;   // z % 2 and z / 2 for the two-channel case
+               if (pass == 0) {   // classifications are read on the first pass and reused by later passes
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {   // a negative book index means nothing is coded for this class on this pass
+                     Codebook *book = f->codebooks + b;
+                     #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                     #else
+                     // saves 1% (NOTE(review): both preprocessor arms are currently identical)
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                     #endif
+                  } else {
+                     z += r->part_size;   // skip this partition: move the cursor to the next one
+                     c_inter = z & 1;
+                     p_inter = z >> 1;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         } else if (ch == 1) {   // NOTE(review): unreachable -- the enclosing test requires ch != 1
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = 0, p_inter = z;
+               if (pass == 0) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     Codebook *book = f->codebooks + b;
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                  } else {
+                     z += r->part_size;
+                     c_inter = 0;
+                     p_inter = z;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         } else {   // general case: more than two channels
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = z % ch, p_inter = z/ch;
+               if (pass == 0) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     Codebook *book = f->codebooks + b;
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                  } else {
+                     z += r->part_size;
+                     c_inter = z % ch;
+                     p_inter = z / ch;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         }
+      }
+      goto done;
+   }
+   CHECK(f);
+
+   for (pass=0; pass < 8; ++pass) {   // all other cases: each channel is decoded into its own buffer
+      int pcount = 0, class_set=0;
+      while (pcount < part_read) {
+         if (pass == 0) {   // read one classification codeword per decoded channel
+            for (j=0; j < ch; ++j) {
+               if (!do_not_decode[j]) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int temp;
+                  DECODE(temp,f,c);
+                  if (temp == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[j][class_set] = r->classdata[temp];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[j][i+pcount] = temp % r->classifications;
+                     temp /= r->classifications;
+                  }
+                  #endif
+               }
+            }
+         }
+         for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+            for (j=0; j < ch; ++j) {
+               if (!do_not_decode[j]) {
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[j][class_set][i];
+                  #else
+                  int c = classifications[j][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     float *target = residue_buffers[j];
+                     int offset = r->begin + pcount * r->part_size;
+                     int n = r->part_size;   // note: shadows the parameter n within this scope
+                     Codebook *book = f->codebooks + b;
+                     if (!residue_decode(f, book, target, offset, n, rtype))
+                        goto done;
+                  }
+               }
+            }
+         }
+         #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+         ++class_set;
+         #endif
+      }
+   }
+  done:
+   CHECK(f);
+   temp_alloc_restore(f,temp_alloc_point);   // release the temporary classification storage
+}
+
+
+#if 0
+// slow way for debugging (compiled out): reads buffer[0..n/2) and overwrites buffer[0..n) with a direct cosine sum
+void inverse_mdct_slow(float *buffer, int n)
+{
+   int i,j;
+   int n2 = n >> 1;
+   float *x = (float *) malloc(sizeof(*x) * n2); // scratch copy of the input; NOTE(review): result is not checked for NULL
+   memcpy(x, buffer, sizeof(*x) * n2);           // buffer is overwritten below, so the sums read from the copy
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n2; ++j)
+         // formula from paper:
+         //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
+         // formula from wikipedia
+         //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
+         // these are equivalent, except the formula from the paper inverts the multiplier!
+         // however, what actually works is NO MULTIPLIER!?!
+         //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
+         acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
+      buffer[i] = acc;
+   }
+   free(x);
+}
+#elif 0
+// same as above, but just barely able to run in real time on modern machines (cosines come from a table; f and blocktype are unused)
+void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
+{
+   float mcos[16384];   // 4*n entries are filled below, so this variant only fits n <= 4096
+   int i,j;
+   int n2 = n >> 1, nmask = (n << 2) -1;   // nmask wraps table indices to 0..4*n-1 when n is a power of two
+   float *x = (float *) malloc(sizeof(*x) * n2);   // NOTE(review): result is not checked for NULL
+   memcpy(x, buffer, sizeof(*x) * n2);
+   for (i=0; i < 4*n; ++i)
+      mcos[i] = (float) cos(M_PI / 2 * i / n);
+
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n2; ++j)
+         acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
+      buffer[i] = acc;
+   }
+   free(x);
+}
+#elif 0
+// transform to use a slow dct-iv; this is STILL basically trivial,
+// but only requires half as many ops
+void dct_iv_slow(float *buffer, int n)
+{
+   float mcos[16384];   // 8*n entries are filled below, so n must not exceed 2048
+   float x[2048];       // copy of the n input values
+   int i,j;
+   int n2 = n >> 1, nmask = (n << 3) - 1;   // n2 is not used in this function
+   memcpy(x, buffer, sizeof(*x) * n);
+   for (i=0; i < 8*n; ++i)
+      mcos[i] = (float) cos(M_PI / 4 * i / n);
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n; ++j)
+         acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
+      buffer[i] = acc;
+   }
+}
+
+void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
+{
+   int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
+   float temp[4096];   // holds the n/2 input values handed to dct_iv_slow
+
+   memcpy(temp, buffer, n2 * sizeof(float));
+   dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
+
+   for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
+   for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
+   for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
+}
+#endif
+
+#ifndef LIBVORBIS_MDCT
+#define LIBVORBIS_MDCT 0
+#endif
+// with the default of 0 above, the libvorbis bridge below is compiled out
+#if LIBVORBIS_MDCT
+// directly call the vorbis MDCT using an interface documented
+// by Jeff Roberts... useful for performance comparison
+typedef struct 
+{
+  int n;
+  int log2n;
+  
+  float *trig;
+  int   *bitrev;
+
+  float scale;
+} mdct_lookup;
+
+extern void mdct_init(mdct_lookup *lookup, int n);
+extern void mdct_clear(mdct_lookup *l);
+extern void mdct_backward(mdct_lookup *init, float *in, float *out);
+
+mdct_lookup M1,M2;   // up to two cached setups; n == 0 is treated as "slot not initialized yet"
+// in-place inverse MDCT through the external library, caching one lookup per distinct n; f and blocktype are unused here
+void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
+{
+   mdct_lookup *M;
+   if (M1.n == n) M = &M1;
+   else if (M2.n == n) M = &M2;
+   else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
+   else { 
+      if (M2.n) __asm int 3;   // a third distinct size was requested: breakpoint trap (MSVC-style x86 inline asm)
+      mdct_init(&M2, n);       // NOTE(review): M2 is re-initialized without a prior mdct_clear
+      M = &M2;
+   }
+
+   mdct_backward(M, buffer, buffer);
+}
+#endif
+
+
+// the following helpers were split out of step 3 of inverse_mdct while optimizing;
+// they could be pushed back up but eh. __forceinline showed no change;
+// they're probably already being inlined.
+static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
+{
+   float *ee0 = e + i_off;    // top of the first span; every access below is at or below this address
+   float *ee2 = ee0 + k_off;  // partner span, k_off floats away from the first
+   int i;
+   // n>>2 rounds; each round runs four butterflies, consuming 8 floats of each span and 32 floats of A
+   assert((n & 3) == 0);
+   for (i=(n>>2); i > 0; --i) {
+      float k00_20, k01_21;
+      k00_20  = ee0[ 0] - ee2[ 0];
+      k01_21  = ee0[-1] - ee2[-1];
+      ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
+      ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
+      ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];   // the difference pair is complex-multiplied by (A[0], A[1])
+      ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;                                    // the next butterfly reads its twiddle pair 8 floats further on
+
+      k00_20  = ee0[-2] - ee2[-2];
+      k01_21  = ee0[-3] - ee2[-3];
+      ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
+      ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
+      ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-4] - ee2[-4];
+      k01_21  = ee0[-5] - ee2[-5];
+      ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
+      ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
+      ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-6] - ee2[-6];
+      k01_21  = ee0[-7] - ee2[-7];
+      ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
+      ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
+      ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+      ee0 -= 8;   // both spans move down by the 8 floats just processed
+      ee2 -= 8;
+   }
+}
+
+static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
+{
+   int i;
+   float k00_20, k01_21;
+   // lim>>2 rounds of four butterflies: sums stay in the e0 span, twiddled differences go to the e2 span
+   float *e0 = e + d0;
+   float *e2 = e0 + k_off;
+
+   for (i=lim >> 2; i > 0; --i) {
+      k00_20 = e0[-0] - e2[-0];
+      k01_21 = e0[-1] - e2[-1];
+      e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
+      e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
+      e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;   // the twiddle stride is supplied by the caller
+
+      k00_20 = e0[-2] - e2[-2];
+      k01_21 = e0[-3] - e2[-3];
+      e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
+      e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
+      e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-4] - e2[-4];
+      k01_21 = e0[-5] - e2[-5];
+      e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
+      e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
+      e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-6] - e2[-6];
+      k01_21 = e0[-7] - e2[-7];
+      e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
+      e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
+      e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      e0 -= 8;   // both spans move down by the 8 floats just processed
+      e2 -= 8;
+
+      A += k1;
+   }
+}
+
+static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
+{
+   int i;
+   float A0 = A[0];
+   float A1 = A[0+1];
+   float A2 = A[0+a_off];
+   float A3 = A[0+a_off+1];
+   float A4 = A[0+a_off*2+0];
+   float A5 = A[0+a_off*2+1];
+   float A6 = A[0+a_off*3+0];
+   float A7 = A[0+a_off*3+1];
+   // the four twiddle pairs above sit a_off floats apart and are reused unchanged by every round
+   float k00,k11;
+
+   float *ee0 = e  +i_off;
+   float *ee2 = ee0+k_off;
+   // n rounds of four butterflies over 8 floats per span; afterwards both spans move down by k0 floats
+   for (i=n; i > 0; --i) {
+      k00     = ee0[ 0] - ee2[ 0];
+      k11     = ee0[-1] - ee2[-1];
+      ee0[ 0] =  ee0[ 0] + ee2[ 0];
+      ee0[-1] =  ee0[-1] + ee2[-1];
+      ee2[ 0] = (k00) * A0 - (k11) * A1;
+      ee2[-1] = (k11) * A0 + (k00) * A1;
+
+      k00     = ee0[-2] - ee2[-2];
+      k11     = ee0[-3] - ee2[-3];
+      ee0[-2] =  ee0[-2] + ee2[-2];
+      ee0[-3] =  ee0[-3] + ee2[-3];
+      ee2[-2] = (k00) * A2 - (k11) * A3;
+      ee2[-3] = (k11) * A2 + (k00) * A3;
+
+      k00     = ee0[-4] - ee2[-4];
+      k11     = ee0[-5] - ee2[-5];
+      ee0[-4] =  ee0[-4] + ee2[-4];
+      ee0[-5] =  ee0[-5] + ee2[-5];
+      ee2[-4] = (k00) * A4 - (k11) * A5;
+      ee2[-5] = (k11) * A4 + (k00) * A5;
+
+      k00     = ee0[-6] - ee2[-6];
+      k11     = ee0[-7] - ee2[-7];
+      ee0[-6] =  ee0[-6] + ee2[-6];
+      ee0[-7] =  ee0[-7] + ee2[-7];
+      ee2[-6] = (k00) * A6 - (k11) * A7;
+      ee2[-7] = (k11) * A6 + (k00) * A7;
+
+      ee0 -= k0;
+      ee2 -= k0;
+   }
+}
+
+static __forceinline void iter_54(float *z)
+{
+   float k00,k11,k22,k33;
+   float y0,y1,y2,y3;
+   // in-place add/subtract step on the 8 floats z[0], z[-1], ..., z[-7]; "zN" in the notes below is the incoming z[-N]
+   k00  = z[ 0] - z[-4];
+   y0   = z[ 0] + z[-4];
+   y2   = z[-2] + z[-6];
+   k22  = z[-2] - z[-6];
+
+   z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
+   z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
+
+   // done with y0,y2
+
+   k33  = z[-3] - z[-7];
+
+   z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
+   z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
+
+   // done with k33
+
+   k11  = z[-1] - z[-5];
+   y1   = z[-1] + z[-5];
+   y3   = z[-3] + z[-7];
+
+   z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
+   z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
+   z[-5] = k11 - k22;    // z1 - z5 - z2 + z6
+   z[-7] = k11 + k22;    // z1 - z5 + z2 - z6
+}
+
+static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
+{
+   int a_off = base_n >> 3;
+   float A2 = A[0+a_off];      // the only twiddle value this function reads
+   float *z = e + i_off;
+   float *base = z - 16 * n;   // stop marker: n rounds of 16 floats below the start
+   // each round handles z[0] .. z[-15], then runs iter_54 on both 8-float halves
+   while (z > base) {
+      float k00,k11;
+
+      k00   = z[-0] - z[-8];
+      k11   = z[-1] - z[-9];
+      z[-0] = z[-0] + z[-8];
+      z[-1] = z[-1] + z[-9];
+      z[-8] =  k00;              // this pair is stored without any multiply
+      z[-9] =  k11 ;
+
+      k00    = z[ -2] - z[-10];
+      k11    = z[ -3] - z[-11];
+      z[ -2] = z[ -2] + z[-10];
+      z[ -3] = z[ -3] + z[-11];
+      z[-10] = (k00+k11) * A2;
+      z[-11] = (k11-k00) * A2;
+
+      k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
+      k11    = z[ -5] - z[-13];
+      z[ -4] = z[ -4] + z[-12];
+      z[ -5] = z[ -5] + z[-13];
+      z[-12] = k11;              // the two differences are stored swapped, again without a multiply
+      z[-13] = k00;
+
+      k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
+      k11    = z[ -7] - z[-15];
+      z[ -6] = z[ -6] + z[-14];
+      z[ -7] = z[ -7] + z[-15];
+      z[-14] = (k00+k11) * A2;
+      z[-15] = (k00-k11) * A2;
+
+      iter_54(z);
+      iter_54(z-8);
+      z -= 16;
+   }
+}
+
+static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
+{
+   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
+   int ld;
+   // @OPTIMIZE: reduce register pressure by using fewer variables?
+   int save_point = temp_alloc_save(f);
+   float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));   // n/2 floats of scratch, handed back by temp_alloc_restore at the end
+   float *u=NULL,*v=NULL;
+   // twiddle factors
+   float *A = f->A[blocktype];
+   // in-place transform: reads buffer[0..n/2), writes buffer[0..n); blocktype selects the f->A/B/C and f->bit_reverse tables
+   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
+   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_naive' after this function.
+
+   // kernel from paper
+
+
+   // merged:
+   //   copy and reflect spectral data
+   //   step 0
+
+   // note that it turns out that the items added together during
+   // this step are, in fact, being added to themselves (as reflected
+   // by step 0). inexplicable inefficiency! this became obvious
+   // once I combined the passes.
+
+   // so there's a missing 'times 2' here (for adding X to itself).
+   // this propagates through linearly to the end, where the numbers
+   // are 1/2 too small, and need to be compensated for.
+
+   {
+      float *d,*e, *AA, *e_stop;
+      d = &buf2[n2-2];
+      AA = A;
+      e = &buffer[0];
+      e_stop = &buffer[n2];
+      while (e != e_stop) {   // first part of the buf2 fill: buffer is read upward 4 floats per step, buf2 is written downward
+         d[1] = (e[0] * AA[0] - e[2]*AA[1]);
+         d[0] = (e[0] * AA[1] + e[2]*AA[0]);
+         d -= 2;
+         AA += 2;
+         e += 4;
+      }
+
+      e = &buffer[n2-3];
+      while (d >= buf2) {     // second part: buffer is read downward from buffer[n2-3] until buf2 is full
+         d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
+         d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
+         d -= 2;
+         AA += 2;
+         e -= 4;
+      }
+   }
+
+   // now we use symbolic names for these, so that we can
+   // possibly swap their meaning as we change which operations
+   // are in place
+
+   u = buffer;
+   v = buf2;
+
+   // step 2    (paper output is w, now u)
+   // this could be in place, but the data ends up in the wrong
+   // place... _somebody_'s got to swap it, so this is nominated
+   {
+      float *AA = &A[n2-8];
+      float *d0,*d1, *e0, *e1;
+
+      e0 = &v[n4];
+      e1 = &v[0];
+
+      d0 = &u[n4];
+      d1 = &u[0];
+
+      while (AA >= A) {   // twiddles are consumed from the end of A backwards, 8 floats per round
+         float v40_20, v41_21;
+
+         v41_21 = e0[1] - e1[1];
+         v40_20 = e0[0] - e1[0];
+         d0[1]  = e0[1] + e1[1];
+         d0[0]  = e0[0] + e1[0];
+         d1[1]  = v41_21*AA[4] - v40_20*AA[5];
+         d1[0]  = v40_20*AA[4] + v41_21*AA[5];
+
+         v41_21 = e0[3] - e1[3];
+         v40_20 = e0[2] - e1[2];
+         d0[3]  = e0[3] + e1[3];
+         d0[2]  = e0[2] + e1[2];
+         d1[3]  = v41_21*AA[0] - v40_20*AA[1];
+         d1[2]  = v40_20*AA[0] + v41_21*AA[1];
+
+         AA -= 8;
+
+         d0 += 4;
+         d1 += 4;
+         e0 += 4;
+         e1 += 4;
+      }
+   }
+
+   // step 3
+   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+
+   // optimized step 3:
+
+   // the original step3 loop can be nested r inside s or s inside r;
+   // it's written originally as s inside r, but this is dumb when r
+   // iterates many times, and s few. So I have two copies of it and
+   // switch between them halfway.
+
+   // this is iteration 0 of step 3
+   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
+   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
+
+   // this is iteration 1 of step 3
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
+
+   l=2;
+   for (; l < (ld-3)>>1; ++l) {   // early iterations: one r-loop call per sub-block
+      int k0 = n >> (l+2), k0_2 = k0>>1;
+      int lim = 1 << (l+1);
+      int i;
+      for (i=0; i < lim; ++i)
+         imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
+   }
+
+   for (; l < ld-6; ++l) {        // later iterations: one s-loop call per group of four twiddle pairs
+      int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
+      int rlim = n >> (l+6), r;
+      int lim = 1 << (l+1);
+      int i_off;
+      float *A0 = A;
+      i_off = n2-1;
+      for (r=rlim; r > 0; --r) {
+         imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
+         A0 += k1*4;
+         i_off -= 8;
+      }
+   }
+
+   // iterations with count:
+   //   ld-6,-5,-4 all interleaved together
+   //       the big win comes from getting rid of needless flops
+   //         due to the constants on pass 5 & 4 being all 1 and 0;
+   //       combining them to be simultaneous to improve cache made little difference
+   imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
+
+   // output is u
+
+   // step 4, 5, and 6
+   // cannot be in-place because of step 5
+   {
+      uint16 *bitrev = f->bit_reverse[blocktype];
+      // weirdly, I'd have thought reading sequentially and writing
+      // erratically would have been better than vice-versa, but in
+      // fact that's not what my testing showed. (That is, with
+      // j = bitreverse(i), do you read i and write j, or read j and write i.)
+
+      float *d0 = &v[n4-4];
+      float *d1 = &v[n2-4];
+      while (d0 >= v) {   // two bitrev entries per round; each names a start index in u for 4 floats
+         int k4;
+
+         k4 = bitrev[0];
+         d1[3] = u[k4+0];
+         d1[2] = u[k4+1];
+         d0[3] = u[k4+2];
+         d0[2] = u[k4+3];
+
+         k4 = bitrev[1];
+         d1[1] = u[k4+0];
+         d1[0] = u[k4+1];
+         d0[1] = u[k4+2];
+         d0[0] = u[k4+3];
+         
+         d0 -= 4;
+         d1 -= 4;
+         bitrev += 2;
+      }
+   }
+   // (paper output is u, now v)
+
+
+   // data must be in buf2
+   assert(v == buf2);
+
+   // step 7   (paper output is v, now v)
+   // this is now in place
+   {
+      float *C = f->C[blocktype];
+      float *d, *e;
+
+      d = v;
+      e = v + n2 - 4;
+
+      while (d < e) {   // d walks up from the start of v and e walks down from the end until they meet
+         float a02,a11,b0,b1,b2,b3;
+
+         a02 = d[0] - e[2];
+         a11 = d[1] + e[3];
+
+         b0 = C[1]*a02 + C[0]*a11;
+         b1 = C[1]*a11 - C[0]*a02;
+
+         b2 = d[0] + e[ 2];
+         b3 = d[1] - e[ 3];
+
+         d[0] = b2 + b0;
+         d[1] = b3 + b1;
+         e[2] = b2 - b0;
+         e[3] = b1 - b3;
+
+         a02 = d[2] - e[0];
+         a11 = d[3] + e[1];
+
+         b0 = C[3]*a02 + C[2]*a11;
+         b1 = C[3]*a11 - C[2]*a02;
+
+         b2 = d[2] + e[ 0];
+         b3 = d[3] - e[ 1];
+
+         d[2] = b2 + b0;
+         d[3] = b3 + b1;
+         e[0] = b2 - b0;
+         e[1] = b1 - b3;
+
+         C += 4;
+         d += 4;
+         e -= 4;
+      }
+   }
+
+   // data must be in buf2
+
+
+   // step 8+decode   (paper output is X, now buffer)
+   // this generates pairs of data a la 8 and pushes them directly through
+   // the decode kernel (pushing rather than pulling) to avoid having
+   // to make another pass later
+
+   // this cannot POSSIBLY be in place, so we refer to the buffers directly
+
+   {
+      float *d0,*d1,*d2,*d3;
+
+      float *B = f->B[blocktype] + n2 - 8;
+      float *e = buf2 + n2 - 8;
+      d0 = &buffer[0];
+      d1 = &buffer[n2-4];
+      d2 = &buffer[n2];
+      d3 = &buffer[n-4];
+      while (e >= v) {   // 8 floats of buf2 per round fill 4 floats in each quarter of buffer; d0/d2 move up, d1/d3 move down
+         float p0,p1,p2,p3;
+
+         p3 =  e[6]*B[7] - e[7]*B[6];
+         p2 = -e[6]*B[6] - e[7]*B[7]; 
+
+         d0[0] =   p3;
+         d1[3] = - p3;
+         d2[0] =   p2;
+         d3[3] =   p2;
+
+         p1 =  e[4]*B[5] - e[5]*B[4];
+         p0 = -e[4]*B[4] - e[5]*B[5]; 
+
+         d0[1] =   p1;
+         d1[2] = - p1;
+         d2[1] =   p0;
+         d3[2] =   p0;
+
+         p3 =  e[2]*B[3] - e[3]*B[2];
+         p2 = -e[2]*B[2] - e[3]*B[3]; 
+
+         d0[2] =   p3;
+         d1[1] = - p3;
+         d2[2] =   p2;
+         d3[1] =   p2;
+
+         p1 =  e[0]*B[1] - e[1]*B[0];
+         p0 = -e[0]*B[0] - e[1]*B[1]; 
+
+         d0[3] =   p1;
+         d1[0] = - p1;
+         d2[3] =   p0;
+         d3[0] =   p0;
+
+         B -= 8;
+         e -= 8;
+         d0 += 4;
+         d2 += 4;
+         d1 -= 4;
+         d3 -= 4;
+      }
+   }
+
+   temp_alloc_restore(f,save_point);
+}
+
+#if 0
+// this is the original version of the above code, if you want to optimize it from scratch (compiled out)
+void inverse_mdct_naive(float *buffer, int n)
+{
+   float s;
+   float A[1 << 12], B[1 << 12], C[1 << 11];   // twiddle tables, rebuilt on every call by the loops below
+   int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
+   int n3_4 = n - n4, ld;
+   // how can they claim this only uses N words?!
+   // oh, because they're only used sparsely, whoops
+   float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];   // four 8192-float work arrays on the stack
+   // set up twiddle factors
+
+   for (k=k2=0; k < n4; ++k,k2+=2) {
+      A[k2  ] = (float)  cos(4*k*M_PI/n);
+      A[k2+1] = (float) -sin(4*k*M_PI/n);
+      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
+      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
+   }
+   for (k=k2=0; k < n8; ++k,k2+=2) {
+      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
+      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
+   }
+
+   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
+   // Note there are bugs in that pseudocode, presumably due to them attempting
+   // to rename the arrays nicely rather than representing the way their actual
+   // implementation bounces buffers back and forth. As a result, even in the
+   // "some formulars corrected" version, a direct implementation fails. These
+   // are noted below as "paper bug".
+
+   // copy and reflect spectral data
+   for (k=0; k < n2; ++k) u[k] = buffer[k];
+   for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
+   // kernel from paper
+   // step 1
+   for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
+      v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
+      v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
+   }
+   // step 2
+   for (k=k4=0; k < n8; k+=1, k4+=4) {
+      w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
+      w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
+      w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
+      w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
+   }
+   // step 3
+   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+   for (l=0; l < ld-3; ++l) {
+      int k0 = n >> (l+2), k1 = 1 << (l+3);
+      int rlim = n >> (l+4), r4, r;
+      int s2lim = 1 << (l+2), s2;
+      for (r=r4=0; r < rlim; r4+=4,++r) {
+         for (s2=0; s2 < s2lim; s2+=2) {
+            u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
+            u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
+            u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
+                                - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
+            u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
+                                + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
+         }
+      }
+      if (l+1 < ld-3) {
+         // paper bug: ping-ponging of u&w here is omitted
+         memcpy(w, u, sizeof(u));
+      }
+   }
+
+   // step 4
+   for (i=0; i < n8; ++i) {
+      int j = bit_reverse(i) >> (32-ld+3);
+      assert(j < n8);
+      if (i == j) {
+         // paper bug: original code probably swapped in place; if copying,
+         //            need to directly copy in this case
+         int i8 = i << 3;
+         v[i8+1] = u[i8+1];
+         v[i8+3] = u[i8+3];
+         v[i8+5] = u[i8+5];
+         v[i8+7] = u[i8+7];
+      } else if (i < j) {
+         int i8 = i << 3, j8 = j << 3;
+         v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
+         v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
+         v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
+         v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
+      }
+   }
+   // step 5
+   for (k=0; k < n2; ++k) {
+      w[k] = v[k*2+1];
+   }
+   // step 6
+   for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
+      u[n-1-k2] = w[k4];
+      u[n-2-k2] = w[k4+1];
+      u[n3_4 - 1 - k2] = w[k4+2];
+      u[n3_4 - 2 - k2] = w[k4+3];
+   }
+   // step 7
+   for (k=k2=0; k < n8; ++k, k2 += 2) {
+      v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
+      v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
+      v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
+      v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
+   }
+   // step 8
+   for (k=k2=0; k < n4; ++k,k2 += 2) {
+      X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
+      X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
+   }
+
+   // decode kernel to output
+   // determined the following value experimentally
+   // (by first figuring out what made inverse_mdct_slow work); then matching that here
+   // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
+   s = 0.5; // theoretically would be n4
+
+   // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
+   //     so it needs to use the "old" B values to behave correctly, or else
+   //     set s to 1.0 ]]]
+   for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
+   for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
+   for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
+}
+#endif
+
+static float *get_window(vorb *f, int len)
+{
+   int full = len << 1;   // len is doubled before being compared with the two block sizes
+   int which = (full == f->blocksize_0) ? 0 : (full == f->blocksize_1) ? 1 : -1;
+   if (which >= 0)
+      return f->window[which];   // window[k] is returned for a match with blocksize_k
+   assert(0); return NULL;       // any other length has no window
+}
+
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+typedef int16 YTYPE;
+#else
+typedef int YTYPE;
+#endif
+static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
+{
+   // Walks channel i's floor points in sorted order, calling draw_line between consecutive
+   // used points, then applies the last height to the rest of target up to index n/2.
+   // Returns TRUE, or whatever error() returns when the channel's floor is of type 0.
+   Floor1 *curve;
+   int half = n >> 1;
+   int floor_index = map->submap_floor[map->chan[i].mux];
+   int k, pt, x0 = 0, y0, x1, y1;
+   if (f->floor_types[floor_index] == 0)
+      return error(f, VORBIS_invalid_stream);
+   curve = &f->floor_config[floor_index].floor1;
+   y0 = finalY[0] * curve->floor1_multiplier;
+   for (k=1; k < curve->values; ++k) {
+      pt = curve->sorted_order[k];
+      #ifndef STB_VORBIS_NO_DEFER_FLOOR
+      if (finalY[pt] < 0) continue;    // a negative Y means this point is skipped
+      #else
+      if (!step2_flag[pt]) continue;   // unflagged points are skipped
+      #endif
+      y1 = finalY[pt] * curve->floor1_multiplier;
+      x1 = curve->Xlist[pt];
+      if (x0 != x1)
+         draw_line(target, x0,y0, x1,y1, half);
+      CHECK(f);
+      x0 = x1;
+      y0 = y1;
+   }
+   if (x0 < half) {
+      // flat tail; stands in for draw_line(target, x0,y0, n,y0, half)
+      for (k=x0; k < half; ++k)
+         LINE_OP(target[k], inverse_db_table[y0]);
+      CHECK(f);
+   }
+   return TRUE;
+}
+
+// The meaning of "left" and "right"
+//
+// For a given frame:
+//     we compute samples from 0..n
+//     window_center is n/2
+//     we'll window and mix the samples from left_start to left_end with data from the previous frame
+//     all of the samples from left_end to right_start can be output without mixing; however,
+//        this interval is 0-length except when transitioning between short and long frames
+//     all of the samples from right_start to right_end need to be mixed with the next frame,
+//        which we don't have, so those get saved in a buffer
+//     frame N's right_end-right_start, the number of samples to mix with the next frame,
+//        has to be the same as frame N+1's left_end-left_start (which they are by
+//        construction)
+
+static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
+{
+   // Starts the next audio packet, reads its mode header and reports, through the
+   // out-params, the mode index and the window boundaries described above.
+   // Returns TRUE on success; FALSE (or the result of error()) otherwise.
+   Mode *cfg;
+   int mode_index, block_len, half_len, prev_flag = 0, next_flag = 0;
+   f->channel_buffer_start = f->channel_buffer_end = 0;
+
+   // look for a packet whose leading type bit is 0
+   for (;;) {
+      if (f->eof) return FALSE;
+      if (!maybe_start_packet(f))
+         return FALSE;
+      if (get_bits(f,1) == 0)
+         break;
+      if (IS_PUSH_MODE(f))
+         return error(f,VORBIS_bad_packet_type);
+      while (get8_packet(f) != EOP) { /* discard the rest of this packet, then try again */ }
+   }
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+
+   mode_index = get_bits(f, ilog(f->mode_count-1));
+   if (mode_index == EOP || mode_index >= f->mode_count)
+      return FALSE;
+   *mode = mode_index;
+   cfg = f->mode_config + mode_index;
+   if (cfg->blockflag) {
+      block_len = f->blocksize_1;
+      prev_flag = get_bits(f,1);
+      next_flag = get_bits(f,1);
+   } else {
+      block_len = f->blocksize_0;
+   }
+
+   // WINDOWING
+   half_len = block_len >> 1;
+   if (!cfg->blockflag || prev_flag) {
+      *p_left_start = 0;
+      *p_left_end   = half_len;
+   } else {
+      *p_left_start = (block_len - f->blocksize_0) >> 2;
+      *p_left_end   = (block_len + f->blocksize_0) >> 2;
+   }
+   if (!cfg->blockflag || next_flag) {
+      *p_right_start = half_len;
+      *p_right_end   = block_len;
+   } else {
+      *p_right_start = (block_len*3 - f->blocksize_0) >> 2;
+      *p_right_end   = (block_len*3 + f->blocksize_0) >> 2;
+   }
+   return TRUE;
+}
+
+static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
+{
+   Mapping *map;
+   int i,j,k,n,n2;
+   int zero_channel[256];
+   int really_zero_channel[256];
+
+// WINDOWING
+
+   n = f->blocksize[m->blockflag];
+   map = &f->mapping[m->mapping];
+
+// FLOORS
+   n2 = n >> 1;
+
+   CHECK(f);
+
+   for (i=0; i < f->channels; ++i) {
+      int s = map->chan[i].mux, floor;
+      zero_channel[i] = FALSE;
+      floor = map->submap_floor[s];
+      if (f->floor_types[floor] == 0) {
+         return error(f, VORBIS_invalid_stream);
+      } else {
+         Floor1 *g = &f->floor_config[floor].floor1;
+         if (get_bits(f, 1)) {
+            short *finalY;
+            uint8 step2_flag[256];
+            static int range_list[4] = { 256, 128, 86, 64 };
+            int range = range_list[g->floor1_multiplier-1];
+            int offset = 2;
+            finalY = f->finalY[i];
+            finalY[0] = get_bits(f, ilog(range)-1);
+            finalY[1] = get_bits(f, ilog(range)-1);
+            for (j=0; j < g->partitions; ++j) {
+               int pclass = g->partition_class_list[j];
+               int cdim = g->class_dimensions[pclass];
+               int cbits = g->class_subclasses[pclass];
+               int csub = (1 << cbits)-1;
+               int cval = 0;
+               if (cbits) {
+                  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
+                  DECODE(cval,f,c);
+               }
+               for (k=0; k < cdim; ++k) {
+                  int book = g->subclass_books[pclass][cval & csub];
+                  cval = cval >> cbits;
+                  if (book >= 0) {
+                     int temp;
+                     Codebook *c = f->codebooks + book;
+                     DECODE(temp,f,c);
+                     finalY[offset++] = temp;
+                  } else
+                     finalY[offset++] = 0;
+               }
+            }
+            if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
+            step2_flag[0] = step2_flag[1] = 1;
+            for (j=2; j < g->values; ++j) {
+               int low, high, pred, highroom, lowroom, room, val;
+               low = g->neighbors[j][0];
+               high = g->neighbors[j][1];
+               //neighbors(g->Xlist, j, &low, &high);
+               pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
+               val = finalY[j];
+               highroom = range - pred;
+               lowroom = pred;
+               if (highroom < lowroom)
+                  room = highroom * 2;
+               else
+                  room = lowroom * 2;
+               if (val) {
+                  step2_flag[low] = step2_flag[high] = 1;
+                  step2_flag[j] = 1;
+                  if (val >= room)
+                     if (highroom > lowroom)
+                        finalY[j] = val - lowroom + pred;
+                     else
+                        finalY[j] = pred - val + highroom - 1;
+                  else
+                     if (val & 1)
+                        finalY[j] = pred - ((val+1)>>1);
+                     else
+                        finalY[j] = pred + (val>>1);
+               } else {
+                  step2_flag[j] = 0;
+                  finalY[j] = pred;
+               }
+            }
+
+#ifdef STB_VORBIS_NO_DEFER_FLOOR
+            do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
+#else
+            // defer final floor computation until _after_ residue
+            for (j=0; j < g->values; ++j) {
+               if (!step2_flag[j])
+                  finalY[j] = -1;
+            }
+#endif
+         } else {
+           error:
+            zero_channel[i] = TRUE;
+         }
+         // So we just defer everything else to later
+
+         // at this point we've decoded the floor into buffer
+      }
+   }
+   CHECK(f);
+   // at this point we've decoded all floors
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+
+   // re-enable coupled channels if necessary
+   memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
+   for (i=0; i < map->coupling_steps; ++i)
+      if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
+         zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
+      }
+
+   CHECK(f);
+// RESIDUE DECODE
+   for (i=0; i < map->submaps; ++i) {
+      float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
+      int r;
+      uint8 do_not_decode[256];
+      int ch = 0;
+      for (j=0; j < f->channels; ++j) {
+         if (map->chan[j].mux == i) {
+            if (zero_channel[j]) {
+               do_not_decode[ch] = TRUE;
+               residue_buffers[ch] = NULL;
+            } else {
+               do_not_decode[ch] = FALSE;
+               residue_buffers[ch] = f->channel_buffers[j];
+            }
+            ++ch;
+         }
+      }
+      r = map->submap_residue[i];
+      decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
+   }
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+   CHECK(f);
+
+// INVERSE COUPLING
+   for (i = map->coupling_steps-1; i >= 0; --i) {
+      int n2 = n >> 1;
+      float *m = f->channel_buffers[map->chan[i].magnitude];
+      float *a = f->channel_buffers[map->chan[i].angle    ];
+      for (j=0; j < n2; ++j) {
+         float a2,m2;
+         if (m[j] > 0)
+            if (a[j] > 0)
+               m2 = m[j], a2 = m[j] - a[j];
+            else
+               a2 = m[j], m2 = m[j] + a[j];
+         else
+            if (a[j] > 0)
+               m2 = m[j], a2 = m[j] + a[j];
+            else
+               a2 = m[j], m2 = m[j] - a[j];
+         m[j] = m2;
+         a[j] = a2;
+      }
+   }
+   CHECK(f);
+
+   // finish decoding the floors
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+   for (i=0; i < f->channels; ++i) {
+      if (really_zero_channel[i]) {
+         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
+      } else {
+         do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
+      }
+   }
+#else
+   for (i=0; i < f->channels; ++i) {
+      if (really_zero_channel[i]) {
+         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
+      } else {
+         for (j=0; j < n2; ++j)
+            f->channel_buffers[i][j] *= f->floor_buffers[i][j];
+      }
+   }
+#endif
+
+// INVERSE MDCT
+   CHECK(f);
+   for (i=0; i < f->channels; ++i)
+      inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
+   CHECK(f);
+
+   // this shouldn't be necessary, unless we exited on an error
+   // and want to flush to get to the next packet
+   flush_packet(f);
+
+   if (f->first_decode) {
+      // assume we start so first non-discarded sample is sample 0
+      // this isn't to spec, but spec would require us to read ahead
+      // and decode the size of all current frames--could be done,
+      // but presumably it's not a commonly used feature
+      f->current_loc = -n2; // start of first frame is positioned for discard
+      // we might have to discard samples "from" the next frame too,
+      // if we're lapping a large block then a small at the start?
+      f->discard_samples_deferred = n - right_end;
+      f->current_loc_valid = TRUE;
+      f->first_decode = FALSE;
+   } else if (f->discard_samples_deferred) {
+      if (f->discard_samples_deferred >= right_start - left_start) {
+         f->discard_samples_deferred -= (right_start - left_start);
+         left_start = right_start;
+         *p_left = left_start;
+      } else {
+         left_start += f->discard_samples_deferred;
+         *p_left = left_start;
+         f->discard_samples_deferred = 0;
+      }
+   } else if (f->previous_length == 0 && f->current_loc_valid) {
+      // we're recovering from a seek... that means we're going to discard
+      // the samples from this packet even though we know our position from
+      // the last page header, so we need to update the position based on
+      // the discarded samples here
+      // but wait, the code below is going to add this in itself even
+      // on a discard, so we don't need to do it here...
+   }
+
+   // check if we have ogg information about the sample # for this packet
+   if (f->last_seg_which == f->end_seg_with_known_loc) {
+      // if we have a valid current loc, and this is final:
+      if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
+         uint32 current_end = f->known_loc_for_packet - (n-right_end);
+         // then let's infer the size of the (probably) short final frame
+         if (current_end < f->current_loc + (right_end-left_start)) {
+            if (current_end < f->current_loc) {
+               // negative truncation, that's impossible!
+               *len = 0;
+            } else {
+               *len = current_end - f->current_loc;
+            }
+            *len += left_start;
+            if (*len > right_end) *len = right_end; // this should never happen
+            f->current_loc += *len;
+            return TRUE;
+         }
+      }
+      // otherwise, just set our sample loc
+      // guess that the ogg granule pos refers to the _middle_ of the
+      // last frame?
+      // set f->current_loc to the position of left_start
+      f->current_loc = f->known_loc_for_packet - (n2-left_start);
+      f->current_loc_valid = TRUE;
+   }
+   if (f->current_loc_valid)
+      f->current_loc += (right_start - left_start);
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+   *len = right_end;  // ignore samples after the window goes to 0
+   CHECK(f);
+
+   return TRUE;
+}
+
+// Decode a single audio packet in two stages: vorbis_decode_initial() reads
+// the packet header and yields the window boundaries plus the mode index,
+// then vorbis_decode_packet_rest() decodes the remainder using that mode.
+// Returns 0 if the initial stage fails, otherwise the second stage's result.
+// *p_left / *p_right receive the window start positions; *len is filled in
+// by the second stage.
+static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
+{
+   int mode_index;
+   int left_end;
+   int right_end;
+
+   if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode_index))
+      return 0;
+
+   return vorbis_decode_packet_rest(f, len, &f->mode_config[mode_index],
+                                    *p_left, left_end, *p_right, right_end, p_left);
+}
+
+// Finish a decoded frame: blend the saved tail of the previous frame into
+// the start of this one, save this frame's tail for the next call, and
+// report how many samples are ready for output.
+//
+//   len   - number of valid samples in the channel buffers for this frame
+//   left  - index where this frame's rising window region starts
+//   right - index where this frame's falling window region starts
+//
+// Returns right-left (after clamping right to len), or 0 when there was no
+// previous frame to overlap with or no window is available for the overlap.
+static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
+{
+   int prev,i,j;
+   // we use right&left (the start of the right- and left-window sin()-regions)
+   // to determine how much to return, rather than inferring from the rules
+   // (same result, clearer code); 'left' indicates where our sin() window
+   // starts, therefore where the previous window's right edge starts, and
+   // therefore where to start mixing from the previous buffer. 'right'
+   // indicates where our sin() ending-window starts, therefore that's where
+   // we start saving, and where our returned-data ends.
+
+   // mixin from previous window
+   if (f->previous_length) {
+      int n = f->previous_length;
+      float *w = get_window(f, n);
+      // NOTE(review): get_window() is presumed to return NULL when n does not
+      // correspond to one of the stream's block sizes (e.g. after a truncated
+      // frame in a malformed stream) -- confirm against its definition.
+      // Refuse to mix rather than index through a null window.
+      if (w == NULL) return 0;
+      for (i=0; i < f->channels; ++i) {
+         for (j=0; j < n; ++j)
+            f->channel_buffers[i][left+j] =
+               f->channel_buffers[i][left+j]*w[    j] +
+               f->previous_window[i][     j]*w[n-1-j];
+      }
+   }
+
+   prev = f->previous_length;
+
+   // last half of this data becomes previous window
+   f->previous_length = len - right;
+
+   // @OPTIMIZE: could avoid this copy by double-buffering the
+   // output (flipping previous_window with channel_buffers), but
+   // then previous_window would have to be 2x as large, and
+   // channel_buffers couldn't be temp mem (although they're NOT
+   // currently temp mem, they could be (unless we want to level
+   // performance by spreading out the computation))
+   for (i=0; i < f->channels; ++i)
+      for (j=0; right+j < len; ++j)
+         f->previous_window[i][j] = f->channel_buffers[i][right+j];
+
+   if (!prev)
+      // there was no previous packet, so this data isn't valid...
+      // this isn't entirely true, only the would-have-overlapped data
+      // isn't valid, but this seems to be what the spec requires
+      return 0;
+
+   // truncate a short frame
+   if (len < right) right = len;
+
+   f->samples_output += right-left;
+
+   return right - left;
+}
+
+// Decode one packet and run it through vorbis_finish_frame() so that the
+// overlap state (previous window) is primed. The sample count returned by
+// vorbis_finish_frame() is discarded; the return value is the decode result.
+static int vorbis_pump_first_frame(stb_vorbis *f)
+{
+   int len, right, left;
+   int decoded = vorbis_decode_packet(f, &len, &left, &right);
+
+   if (!decoded)
+      return decoded;
+
+   vorbis_finish_frame(f, len, left, right);
+   return decoded;
+}
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+// Push-mode helper: walk the Ogg lacing data starting at f->stream to verify
+// that every byte of the next packet lies before f->stream_end, following
+// the packet across page boundaries if it continues onto later pages.
+//
+// If end_page is nonzero, the packet is additionally required to end on the
+// final segment of its page.
+//
+// Returns TRUE when the whole packet is buffered; otherwise returns through
+// error() with VORBIS_need_more_data (not enough bytes yet) or
+// VORBIS_invalid_stream (malformed page data).
+static int is_whole_packet_present(stb_vorbis *f, int end_page)
+{
+   // Walking the lacing values by hand avoids having to save and restore the
+   // reader state (which includes the whole page segment table).
+
+   int seg = f->next_seg;
+   int at_packet_start = TRUE;
+   uint8 *cursor = f->stream;
+
+   // seg == -1 means the packet begins on a page we have not parsed yet
+   if (seg != -1) {
+      while (seg < f->segment_count) {
+         int seg_len = f->segments[seg];
+         cursor += seg_len;
+         if (seg_len < 255)                      // a short segment terminates the packet
+            break;
+         ++seg;
+      }
+      // the packet either ended inside this page or spills onto the next one
+      if (end_page && seg < f->segment_count-1)
+         return error(f, VORBIS_invalid_stream);
+      if (seg == f->segment_count)
+         seg = -1; // ran off the table: packet crosses into the next page
+      if (cursor > f->stream_end)
+         return error(f, VORBIS_need_more_data);
+      at_packet_start = FALSE;
+   }
+
+   while (seg == -1) {
+      uint8 *seg_table;
+      int seg_total;
+      int continued;
+
+      // the fixed part of the page header (through the segment count) must be buffered
+      if (cursor + 26 >= f->stream_end)
+         return error(f, VORBIS_need_more_data);
+      // capture pattern and stream structure version
+      if (memcmp(cursor, ogg_page_header, 4))
+         return error(f, VORBIS_invalid_stream);
+      if (cursor[4] != 0)
+         return error(f, VORBIS_invalid_stream);
+
+      continued = cursor[5] & PAGEFLAG_continued_packet;
+      if (at_packet_start) {
+         // a packet's first page must NOT be flagged as a continuation --
+         // unless there is no previous length, i.e. we are resynching and
+         // will simply drop the partial packet
+         if (f->previous_length && continued)
+            return error(f, VORBIS_invalid_stream);
+      } else if (!continued) {
+         // every later page of the same packet MUST be a continuation
+         return error(f, VORBIS_invalid_stream);
+      }
+
+      seg_total = cursor[26];          // number of lacing values on this page
+      seg_table = cursor + 27;         // lacing values follow the fixed header
+      cursor    = seg_table + seg_total; // first byte of page payload
+      // the segment table itself must be fully buffered before we read it
+      if (cursor > f->stream_end)
+         return error(f, VORBIS_need_more_data);
+
+      for (seg = 0; seg < seg_total; ++seg) {
+         cursor += seg_table[seg];
+         if (seg_table[seg] < 255)
+            break;
+      }
+      if (end_page && seg < seg_total-1)
+         return error(f, VORBIS_invalid_stream);
+      if (seg == seg_total)
+         seg = -1; // packet continues onto yet another page
+      if (cursor > f->stream_end)
+         return error(f, VORBIS_need_more_data);
+      at_packet_start = FALSE;
+   }
+   return TRUE;
+}
+#endif // !STB_VORBIS_NO_PUSHDATA_API
+
+// Parse the three Vorbis header packets and set up all decoder state.
+//
+// Steps, in order:
+//   1. validate the first Ogg page and read the identification header
+//      (channels, sample rate, the two block sizes);
+//   2. skip over the second packet without interpreting it;
+//   3. parse the setup packet: codebooks, time-domain transfers, floors,
+//      residues, mappings and modes;
+//   4. allocate the per-channel working buffers, initialise both block
+//      sizes, and compute how much temporary memory decoding will need.
+//
+// Returns TRUE on success. On failure returns FALSE or the result of
+// error(), which records the failure code on the decoder.
+static int start_decoder(vorb *f)
+{
+   uint8 header[6], x,y;
+   int len,i,j,k, max_submaps = 0;
+   int longest_floorlist=0;
+
+   // first page, first packet
+
+   if (!start_page(f))                              return FALSE;
+   // validate page flag
+   if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
+   if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
+   if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
+   // check for expected packet length
+   if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
+   if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
+   // read packet
+   // check packet header
+   if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
+   if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
+   // vorbis_version
+   if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
+   f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
+   if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
+   f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
+   get32(f); // bitrate_maximum
+   get32(f); // bitrate_nominal
+   get32(f); // bitrate_minimum
+   x = get8(f);
+   {
+      // low nibble = log2 of the short block size, high nibble = log2 of the long one
+      int log0,log1;
+      log0 = x & 15;
+      log1 = x >> 4;
+      f->blocksize_0 = 1 << log0;
+      f->blocksize_1 = 1 << log1;
+      if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
+      if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
+      if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
+   }
+
+   // framing_flag
+   x = get8(f);
+   if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
+
+   // second packet!
+   if (!start_page(f))                              return FALSE;
+
+   if (!start_packet(f))                            return FALSE;
+   // skip the whole packet, one segment at a time
+   do {
+      len = next_segment(f);
+      skip(f, len);
+      f->bytes_in_seg = 0;
+   } while (len);
+
+   // third packet!
+   if (!start_packet(f))                            return FALSE;
+
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (IS_PUSH_MODE(f)) {
+      if (!is_whole_packet_present(f, TRUE)) {
+         // convert error in ogg header to the right type
+         if (f->error == VORBIS_invalid_stream)
+            f->error = VORBIS_invalid_setup;
+         return FALSE;
+      }
+   }
+   #endif
+
+   crc32_init(); // always init it, to avoid multithread race conditions
+
+   if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
+   for (i=0; i < 6; ++i) header[i] = get8_packet(f);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
+
+   // codebooks
+
+   f->codebook_count = get_bits(f,8) + 1;
+   f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
+   if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
+   memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
+   for (i=0; i < f->codebook_count; ++i) {
+      uint32 *values;
+      int ordered, sorted_count;
+      int total=0;
+      uint8 *lengths;
+      Codebook *c = f->codebooks+i;
+      CHECK(f);
+      // codebook sync pattern: 0x42 0x43 0x56
+      x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
+      // 16-bit dimensions and 24-bit entry count, assembled low byte first
+      x = get_bits(f, 8);
+      c->dimensions = (get_bits(f, 8)<<8) + x;
+      x = get_bits(f, 8);
+      y = get_bits(f, 8);
+      c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
+      ordered = get_bits(f,1);
+      c->sparse = ordered ? 0 : get_bits(f,1);
+
+      if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
+
+      // sparse books keep their full-length table only temporarily
+      if (c->sparse)
+         lengths = (uint8 *) setup_temp_malloc(f, c->entries);
+      else
+         lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
+
+      if (!lengths) return error(f, VORBIS_outofmem);
+
+      if (ordered) {
+         // run-length form: each run assigns the next length to n consecutive entries
+         int current_entry = 0;
+         int current_length = get_bits(f,5) + 1;
+         while (current_entry < c->entries) {
+            int limit = c->entries - current_entry;
+            int n = get_bits(f, ilog(limit));
+            if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
+            memset(lengths + current_entry, current_length, n);
+            current_entry += n;
+            ++current_length;
+         }
+      } else {
+         for (j=0; j < c->entries; ++j) {
+            int present = c->sparse ? get_bits(f,1) : 1;
+            if (present) {
+               lengths[j] = get_bits(f, 5) + 1;
+               ++total;
+               if (lengths[j] == 32)
+                  return error(f, VORBIS_invalid_setup);
+            } else {
+               lengths[j] = NO_CODE;
+            }
+         }
+      }
+
+      if (c->sparse && total >= c->entries >> 2) {
+         // convert sparse items to non-sparse!
+         if (c->entries > (int) f->setup_temp_memory_required)
+            f->setup_temp_memory_required = c->entries;
+
+         c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
+         if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
+         memcpy(c->codeword_lengths, lengths, c->entries);
+         setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
+         lengths = c->codeword_lengths;
+         c->sparse = 0;
+      }
+
+      // compute the size of the sorted tables
+      if (c->sparse) {
+         sorted_count = total;
+      } else {
+         sorted_count = 0;
+         #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+         for (j=0; j < c->entries; ++j)
+            if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
+               ++sorted_count;
+         #endif
+      }
+
+      c->sorted_entries = sorted_count;
+      values = NULL;
+
+      CHECK(f);
+      if (!c->sparse) {
+         c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
+         if (!c->codewords)                  return error(f, VORBIS_outofmem);
+      } else {
+         unsigned int size;
+         if (c->sorted_entries) {
+            c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
+            if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
+            c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
+            if (!c->codewords)                  return error(f, VORBIS_outofmem);
+            values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
+            if (!values)                        return error(f, VORBIS_outofmem);
+         }
+         // track the peak temp usage for this book: lengths + codewords + values
+         size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
+         if (size > f->setup_temp_memory_required)
+            f->setup_temp_memory_required = size;
+      }
+
+      if (!compute_codewords(c, lengths, c->entries, values)) {
+         if (c->sparse) setup_temp_free(f, values, 0);
+         return error(f, VORBIS_invalid_setup);
+      }
+
+      if (c->sorted_entries) {
+         // allocate an extra slot for sentinels
+         c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
+         if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
+         // allocate an extra slot at the front so that c->sorted_values[-1] is defined
+         // so that we can catch that case without an extra if
+         c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
+         if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
+         ++c->sorted_values;
+         c->sorted_values[-1] = -1;
+         compute_sorted_huffman(c, lengths, values);
+      }
+
+      if (c->sparse) {
+         // release temp allocations in reverse order of allocation
+         setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
+         setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
+         setup_temp_free(f, lengths, c->entries);
+         c->codewords = NULL;
+      }
+
+      compute_accelerated_huffman(c);
+
+      CHECK(f);
+      c->lookup_type = get_bits(f, 4);
+      if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
+      if (c->lookup_type > 0) {
+         uint16 *mults;
+         c->minimum_value = float32_unpack(get_bits(f, 32));
+         c->delta_value = float32_unpack(get_bits(f, 32));
+         c->value_bits = get_bits(f, 4)+1;
+         c->sequence_p = get_bits(f,1);
+         if (c->lookup_type == 1) {
+            c->lookup_values = lookup1_values(c->entries, c->dimensions);
+         } else {
+            c->lookup_values = c->entries * c->dimensions;
+         }
+         if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
+         mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
+         if (mults == NULL) return error(f, VORBIS_outofmem);
+         for (j=0; j < (int) c->lookup_values; ++j) {
+            int q = get_bits(f, c->value_bits);
+            if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
+            mults[j] = q;
+         }
+
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+         if (c->lookup_type == 1) {
+            int len, sparse = c->sparse;
+            float last=0;
+            // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
+            if (sparse) {
+               if (c->sorted_entries == 0) goto skip;
+               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
+            } else
+               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
+            if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
+            len = sparse ? c->sorted_entries : c->entries;
+            for (j=0; j < len; ++j) {
+               unsigned int z = sparse ? c->sorted_values[j] : j;
+               unsigned int div=1;
+               for (k=0; k < c->dimensions; ++k) {
+                  int off = (z / div) % c->lookup_values;
+                  float val = mults[off]*c->delta_value + c->minimum_value + last;
+                  c->multiplicands[j*c->dimensions + k] = val;
+                  if (c->sequence_p)
+                     last = val;
+                  if (k+1 < c->dimensions) {
+                     // guard the running divisor against unsigned overflow
+                     if (div > UINT_MAX / (unsigned int) c->lookup_values) {
+                        setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
+                        return error(f, VORBIS_invalid_setup);
+                     }
+                     div *= c->lookup_values;
+                  }
+               }
+            }
+            // the table is now fully expanded, so treat it as type 2 from here on
+            c->lookup_type = 2;
+         }
+         else
+#endif
+         {
+            float last=0;
+            CHECK(f);
+            c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
+            if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
+            for (j=0; j < (int) c->lookup_values; ++j) {
+               float val = mults[j] * c->delta_value + c->minimum_value + last;
+               c->multiplicands[j] = val;
+               if (c->sequence_p)
+                  last = val;
+            }
+         }
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+        skip:;
+#endif
+         setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
+
+         CHECK(f);
+      }
+      CHECK(f);
+   }
+
+   // time domain transfers (notused)
+
+   x = get_bits(f, 6) + 1;
+   for (i=0; i < x; ++i) {
+      uint32 z = get_bits(f, 16);
+      if (z != 0) return error(f, VORBIS_invalid_setup);
+   }
+
+   // Floors
+   f->floor_count = get_bits(f, 6)+1;
+   f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
+   if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
+   for (i=0; i < f->floor_count; ++i) {
+      f->floor_types[i] = get_bits(f, 16);
+      if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
+      if (f->floor_types[i] == 0) {
+         // floor type 0 is parsed but then rejected as unsupported
+         Floor0 *g = &f->floor_config[i].floor0;
+         g->order = get_bits(f,8);
+         g->rate = get_bits(f,16);
+         g->bark_map_size = get_bits(f,16);
+         g->amplitude_bits = get_bits(f,6);
+         g->amplitude_offset = get_bits(f,8);
+         g->number_of_books = get_bits(f,4) + 1;
+         for (j=0; j < g->number_of_books; ++j)
+            g->book_list[j] = get_bits(f,8);
+         return error(f, VORBIS_feature_not_supported);
+      } else {
+         stbv__floor_ordering p[31*8+2];
+         Floor1 *g = &f->floor_config[i].floor1;
+         int max_class = -1;
+         g->partitions = get_bits(f, 5);
+         for (j=0; j < g->partitions; ++j) {
+            g->partition_class_list[j] = get_bits(f, 4);
+            if (g->partition_class_list[j] > max_class)
+               max_class = g->partition_class_list[j];
+         }
+         for (j=0; j <= max_class; ++j) {
+            g->class_dimensions[j] = get_bits(f, 3)+1;
+            g->class_subclasses[j] = get_bits(f, 2);
+            if (g->class_subclasses[j]) {
+               g->class_masterbooks[j] = get_bits(f, 8);
+               if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            }
+            for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
+               // stored value is book+1, so 0 in the stream becomes -1 here
+               g->subclass_books[j][k] = get_bits(f,8)-1;
+               if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            }
+         }
+         g->floor1_multiplier = get_bits(f,2)+1;
+         g->rangebits = get_bits(f,4);
+         // the first two X positions are implicit: 0 and 2^rangebits
+         g->Xlist[0] = 0;
+         g->Xlist[1] = 1 << g->rangebits;
+         g->values = 2;
+         for (j=0; j < g->partitions; ++j) {
+            int c = g->partition_class_list[j];
+            for (k=0; k < g->class_dimensions[c]; ++k) {
+               g->Xlist[g->values] = get_bits(f, g->rangebits);
+               ++g->values;
+            }
+         }
+         // precompute the sorting
+         for (j=0; j < g->values; ++j) {
+            p[j].x = g->Xlist[j];
+            p[j].id = j;
+         }
+         qsort(p, g->values, sizeof(p[0]), point_compare);
+         for (j=0; j < g->values; ++j)
+            g->sorted_order[j] = (uint8) p[j].id;
+         // precompute the neighbors
+         for (j=2; j < g->values; ++j) {
+            int low=0,hi=0;
+            neighbors(g->Xlist, j, &low,&hi);
+            g->neighbors[j][0] = low;
+            g->neighbors[j][1] = hi;
+         }
+
+         if (g->values > longest_floorlist)
+            longest_floorlist = g->values;
+      }
+   }
+
+   // Residue
+   f->residue_count = get_bits(f, 6)+1;
+   f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
+   if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
+   memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
+   for (i=0; i < f->residue_count; ++i) {
+      uint8 residue_cascade[64];
+      Residue *r = f->residue_config+i;
+      f->residue_types[i] = get_bits(f, 16);
+      if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
+      r->begin = get_bits(f, 24);
+      r->end = get_bits(f, 24);
+      if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
+      r->part_size = get_bits(f,24)+1;
+      r->classifications = get_bits(f,6)+1;
+      r->classbook = get_bits(f,8);
+      if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+      // each classification has an 8-bit cascade mask: 3 low bits, optionally 5 high bits
+      for (j=0; j < r->classifications; ++j) {
+         uint8 high_bits=0;
+         uint8 low_bits=get_bits(f,3);
+         if (get_bits(f,1))
+            high_bits = get_bits(f,5);
+         residue_cascade[j] = high_bits*8 + low_bits;
+      }
+      r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
+      if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
+      for (j=0; j < r->classifications; ++j) {
+         for (k=0; k < 8; ++k) {
+            if (residue_cascade[j] & (1 << k)) {
+               r->residue_books[j][k] = get_bits(f, 8);
+               if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            } else {
+               r->residue_books[j][k] = -1;
+            }
+         }
+      }
+      // precompute the classifications[] array to avoid inner-loop mod/divide
+      // call it 'classdata' since we already have r->classifications
+      r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
+      if (!r->classdata) return error(f, VORBIS_outofmem);
+      memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
+      for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
+         int classwords = f->codebooks[r->classbook].dimensions;
+         int temp = j;
+         r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
+         if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
+         // split entry index j into base-'classifications' digits, most significant first
+         for (k=classwords-1; k >= 0; --k) {
+            r->classdata[j][k] = temp % r->classifications;
+            temp /= r->classifications;
+         }
+      }
+   }
+
+   // Mappings
+   f->mapping_count = get_bits(f,6)+1;
+   f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
+   if (f->mapping == NULL) return error(f, VORBIS_outofmem);
+   memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
+   for (i=0; i < f->mapping_count; ++i) {
+      Mapping *m = f->mapping + i;
+      int mapping_type = get_bits(f,16);
+      if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
+      m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
+      if (m->chan == NULL) return error(f, VORBIS_outofmem);
+      if (get_bits(f,1))
+         m->submaps = get_bits(f,4)+1;
+      else
+         m->submaps = 1;
+      if (m->submaps > max_submaps)
+         max_submaps = m->submaps;
+      if (get_bits(f,1)) {
+         m->coupling_steps = get_bits(f,8)+1;
+         // m->chan holds only f->channels entries, and the loop below writes
+         // m->chan[k] for every coupling step; reject a step count that would
+         // write past the end of that allocation
+         if (m->coupling_steps > f->channels)             return error(f, VORBIS_invalid_setup);
+         for (k=0; k < m->coupling_steps; ++k) {
+            m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
+            m->chan[k].angle = get_bits(f, ilog(f->channels-1));
+            if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
+            if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
+            if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
+         }
+      } else
+         m->coupling_steps = 0;
+
+      // reserved field
+      if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
+      if (m->submaps > 1) {
+         for (j=0; j < f->channels; ++j) {
+            m->chan[j].mux = get_bits(f, 4);
+            if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
+         }
+      } else
+         // @SPECIFICATION: this case is missing from the spec
+         for (j=0; j < f->channels; ++j)
+            m->chan[j].mux = 0;
+
+      for (j=0; j < m->submaps; ++j) {
+         get_bits(f,8); // discard
+         m->submap_floor[j] = get_bits(f,8);
+         m->submap_residue[j] = get_bits(f,8);
+         if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
+         if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
+      }
+   }
+
+   // Modes
+   f->mode_count = get_bits(f, 6)+1;
+   for (i=0; i < f->mode_count; ++i) {
+      Mode *m = f->mode_config+i;
+      m->blockflag = get_bits(f,1);
+      m->windowtype = get_bits(f,16);
+      m->transformtype = get_bits(f,16);
+      m->mapping = get_bits(f,8);
+      if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
+      if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
+      if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
+   }
+
+   flush_packet(f);
+
+   f->previous_length = 0;
+
+   // per-channel working buffers, sized for the long block
+   for (i=0; i < f->channels; ++i) {
+      f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
+      f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
+      f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
+      if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
+      #ifdef STB_VORBIS_NO_DEFER_FLOOR
+      f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
+      if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
+      #endif
+   }
+
+   if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
+   if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
+   f->blocksize[0] = f->blocksize_0;
+   f->blocksize[1] = f->blocksize_1;
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+   // fill the shared divide table once; entry [1][1] doubles as the "already built" flag
+   if (integer_divide_table[1][1]==0)
+      for (i=0; i < DIVTAB_NUMER; ++i)
+         for (j=1; j < DIVTAB_DENOM; ++j)
+            integer_divide_table[i][j] = i / j;
+#endif
+
+   // compute how much temporary memory is needed
+
+   // 1.
+   {
+      uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
+      uint32 classify_mem;
+      int i,max_part_read=0;
+      for (i=0; i < f->residue_count; ++i) {
+         Residue *r = f->residue_config + i;
+         int n_read = r->end - r->begin;
+         int part_read = n_read / r->part_size;
+         if (part_read > max_part_read)
+            max_part_read = part_read;
+      }
+      #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
+      #else
+      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
+      #endif
+
+      // the requirement is the larger of the two figures
+      f->temp_memory_required = classify_mem;
+      if (imdct_mem > f->temp_memory_required)
+         f->temp_memory_required = imdct_mem;
+   }
+
+   f->first_decode = TRUE;
+
+   if (f->alloc.alloc_buffer) {
+      assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
+      // check if there's enough temp memory so we don't error later
+      if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
+         return error(f, VORBIS_outofmem);
+   }
+
+   f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
+
+   return TRUE;
+}
+
+// Releases every setup-time allocation reachable from the decoder state
+// (residues, codebooks, floors, mappings, per-channel buffers and the
+// per-blocksize tables). The stb_vorbis struct itself is not freed here.
+// When stdio is compiled in and close_on_free is set, the FILE is closed.
+static void vorbis_deinit(stb_vorbis *p)
+{
+   int idx, entry;
+
+   // residue configs own a per-entry classdata table plus residue_books
+   if (p->residue_config != NULL) {
+      for (idx = 0; idx < p->residue_count; ++idx) {
+         Residue *res = &p->residue_config[idx];
+         if (res->classdata != NULL) {
+            for (entry = 0; entry < p->codebooks[res->classbook].entries; ++entry)
+               setup_free(p, res->classdata[entry]);
+            setup_free(p, res->classdata);
+         }
+         setup_free(p, res->residue_books);
+      }
+   }
+
+   // codebooks: free each book's tables, then the array of books
+   if (p->codebooks != NULL) {
+      CHECK(p);
+      for (idx = 0; idx < p->codebook_count; ++idx) {
+         Codebook *book = &p->codebooks[idx];
+         setup_free(p, book->codeword_lengths);
+         setup_free(p, book->multiplicands);
+         setup_free(p, book->codewords);
+         setup_free(p, book->sorted_codewords);
+         // the sorted_values allocation begins one element before the
+         // stored pointer, so step back before freeing
+         setup_free(p, book->sorted_values ? book->sorted_values-1 : NULL);
+      }
+      setup_free(p, p->codebooks);
+   }
+
+   setup_free(p, p->floor_config);
+   setup_free(p, p->residue_config);
+
+   // mappings: per-mapping channel tables, then the mapping array
+   if (p->mapping != NULL) {
+      for (idx = 0; idx < p->mapping_count; ++idx)
+         setup_free(p, p->mapping[idx].chan);
+      setup_free(p, p->mapping);
+   }
+   CHECK(p);
+
+   // per-channel working buffers (bounded by the compile-time channel cap)
+   for (idx = 0; idx < p->channels && idx < STB_VORBIS_MAX_CHANNELS; ++idx) {
+      setup_free(p, p->channel_buffers[idx]);
+      setup_free(p, p->previous_window[idx]);
+      #ifdef STB_VORBIS_NO_DEFER_FLOOR
+      setup_free(p, p->floor_buffers[idx]);
+      #endif
+      setup_free(p, p->finalY[idx]);
+   }
+
+   // tables kept once per blocksize (index 0 and 1)
+   for (idx = 0; idx < 2; ++idx) {
+      setup_free(p, p->A[idx]);
+      setup_free(p, p->B[idx]);
+      setup_free(p, p->C[idx]);
+      setup_free(p, p->window[idx]);
+      setup_free(p, p->bit_reverse[idx]);
+   }
+
+   #ifndef STB_VORBIS_NO_STDIO
+   if (p->close_on_free)
+      fclose(p->f);
+   #endif
+}
+
+// Public teardown: releases everything the decoder owns via vorbis_deinit
+// and then frees the decoder struct itself. A NULL handle is ignored.
+void stb_vorbis_close(stb_vorbis *p)
+{
+   if (p != NULL) {
+      vorbis_deinit(p);
+      setup_free(p, p);
+   }
+}
+
+// Puts a decoder struct into its initial state: everything zeroed, the
+// optional caller-supplied allocator copied in, and page_crc_tests set to -1.
+static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
+{
+   // zero first so every pointer that may later be malloc'd starts out NULL
+   memset(p, 0, sizeof(*p));
+
+   if (z != NULL) {
+      p->alloc = *z;
+      // round the buffer length up to a multiple of 4
+      p->alloc.alloc_buffer_length_in_bytes += 3;
+      p->alloc.alloc_buffer_length_in_bytes &= ~3;
+      // temp allocations start from the end of the buffer
+      p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
+   }
+
+   p->error = VORBIS__no_error;
+   p->eof = 0;
+   p->codebooks = NULL;
+   p->stream = NULL;
+   p->page_crc_tests = -1;
+   #ifndef STB_VORBIS_NO_STDIO
+   p->f = NULL;
+   p->close_on_free = FALSE;
+   #endif
+}
+
+// Returns the current sample position, or -1 while the position is not
+// known (current_loc_valid is false).
+int stb_vorbis_get_sample_offset(stb_vorbis *f)
+{
+   if (!f->current_loc_valid)
+      return -1;
+   return f->current_loc;
+}
+
+// Returns a by-value snapshot of the stream parameters and memory
+// requirements recorded in the decoder. max_frame_size is half of the
+// long block size.
+stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
+{
+   stb_vorbis_info info;
+
+   info.channels    = f->channels;
+   info.sample_rate = f->sample_rate;
+
+   info.setup_memory_required      = f->setup_memory_required;
+   info.setup_temp_memory_required = f->setup_temp_memory_required;
+   info.temp_memory_required       = f->temp_memory_required;
+
+   info.max_frame_size = f->blocksize_1 >> 1;
+
+   return info;
+}
+
+// Returns the pending error code and resets it to VORBIS__no_error, so a
+// second call reports no error unless a new one occurred in between.
+int stb_vorbis_get_error(stb_vorbis *f)
+{
+   int pending = f->error;
+
+   f->error = VORBIS__no_error;
+   return pending;
+}
+
+// Allocates storage for one stb_vorbis struct through f's setup allocator.
+// Returns whatever setup_malloc returns; callers check for NULL.
+static stb_vorbis * vorbis_alloc(stb_vorbis *f)
+{
+   return (stb_vorbis *) setup_malloc(f, sizeof(stb_vorbis));
+}
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+// Resets push-mode decode state. Setting page_crc_tests to 0 (i.e. >= 0)
+// makes the next stb_vorbis_decode_frame_pushdata call search for a page
+// instead of decoding.
+void stb_vorbis_flush_pushdata(stb_vorbis *f)
+{
+   // back into page-search mode with no scans in progress
+   f->page_crc_tests = 0;
+
+   // forget position and overlap state
+   f->current_loc_valid        = FALSE;
+   f->first_decode             = FALSE;
+   f->previous_length          = 0;
+   f->discard_samples_deferred = 0;
+
+   // nothing buffered for output
+   f->samples_output       = 0;
+   f->channel_buffer_start = 0;
+   f->channel_buffer_end   = 0;
+}
+
+static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
+{  // push-mode resync: look for a CRC-valid page in this chunk; returns the number of bytes consumed
+   int i,n;
+   for (i=0; i < f->page_crc_tests; ++i)
+      f->scan[i].bytes_done = 0; // scans carried over from earlier calls continue at offset 0 of this chunk
+
+   // if we have room for more scans, search for them first, because
+   // they may cause us to stop early if their header is incomplete
+   if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
+      if (data_len < 4) return 0;
+      data_len -= 3; // need to look for 4-byte sequence, so don't miss
+                     // one that straddles a boundary
+      for (i=0; i < data_len; ++i) {
+         if (data[i] == 0x4f) { // cheap first-byte test before the full 4-byte compare
+            if (0==memcmp(data+i, ogg_page_header, 4)) {
+               int j,len;
+               uint32 crc;
+               // make sure we have the whole page header
+               if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
+                  // only read up to this page start, so hopefully we'll
+                  // have the whole page header start next time
+                  data_len = i;
+                  break;
+               }
+               // ok, we have it all; compute the length of the page
+               len = 27 + data[i+26]; // 27 fixed header bytes + data[i+26] per-segment length bytes
+               for (j=0; j < data[i+26]; ++j)
+                  len += data[i+27+j];
+               // scan everything up to the embedded crc (which we must 0)
+               crc = 0;
+               for (j=0; j < 22; ++j)
+                  crc = crc32_update(crc, data[i+j]);
+               // now process 4 0-bytes
+               for (   ; j < 26; ++j)
+                  crc = crc32_update(crc, 0);
+               // len is the total number of bytes we need to scan
+               n = f->page_crc_tests++;
+               f->scan[n].bytes_left = len-j; // j == 26 here: those bytes are already folded into crc
+               f->scan[n].crc_so_far = crc;
+               f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24); // stored CRC, little-endian
+               // if the last frame on a page is continued to the next, then
+               // we can't recover the sample_loc immediately
+               if (data[i+27+data[i+26]-1] == 255)
+                  f->scan[n].sample_loc = ~0;
+               else
+                  f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24); // header bytes 6..9, little-endian
+               f->scan[n].bytes_done = i+j; // offset in this chunk where the CRC scan resumes
+               if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
+                  break;
+               // keep going if we still have room for more
+            }
+         }
+      }
+   }
+
+   for (i=0; i < f->page_crc_tests;) { // advance every pending CRC scan over this chunk
+      uint32 crc;
+      int j;
+      int n = f->scan[i].bytes_done;
+      int m = f->scan[i].bytes_left;
+      if (m > data_len - n) m = data_len - n;
+      // m is the bytes to scan in the current chunk
+      crc = f->scan[i].crc_so_far;
+      for (j=0; j < m; ++j)
+         crc = crc32_update(crc, data[n+j]);
+      f->scan[i].bytes_left -= m;
+      f->scan[i].crc_so_far = crc;
+      if (f->scan[i].bytes_left == 0) {
+         // does it match?
+         if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
+            // Houston, we have page
+            data_len = n+m; // consumption amount is wherever that scan ended
+            f->page_crc_tests = -1; // drop out of page scan mode
+            f->previous_length = 0; // decode-but-don't-output one frame
+            f->next_seg = -1;       // start a new page
+            f->current_loc = f->scan[i].sample_loc; // set the current sample location
+                                    // to the amount we'd have decoded had we decoded this page
+            f->current_loc_valid = f->current_loc != ~0U;
+            return data_len;
+         }
+         // delete entry: overwrite with the last one; i is not advanced, so the moved entry is processed next
+         f->scan[i] = f->scan[--f->page_crc_tests];
+      } else {
+         ++i;
+      }
+   }
+
+   return data_len; // no page confirmed yet; data_len may have been reduced above
+}
+
+// Push-mode decode of at most one frame from the caller's buffer.
+// return value: number of bytes we used (0 when the whole packet is not yet
+// present; 1 after an unrecoverable packet error, with the error stored in f)
+int stb_vorbis_decode_frame_pushdata(
+         stb_vorbis *f,                   // the file we're decoding
+         const uint8 *data, int data_len, // the memory available for decoding
+         int *channels,                   // place to write number of float * buffers
+         float ***output,                 // place to write float ** array of float * buffers
+         int *samples                     // place to write number of output samples
+     )
+{
+   int ch;
+   int len, right, left;
+
+   if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   // while in page-search mode no audio is produced; just scan for a page
+   if (f->page_crc_tests >= 0) {
+      *samples = 0;
+      return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
+   }
+
+   f->stream     = (uint8 *) data;
+   f->stream_end = (uint8 *) data + data_len;
+   f->error      = VORBIS__no_error;
+
+   // refuse to start unless the entire packet is in memory
+   if (!is_whole_packet_present(f, FALSE)) {
+      *samples = 0;
+      return 0;
+   }
+
+   if (vorbis_decode_packet(f, &len, &left, &right)) {
+      // decoded fine: finish the frame and publish the channel pointers
+      len = vorbis_finish_frame(f, len, left, right);
+      for (ch = 0; ch < f->channels; ++ch)
+         f->outputs[ch] = f->channel_buffers[ch] + left;
+
+      if (channels) *channels = f->channels;
+      *samples = len;
+      *output = f->outputs;
+      return (int) (f->stream - data);
+   }
+
+   {
+      // remember the real failure; the recovery steps below overwrite f->error
+      enum STBVorbisError failure = f->error;
+
+      // A bad packet type is always skippable. An invalid continued-packet
+      // flag is skippable only while resynching (previous_length == 0).
+      int skip_packet =
+         failure == VORBIS_bad_packet_type ||
+         (failure == VORBIS_continued_packet_flag_invalid && f->previous_length == 0);
+
+      if (skip_packet) {
+         // discard the rest of this packet and report how far we got
+         f->error = VORBIS__no_error;
+         while (get8_packet(f) != EOP)
+            if (f->eof) break;
+         *samples = 0;
+         return (int) (f->stream - data);
+      }
+
+      // any other parse error: continuing from here cannot work, so drop
+      // back to page-search mode and hand the original error to the caller
+      stb_vorbis_flush_pushdata(f);
+      f->error = failure;
+      *samples = 0;
+      return 1;
+   }
+}
+
+// Creates a push-mode decoder from the stream headers found in `data`.
+//
+// On success returns the new decoder, stores the number of header bytes
+// consumed in *data_used and sets *error to 0. On failure returns NULL and
+// sets *error: VORBIS_need_more_data when the buffer ended before the headers
+// were complete, VORBIS_outofmem when the decoder struct could not be
+// allocated, otherwise the decoder's own error code. `error` may be NULL.
+stb_vorbis *stb_vorbis_open_pushdata(
+         const unsigned char *data, int data_len, // the memory available for decoding
+         int *data_used,              // only defined if result is not NULL
+         int *error, const stb_vorbis_alloc *alloc)
+{
+   stb_vorbis *f, p;
+   vorbis_init(&p, alloc);
+   p.stream     = (uint8 *) data;
+   p.stream_end = (uint8 *) data + data_len;
+   p.push_mode  = TRUE;
+   if (!start_decoder(&p)) {
+      if (error)
+         *error = p.eof ? VORBIS_need_more_data : p.error;
+      // start_decoder may have allocated setup data before failing;
+      // release it so a retry with more data does not leak
+      vorbis_deinit(&p);
+      return NULL;
+   }
+   f = vorbis_alloc(&p);
+   if (f) {
+      *f = p;
+      *data_used = (int) (f->stream - data);
+      if (error) *error = 0;
+      return f;
+   } else {
+      // report the allocation failure instead of leaving *error untouched
+      if (error) *error = VORBIS_outofmem;
+      vorbis_deinit(&p);
+      return NULL;
+   }
+}
+#endif // STB_VORBIS_NO_PUSHDATA_API
+
+// Returns the current read position relative to the start of the stream:
+// always 0 in push mode, the pointer offset for memory streams, and
+// ftell() minus the recorded start for stdio streams.
+unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
+{
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (f->push_mode) return 0;
+   #endif
+   if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
+   #ifndef STB_VORBIS_NO_STDIO
+   return (unsigned int) (ftell(f->f) - f->f_start);
+   #else
+   // without stdio there is no other stream type; still return a defined
+   // value rather than falling off the end of a non-void function
+   return 0;
+   #endif
+}
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+//
+// DATA-PULLING API
+//
+
+static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
+{  // scan forward for the next CRC-valid page; returns 1 positioned at its start, 0 if eof is hit first
+   for(;;) {
+      int n;
+      if (f->eof) return 0;
+      n = get8(f);
+      if (n == 0x4f) { // page header candidate
+         unsigned int retry_loc = stb_vorbis_get_file_offset(f); // offset just past the candidate byte
+         int i;
+         // check if we're off the end of a file_section stream
+         if (retry_loc - 25 > f->stream_len) // NOTE(review): unsigned arithmetic, wraps (and returns 0) if retry_loc < 25 - confirm callers never start that early
+            return 0;
+         // check the rest of the header
+         for (i=1; i < 4; ++i)
+            if (get8(f) != ogg_page_header[i])
+               break;
+         if (f->eof) return 0;
+         if (i == 4) { // all four capture-pattern bytes matched
+            uint8 header[27];
+            uint32 i, crc, goal, len; // note: this i shadows the outer int i
+            for (i=0; i < 4; ++i)
+               header[i] = ogg_page_header[i];
+            for (; i < 27; ++i)
+               header[i] = get8(f);
+            if (f->eof) return 0;
+            if (header[4] != 0) goto invalid; // byte 4 must be zero for the page to be accepted
+            goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24); // stored CRC, little-endian
+            for (i=22; i < 26; ++i)
+               header[i] = 0; // the CRC is computed with its own field zeroed
+            crc = 0;
+            for (i=0; i < 27; ++i)
+               crc = crc32_update(crc, header[i]);
+            len = 0;
+            for (i=0; i < header[26]; ++i) { // header[26] length bytes follow; their sum is the payload size
+               int s = get8(f);
+               crc = crc32_update(crc, s);
+               len += s;
+            }
+            if (len && f->eof) return 0;
+            for (i=0; i < len; ++i)
+               crc = crc32_update(crc, get8(f));
+            // finished parsing probable page
+            if (crc == goal) {
+               // we could now check that it's either got the last
+               // page flag set, OR it's followed by the capture
+               // pattern, but I guess TECHNICALLY you could have
+               // a file with garbage between each ogg page and recover
+               // from it automatically? So even though that paranoia
+               // might decrease the chance of an invalid decode by
+               // another 2^32, not worth it since it would hose those
+               // invalid-but-useful files?
+               if (end)
+                  *end = stb_vorbis_get_file_offset(f); // offset of the first byte after this page
+               if (last) {
+                  if (header[5] & 0x04) // bit 0x04 of byte 5 is reported to the caller as "last page"
+                     *last = 1;
+                  else
+                     *last = 0;
+               }
+               set_file_offset(f, retry_loc-1); // rewind to the first byte of the page
+               return 1;
+            }
+         }
+        invalid:
+         // not a valid page, so rewind and look for next one
+         set_file_offset(f, retry_loc);
+      }
+   }
+}
+
+
+#define SAMPLE_unknown  0xffffffff
+
+// seeking is implemented with a binary search, which narrows down the range to
+// 64K, before using a linear search (because finding the synchronization
+// pattern can be expensive, and the chance we'd find the end page again is
+// relatively high for small ranges)
+//
+// two initial interpolation-style probes are used at the start of the search
+// to try to bound either side of the binary search sensibly, while still
+// working in O(log n) time if they fail.
+
+// Reads the page header at the current position and fills in *z with the
+// page's start offset, end offset and the 32-bit value stored in header
+// bytes 6..9. The read position is restored to the page start afterwards.
+// Returns 0 (leaving the position advanced) if the bytes read do not begin
+// with "OggS", 1 otherwise.
+static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
+{
+   uint8 header[27], lacing[255];
+   int seg, payload_bytes;
+
+   // remember where this page begins
+   z->page_start = stb_vorbis_get_file_offset(f);
+
+   // fixed-size part of the header, then verify the capture pattern
+   getn(f, header, 27);
+   if (!(header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S'))
+      return 0;
+
+   // header[26] length bytes follow; their sum is the payload size
+   getn(f, lacing, header[26]);
+   payload_bytes = 0;
+   for (seg = 0; seg < header[26]; ++seg)
+      payload_bytes += lacing[seg];
+
+   // header + length table + payload gives the end of the page
+   z->page_end = z->page_start + 27 + header[26] + payload_bytes;
+
+   // little-endian 32-bit value at bytes 6..9
+   z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
+
+   // put the read position back where we found it
+   set_file_offset(f, z->page_start);
+   return 1;
+}
+
+// rarely used function to seek back to the preceding page while finding the
+// start of a packet. Searches forward from (at most) 64K before limit_offset
+// for a page that starts before limit_offset and ends at or after it.
+// Returns 1 with the read position at that page's start, 0 if none is found.
+static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
+{
+   unsigned int search_from = f->first_audio_page_offset;
+   unsigned int page_end;
+
+   // back up 64K from the limit, but never before the first audio page
+   if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
+      search_from = limit_offset - 65536;
+
+   set_file_offset(f, search_from);
+
+   for (;;) {
+      if (!vorbis_find_page(f, &page_end, NULL))
+         return 0;
+      // found it once the page straddles (or ends exactly at) the limit
+      if (page_end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
+         return 1;
+      // otherwise continue searching after this page
+      set_file_offset(f, page_end);
+   }
+}
+
+// implements the search logic for finding a page and starting decoding. if
+// the function succeeds, current_loc_valid will be true and current_loc will
+// be less than or equal to the provided sample number (the closer the
+// better).
+static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
+{  // returns 1 on success; otherwise 0, with the error code recorded through error()
+   ProbedPage left, right, mid;
+   int i, start_seg_with_known_loc, end_pos, page_start;
+   uint32 delta, stream_length, padding;
+   double offset = 0, bytes_per_sample = 0;
+   int probe = 0; // number of search iterations completed so far; the first two use interpolation
+
+   // find the last page and validate the target sample
+   stream_length = stb_vorbis_stream_length_in_samples(f);
+   if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
+   if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
+
+   // this is the maximum difference between the window-center (which is the
+   // actual granule position value), and the right-start (which the spec
+   // indicates should be the granule position (give or take one)).
+   padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
+   if (sample_number < padding)
+      sample_number = 0; // clamp instead of letting the unsigned subtraction wrap
+   else
+      sample_number -= padding;
+
+   left = f->p_first;
+   while (left.last_decoded_sample == ~0U) {
+      // (untested) the first page does not have a 'last_decoded_sample'
+      set_file_offset(f, left.page_end);
+      if (!get_seek_page_info(f, &left)) goto error;
+   }
+
+   right = f->p_last;
+   assert(right.last_decoded_sample != ~0U);
+
+   // starting from the start is handled differently
+   if (sample_number <= left.last_decoded_sample) {
+      if (stb_vorbis_seek_start(f))
+         return 1;
+      return 0;
+   }
+
+   while (left.page_end != right.page_start) { // loop until left and right are adjacent pages
+      assert(left.page_end < right.page_start);
+      // search range in bytes
+      delta = right.page_start - left.page_end;
+      if (delta <= 65536) {
+         // there's only 64K left to search - handle it linearly
+         set_file_offset(f, left.page_end);
+      } else {
+         if (probe < 2) {
+            if (probe == 0) {
+               // first probe (interpolate)
+               double data_bytes = right.page_end - left.page_start;
+               bytes_per_sample = data_bytes / right.last_decoded_sample;
+               offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
+            } else {
+               // second probe (try to bound the other side)
+               double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample; // local; shadows the error() function inside this block
+               if (error >= 0 && error <  8000) error =  8000;
+               if (error <  0 && error > -8000) error = -8000;
+               offset += error * 2;
+            }
+
+            // ensure the offset is valid
+            if (offset < left.page_end)
+               offset = left.page_end;
+            if (offset > right.page_start - 65536)
+               offset = right.page_start - 65536;
+
+            set_file_offset(f, (unsigned int) offset);
+         } else {
+            // binary search for large ranges (offset by 32K to ensure
+            // we don't hit the right page)
+            set_file_offset(f, left.page_end + (delta / 2) - 32768);
+         }
+
+         if (!vorbis_find_page(f, NULL, NULL)) goto error;
+      }
+
+      for (;;) { // probe pages until one carries a usable sample position
+         if (!get_seek_page_info(f, &mid)) goto error;
+         if (mid.last_decoded_sample != ~0U) break;
+         // (untested) no frames end on this page
+         set_file_offset(f, mid.page_end);
+         assert(mid.page_start < right.page_start);
+      }
+
+      // if we've just found the last page again then we're in a tricky file,
+      // and we're close enough.
+      if (mid.page_start == right.page_start)
+         break;
+
+      if (sample_number < mid.last_decoded_sample)
+         right = mid;
+      else
+         left = mid;
+
+      ++probe;
+   }
+
+   // seek back to start of the last packet
+   page_start = left.page_start;
+   set_file_offset(f, page_start);
+   if (!start_page(f)) return error(f, VORBIS_seek_failed);
+   end_pos = f->end_seg_with_known_loc;
+   assert(end_pos >= 0);
+
+   for (;;) { // walk back over 255-valued segment lengths to find where the final packet begins
+      for (i = end_pos; i > 0; --i)
+         if (f->segments[i-1] != 255)
+            break;
+
+      start_seg_with_known_loc = i;
+
+      if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
+         break;
+
+      // (untested) the final packet begins on an earlier page
+      if (!go_to_page_before(f, page_start))
+         goto error;
+
+      page_start = stb_vorbis_get_file_offset(f);
+      if (!start_page(f)) goto error;
+      end_pos = f->segment_count - 1;
+   }
+
+   // prepare to start decoding
+   f->current_loc_valid = FALSE;
+   f->last_seg = FALSE;
+   f->valid_bits = 0;
+   f->packet_bytes = 0;
+   f->bytes_in_seg = 0;
+   f->previous_length = 0;
+   f->next_seg = start_seg_with_known_loc;
+
+   for (i = 0; i < start_seg_with_known_loc; i++)
+      skip(f, f->segments[i]); // step over the segments that precede the chosen packet
+
+   // start decoding (optimizable - this frame is generally discarded)
+   if (!vorbis_pump_first_frame(f))
+      return 0;
+   if (f->current_loc > sample_number)
+      return error(f, VORBIS_seek_failed);
+   return 1;
+
+error:
+   // try to restore the file to a valid state
+   stb_vorbis_seek_start(f);
+   return error(f, VORBIS_seek_failed);
+}
+
+// Runs vorbis_decode_initial to learn the next frame's window bounds and
+// mode, then rewinds the bytes it consumed so the same packet can be decoded
+// again from its beginning. Returns 0 if vorbis_decode_initial fails.
+static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
+{
+   int header_bits, header_bytes;
+
+   if (vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode) == 0)
+      return 0;
+
+   // work out how many whole bytes (1 or 2) the initial fields occupied:
+   // one bit plus the mode number, and two more bits when the mode's
+   // blockflag is set
+   header_bits = 1 + ilog(f->mode_count-1);
+   if (f->mode_config[*mode].blockflag)
+      header_bits += 2;
+   header_bytes = (header_bits + 7) / 8;
+
+   // give those bytes back to the segment/packet accounting and the stream
+   f->bytes_in_seg += header_bytes;
+   f->packet_bytes -= header_bytes;
+   skip(f, -header_bytes);
+
+   // step the segment cursor back by one (-1 means it ran past the last one)
+   f->next_seg = (f->next_seg == -1) ? f->segment_count - 1 : f->next_seg - 1;
+
+   // drop any partially consumed bits
+   f->valid_bits = 0;
+
+   return 1;
+}
+
+int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
+{  // positions the decoder so the next decoded frame contains or starts at sample_number; 1 on success, 0 on failure
+   uint32 max_frame_samples;
+
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   // fast page-level search
+   if (!seek_to_sample_coarse(f, sample_number))
+      return 0;
+
+   assert(f->current_loc_valid);
+   assert(f->current_loc <= sample_number);
+
+   // linear search for the relevant packet
+   max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2; // bound used below to decide whether the following frame could still reach the target
+   while (f->current_loc < sample_number) {
+      int left_start, left_end, right_start, right_end, mode, frame_samples;
+      if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
+         return error(f, VORBIS_seek_failed);
+      // calculate the number of samples returned by the next frame
+      frame_samples = right_start - left_start;
+      if (f->current_loc + frame_samples > sample_number) {
+         return 1; // the next frame will contain the sample
+      } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
+         // there's a chance the frame after this could contain the sample
+         vorbis_pump_first_frame(f); // NOTE(review): return value is not checked here
+      } else {
+         // this frame is too early to be relevant
+         f->current_loc += frame_samples;
+         f->previous_length = 0;
+         maybe_start_packet(f);
+         flush_packet(f);
+      }
+   }
+   // the next frame will start with the sample
+   assert(f->current_loc == sample_number);
+   return 1;
+}
+
+// Seeks so that the next sample handed out is exactly sample_number: first
+// seeks to the containing frame, then (if the frame starts earlier) decodes
+// it and advances channel_buffer_start past the unwanted leading samples.
+// Returns 1 on success, 0 if the frame-level seek fails.
+int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
+{
+   int n;
+   uint32 frame_start;
+
+   if (!stb_vorbis_seek_frame(f, sample_number))
+      return 0;
+
+   // already sitting exactly on the requested sample
+   if (sample_number == f->current_loc)
+      return 1;
+
+   // decode the frame that contains the sample, then skip into it
+   frame_start = f->current_loc;
+   stb_vorbis_get_frame_float(f, &n, NULL);
+   assert(sample_number > frame_start);
+   assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
+   f->channel_buffer_start += (sample_number - frame_start);
+
+   return 1;
+}
+
+// Rewinds to the first audio page and primes the decoder with the first
+// frame. Not available in push mode. Returns the result of
+// vorbis_pump_first_frame.
+int stb_vorbis_seek_start(stb_vorbis *f)
+{
+   if (IS_PUSH_MODE(f))
+      return error(f, VORBIS_invalid_api_mixing);
+
+   set_file_offset(f, f->first_audio_page_offset);
+
+   // decode as if freshly opened: new page, no previous window
+   f->next_seg        = -1;
+   f->first_decode    = TRUE;
+   f->previous_length = 0;
+
+   return vorbis_pump_first_frame(f);
+}
+
+unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
+{  // total sample count taken from the final page (cached in f->total_samples); 0 when it cannot be determined
+   unsigned int restore_offset, previous_safe;
+   unsigned int end, last_page_loc;
+
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+   if (!f->total_samples) { // not computed yet
+      unsigned int last;
+      uint32 lo,hi;
+      char header[6];
+
+      // first, store the current decode position so we can restore it
+      restore_offset = stb_vorbis_get_file_offset(f);
+
+      // now we want to seek back 64K from the end (the last page must
+      // be at most a little less than 64K, but let's allow a little slop)
+      if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
+         previous_safe = f->stream_len - 65536;
+      else
+         previous_safe = f->first_audio_page_offset;
+
+      set_file_offset(f, previous_safe);
+      // previous_safe is now our candidate 'earliest known place that seeking
+      // to will lead to the final page'
+
+      if (!vorbis_find_page(f, &end, &last)) {
+         // if we can't find a page, we're hosed!
+         f->error = VORBIS_cant_find_last_page;
+         f->total_samples = 0xffffffff; // same value as SAMPLE_unknown; reported to callers as 0
+         goto done;
+      }
+
+      // check if there are more pages
+      last_page_loc = stb_vorbis_get_file_offset(f);
+
+      // stop when the last_page flag is set, not when we reach eof;
+      // this allows us to stop short of a 'file_section' end without
+      // explicitly checking the length of the section
+      while (!last) {
+         set_file_offset(f, end);
+         if (!vorbis_find_page(f, &end, &last)) {
+            // the last page we found didn't have the 'last page' flag
+            // set. whoops!
+            break;
+         }
+         previous_safe = last_page_loc+1; // not read again later in this function
+         last_page_loc = stb_vorbis_get_file_offset(f);
+      }
+
+      set_file_offset(f, last_page_loc);
+
+      // parse the header
+      getn(f, (unsigned char *)header, 6); // the first 6 header bytes are read only to step past them
+      // extract the absolute granule position
+      lo = get32(f);
+      hi = get32(f);
+      if (lo == 0xffffffff && hi == 0xffffffff) {
+         f->error = VORBIS_cant_find_last_page;
+         f->total_samples = SAMPLE_unknown;
+         goto done;
+      }
+      if (hi)
+         lo = 0xfffffffe; // saturate
+      f->total_samples = lo;
+
+      f->p_last.page_start = last_page_loc; // remember the last page for later seeks
+      f->p_last.page_end   = end;
+      f->p_last.last_decoded_sample = lo;
+
+     done:
+      set_file_offset(f, restore_offset);
+   }
+   return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
+}
+
+// Stream length in seconds: the length in samples divided by the sample
+// rate, computed in single precision.
+float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
+{
+   unsigned int total_samples = stb_vorbis_stream_length_in_samples(f);
+
+   return total_samples / (float) f->sample_rate;
+}
+
+
+
+// Pull-mode decode of the next frame as float samples.
+// Returns the number of samples per channel (0 if the packet could not be
+// decoded). When non-NULL, *channels receives the channel count and *output
+// receives the decoder's array of per-channel sample pointers.
+int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
+{
+   int len, right, left, ch;
+
+   if (IS_PUSH_MODE(f))
+      return error(f, VORBIS_invalid_api_mixing);
+
+   if (vorbis_decode_packet(f, &len, &left, &right) == 0) {
+      // nothing decoded: mark the channel buffers as empty
+      f->channel_buffer_end   = 0;
+      f->channel_buffer_start = 0;
+      return 0;
+   }
+
+   len = vorbis_finish_frame(f, len, left, right);
+
+   // the valid samples occupy [left, left+len) in each channel buffer
+   f->channel_buffer_start = left;
+   f->channel_buffer_end   = left + len;
+   for (ch = 0; ch < f->channels; ++ch)
+      f->outputs[ch] = f->channel_buffers[ch] + left;
+
+   if (channels != NULL) *channels = f->channels;
+   if (output != NULL)   *output = f->outputs;
+   return len;
+}
+
+#ifndef STB_VORBIS_NO_STDIO
+
+// Opens a decoder on `length` bytes of `file`, starting at the file's current
+// position. If close_on_free is non-zero the FILE is closed when the decoder
+// is torn down (this includes the failure path here, via vorbis_deinit).
+//
+// Returns the decoder, or NULL on failure. When `error` is non-NULL it is
+// set to VORBIS__no_error on success and to the failure code otherwise
+// (VORBIS_outofmem if only the decoder struct allocation failed).
+stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
+{
+   stb_vorbis *f, p;
+   vorbis_init(&p, alloc);
+   p.f = file;
+   p.f_start = (uint32) ftell(file);
+   p.stream_len   = length;
+   p.close_on_free = close_on_free;
+   if (start_decoder(&p)) {
+      f = vorbis_alloc(&p);
+      if (f) {
+         *f = p;
+         vorbis_pump_first_frame(f);
+         // report success explicitly, matching stb_vorbis_open_memory
+         if (error) *error = VORBIS__no_error;
+         return f;
+      }
+      // headers parsed but the decoder struct could not be allocated;
+      // don't hand the caller NULL together with "no error"
+      if (p.error == VORBIS__no_error)
+         p.error = VORBIS_outofmem;
+   }
+   if (error) *error = p.error;
+   vorbis_deinit(&p);
+   return NULL;
+}
+
+// Opens a decoder on everything from the file's current position to its end.
+// The length is measured by seeking to the end and back, then the work is
+// delegated to stb_vorbis_open_file_section.
+stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
+{
+   unsigned int start = (unsigned int) ftell(file);
+   unsigned int len;
+
+   // measure the distance from here to end-of-file
+   fseek(file, 0, SEEK_END);
+   len = (unsigned int) (ftell(file) - start);
+
+   // return to where we started before handing the file on
+   fseek(file, start, SEEK_SET);
+
+   return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
+}
+
+// Opens `filename` for binary reading and creates a decoder on it. The file
+// is handed over with close_on_free set, so it is closed when the decoder is.
+//
+// Returns the decoder, or NULL on failure. If the file cannot be opened (or
+// `filename` is NULL) and `error` is non-NULL, *error is set to
+// VORBIS_file_open_failure; other failures are reported by
+// stb_vorbis_open_file.
+stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
+{
+   FILE *f = NULL;
+   if (filename != NULL) {
+#if _WIN32 || _WIN64
+      // treat any non-zero fopen_s result as "not opened" rather than
+      // relying on what it left in f
+      if (fopen_s(&f, filename, "rb") != 0)
+         f = NULL;
+#else
+      f = fopen(filename, "rb");
+#endif
+   }
+   if (f)
+      return stb_vorbis_open_file(f, TRUE, error, alloc);
+   if (error) *error = VORBIS_file_open_failure;
+   return NULL;
+}
+#endif // STB_VORBIS_NO_STDIO
+
+// Creates a pull-mode decoder over an in-memory stream of `len` bytes.
+// The decoder keeps pointers into `data`; it does not copy it.
+//
+// Returns the decoder, or NULL on failure. When `error` is non-NULL it is
+// set to VORBIS__no_error on success; on failure it receives
+// VORBIS_unexpected_eof for a NULL `data`, VORBIS_outofmem if only the
+// decoder struct allocation failed, otherwise the decoder's error code.
+stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
+{
+   stb_vorbis *f, p;
+   if (data == NULL) {
+      // don't leave *error untouched on this early-out
+      if (error) *error = VORBIS_unexpected_eof;
+      return NULL;
+   }
+   vorbis_init(&p, alloc);
+   p.stream = (uint8 *) data;
+   p.stream_end = (uint8 *) data + len;
+   p.stream_start = (uint8 *) p.stream;
+   p.stream_len = len;
+   p.push_mode = FALSE;
+   if (start_decoder(&p)) {
+      f = vorbis_alloc(&p);
+      if (f) {
+         *f = p;
+         vorbis_pump_first_frame(f);
+         if (error) *error = VORBIS__no_error;
+         return f;
+      }
+      // headers parsed but the decoder struct could not be allocated;
+      // don't hand the caller NULL together with "no error"
+      if (p.error == VORBIS__no_error)
+         p.error = VORBIS_outofmem;
+   }
+   if (error) *error = p.error;
+   vorbis_deinit(&p);
+   return NULL;
+}
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+#define PLAYBACK_MONO     1 // bit flags: which output(s) a source channel is mixed into
+#define PLAYBACK_LEFT     2
+#define PLAYBACK_RIGHT    4
+// short aliases used only to keep the table below readable
+#define L  (PLAYBACK_LEFT  | PLAYBACK_MONO) // feeds left and mono
+#define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO) // feeds left, right and mono
+#define R  (PLAYBACK_RIGHT | PLAYBACK_MONO) // feeds right and mono
+// channel_position[num_channels][channel] -> flag set for that source channel
+static int8 channel_position[7][6] =
+{
+   { 0 }, // row 0: no channels
+   { C }, // 1 channel
+   { L, R }, // 2 channels
+   { L, C, R }, // 3 channels
+   { L, R, L, R }, // 4 channels
+   { L, C, R, L, R }, // 5 channels
+   { L, C, R, L, R, C }, // 6 channels
+};
+
+
+#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
+   typedef union { // lets the float's bit pattern be read back as an int
+      float f;
+      int i;
+   } float_conv;
+   typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4]; // array length is 1 only if both types are 4 bytes, 0 otherwise
+   #define FASTDEF(x) float_conv x
+   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
+   #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
+   #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22)) // integer subtracted from the bit pattern after MAGIC has been added
+   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s)) // x scaled by 2^s as an int, via the union
+   #define check_endianness()  // expands to nothing
+#else
+   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s)))) // plain multiply-and-truncate; temp is unused
+   #define check_endianness()
+   #define FASTDEF(x)
+#endif
+
+static void copy_samples(short *dest, float *src, int len)
+{  // Converts len floats to 16-bit samples (scaled by 2^15), saturating values outside [-32768, 32767].
+   int k = 0;
+   check_endianness();
+   while (k < len) {
+      FASTDEF(temp);
+      int pcm = FAST_SCALED_FLOAT_TO_INT(temp, src[k],15);
+      if (pcm < -32768) pcm = -32768;
+      else if (pcm > 32767) pcm = 32767;
+      dest[k++] = pcm;
+   }
+}
+
+static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
+{  // Sums every source channel whose channel_position bits intersect `mask` into one run of len 16-bit samples.
+   #define BUFFER_SIZE  32
+   float buffer[BUFFER_SIZE];
+   int base, ch, k, count = BUFFER_SIZE;
+   check_endianness();
+   for (base = 0; base < len; base += BUFFER_SIZE) {
+      float *src;
+      if (base + count > len) count = len - base;   // the final chunk may be short
+      memset(buffer, 0, sizeof(buffer));
+      for (ch = 0; ch < num_c; ++ch) {
+         if ((channel_position[num_c][ch] & mask) == 0) continue;   // this channel does not feed the output
+         src = data[ch] + d_offset + base;
+         for (k = 0; k < count; ++k) buffer[k] += src[k];
+      }
+      for (k = 0; k < count; ++k) {
+         FASTDEF(temp);
+         int pcm = FAST_SCALED_FLOAT_TO_INT(temp,buffer[k],15);
+         if (pcm < -32768) pcm = -32768;
+         else if (pcm > 32767) pcm = 32767;
+         output[base+k] = pcm;
+      }
+   }
+}
+
+static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
+{  // Down-mixes num_c planar float channels into len interleaved stereo frames of 16-bit samples.
+   #define BUFFER_SIZE  32
+   float buffer[BUFFER_SIZE];   // interleaved accumulator: even slots collect left, odd slots collect right
+   int i,j,o,n = BUFFER_SIZE >> 1;   // n = frames per pass (half the buffer, two slots per frame)
+   // o is the offset in the source data
+   check_endianness();
+   for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
+      // o2 is the offset in the output data
+      int o2 = o << 1;
+      memset(buffer, 0, sizeof(buffer));
+      if (o + n > len) n = len - o;   // the last pass may cover fewer frames
+      for (j=0; j < num_c; ++j) {
+         int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);   // left/right routing bits of channel j
+         if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {   // both bits set: add to both sides
+            for (i=0; i < n; ++i) {
+               buffer[i*2+0] += data[j][d_offset+o+i];
+               buffer[i*2+1] += data[j][d_offset+o+i];
+            }
+         } else if (m == PLAYBACK_LEFT) {   // left only
+            for (i=0; i < n; ++i) {
+               buffer[i*2+0] += data[j][d_offset+o+i];
+            }
+         } else if (m == PLAYBACK_RIGHT) {   // right only
+            for (i=0; i < n; ++i) {
+               buffer[i*2+1] += data[j][d_offset+o+i];
+            }
+         }
+      }
+      for (i=0; i < (n<<1); ++i) {   // n frames occupy 2*n interleaved slots
+         FASTDEF(temp);
+         int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
+         if ((unsigned int) (v + 32768) > 65535)   // true when v lies outside [-32768, 32767]
+            v = v < 0 ? -32768 : 32767;
+         output[o2+i] = v;
+      }
+   }
+}
+
+static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
+{  // Fills buf_c planar 16-bit buffers from data_c float channels: down-mix to mono/stereo, otherwise copy and zero-pad.
+   int ch, shared;
+   if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
+      static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
+      for (ch = 0; ch < buf_c; ++ch)   // one mixing pass per requested output channel
+         compute_samples(channel_selector[buf_c][ch], buffer[ch]+b_offset, data_c, data, d_offset, samples);
+      return;
+   }
+   shared = (data_c < buf_c) ? data_c : buf_c;   // channels present on both sides
+   for (ch = 0; ch < shared; ++ch)
+      copy_samples(buffer[ch]+b_offset, data[ch]+d_offset, samples);
+   while (ch < buf_c)   // outputs without a matching source channel are zeroed
+      memset(buffer[ch++]+b_offset, 0, sizeof(short) * samples);
+}
+
+int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
+{  // Decodes the next frame into planar 16-bit buffers; returns samples per channel, capped at num_samples.
+   float **decoded;
+   int count = stb_vorbis_get_frame_float(f, NULL, &decoded);
+   count = (count > num_samples) ? num_samples : count;
+   if (count == 0) return 0;
+   convert_samples_short(num_c, buffer, 0, f->channels, decoded, 0, count);
+   return count;
+}
+
+static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
+{  // Writes len frames of buf_c interleaved 16-bit samples to buffer from data_c planar float channels.
+   int i;
+   check_endianness();
+   if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {   // down-mix path
+      if (buf_c == 1)   // mono must emit exactly len shorts; the stereo mixer would write 2*len and overrun buffer
+         compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
+      else if (buf_c == 2)   // a single call fills both interleaved channels
+         compute_stereo_samples(buffer, data_c, data, d_offset, len);
+   } else {
+      int j, limit = buf_c < data_c ? buf_c : data_c;   // limit = channels present on both sides
+      for (j=0; j < len; ++j) {
+         for (i=0; i < limit; ++i) {
+            FASTDEF(temp);
+            float f = data[i][d_offset+j];
+            int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);
+            if ((unsigned int) (v + 32768) > 65535)   // true when v lies outside [-32768, 32767]
+               v = v < 0 ? -32768 : 32767;
+            *buffer++ = v;
+         }
+         for (   ; i < buf_c; ++i)
+            *buffer++ = 0;   // pad output channels that have no source channel
+      }
+   }
+}
+
+int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
+{  // Decodes the next frame as interleaved 16-bit samples; returns the samples per channel that were stored.
+   float **decoded;
+   int frames;
+   if (num_c == 1)   // one interleaved channel has the same layout as one planar channel
+      return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
+   frames = stb_vorbis_get_frame_float(f, NULL, &decoded);
+   if (frames == 0) return 0;
+   if (frames*num_c > num_shorts) frames = num_shorts / num_c;   // never write more than num_shorts values
+   convert_channels_short_interleaved(num_c, buffer, f->channels, decoded, 0, frames);
+   return frames;
+}
+
+int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
+{  // Stores up to num_shorts/channels interleaved 16-bit frames in buffer; returns frames stored per channel.
+   float **outputs;
+   int len, n=0;
+   if (channels <= 0) return 0;   // nothing can be stored, and this avoids dividing by zero below
+   len = num_shorts / channels;
+   while (n < len) {
+      // samples between channel_buffer_start and channel_buffer_end are consumed before decoding again
+      int k = f->channel_buffer_end - f->channel_buffer_start;
+      if (n+k >= len) k = len - n;
+      if (k)
+         convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
+      buffer += k*channels;
+      n += k;
+      f->channel_buffer_start += k;
+      if (n == len) break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;   // stop when the decoder yields no samples
+   }
+   return n;
+}
+
+int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
+{
+   // Fills `channels` planar 16-bit buffers with up to len samples each (conversion is delegated to
+   // convert_samples_short), decoding more frames as needed; returns samples stored per channel.
+   float **outputs;
+   int n=0;
+   while (n < len) {
+      int k = f->channel_buffer_end - f->channel_buffer_start;
+      if (n+k >= len) k = len - n;
+      if (k)
+         convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
+      n += k;
+      f->channel_buffer_start += k;
+      if (n == len) break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;   // stop when the decoder yields no samples
+   }
+   return n;
+}
+
+#ifndef STB_VORBIS_NO_STDIO
+int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
+{
+   // Decodes the whole file `filename` into one malloc()ed buffer of interleaved 16-bit
+   // samples stored in *output; *channels and (if non-NULL) *sample_rate are filled in.
+   // Returns the number of samples per channel, -1 if the stream could not be opened,
+   // or -2 if memory ran out.
+   int frames = 0, used = 0, capacity, chunk, error, got;
+   short *pcm, *grown;
+   stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
+   if (v == NULL) return -1;
+   chunk = v->channels * 4096;   // headroom kept free ahead of every frame decode
+   *channels = v->channels;
+   if (sample_rate)
+      *sample_rate = v->sample_rate;
+   capacity = chunk;
+   pcm = (short *) malloc(capacity * sizeof(*pcm));
+   if (pcm == NULL) {
+      stb_vorbis_close(v);
+      return -2;
+   }
+   // decode frame by frame until the decoder reports zero samples
+   while ((got = stb_vorbis_get_frame_short_interleaved(v, v->channels, pcm+used, capacity-used)) != 0) {
+      frames += got;
+      used += got * v->channels;
+      if (used + chunk <= capacity) continue;   // still enough headroom for the next frame
+      capacity *= 2;
+      grown = (short *) realloc(pcm, capacity * sizeof(*pcm));
+      if (grown == NULL) {
+         free(pcm);
+         stb_vorbis_close(v);
+         return -2;
+      }
+      pcm = grown;
+   }
+   *output = pcm;
+   stb_vorbis_close(v);
+   return frames;
+}
+#endif // NO_STDIO
+
+int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
+{
+   // Decodes the whole in-memory stream mem[0..len) into one malloc()ed buffer of interleaved
+   // 16-bit samples stored in *output; *channels and (if non-NULL) *sample_rate are filled in.
+   // Returns the number of samples per channel, -1 if the stream could not be opened,
+   // or -2 if memory ran out.
+   int frames = 0, used = 0, capacity, chunk, error, got;
+   short *pcm, *grown;
+   stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
+   if (v == NULL) return -1;
+   chunk = v->channels * 4096;   // headroom kept free ahead of every frame decode
+   *channels = v->channels;
+   if (sample_rate)
+      *sample_rate = v->sample_rate;
+   capacity = chunk;
+   pcm = (short *) malloc(capacity * sizeof(*pcm));
+   if (pcm == NULL) {
+      stb_vorbis_close(v);
+      return -2;
+   }
+   // decode frame by frame until the decoder reports zero samples
+   while ((got = stb_vorbis_get_frame_short_interleaved(v, v->channels, pcm+used, capacity-used)) != 0) {
+      frames += got;
+      used += got * v->channels;
+      if (used + chunk <= capacity) continue;   // still enough headroom for the next frame
+      capacity *= 2;
+      grown = (short *) realloc(pcm, capacity * sizeof(*pcm));
+      if (grown == NULL) {
+         free(pcm);
+         stb_vorbis_close(v);
+         return -2;
+      }
+      pcm = grown;
+   }
+   *output = pcm;
+   stb_vorbis_close(v);
+   return frames;
+}
+#endif // STB_VORBIS_NO_INTEGER_CONVERSION
+
+int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
+{
+   // Copies up to num_floats/channels interleaved float frames into buffer, decoding more
+   // frames as needed; returns the number of frames stored.
+   float **outputs;
+   int wanted = num_floats / channels;
+   int stored = 0;
+   int shared = (f->channels > channels) ? channels : f->channels;   // source channels actually copied
+   while (stored < wanted) {
+      int ch, s;
+      int avail = f->channel_buffer_end - f->channel_buffer_start;
+      if (stored+avail >= wanted) avail = wanted - stored;
+      for (s = 0; s < avail; ++s) {
+         int pos = f->channel_buffer_start + s;
+         for (ch = 0; ch < shared; ++ch)
+            *buffer++ = f->channel_buffers[ch][pos];
+         for (   ; ch < channels; ++ch)   // zero-fill requested channels the stream lacks
+            *buffer++ = 0;
+      }
+      stored += avail;
+      f->channel_buffer_start += avail;
+      if (stored == wanted) break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
+   }
+   return stored;
+}
+
+int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
+{
+   // Copies up to num_samples floats per channel into the planar buffers, decoding more
+   // frames as needed; returns the number of samples stored per channel.
+   float **outputs;
+   int stored = 0;
+   int shared = (f->channels > channels) ? channels : f->channels;   // channels that have source data
+   while (stored < num_samples) {
+      int ch = 0;
+      int avail = f->channel_buffer_end - f->channel_buffer_start;
+      if (stored+avail >= num_samples) avail = num_samples - stored;
+      if (avail != 0) {
+         for (; ch < shared; ++ch)
+            memcpy(buffer[ch]+stored, f->channel_buffers[ch]+f->channel_buffer_start, sizeof(float)*avail);
+         for (; ch < channels; ++ch)   // zero the requested channels the stream does not provide
+            memset(buffer[ch]+stored, 0, sizeof(float) * avail);
+      }
+      stored += avail;
+      f->channel_buffer_start += avail;
+      if (stored == num_samples) break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
+         break;
+   }
+   return stored;
+}
+#endif // STB_VORBIS_NO_PULLDATA_API
+
+/* Version history
+    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
+    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
+    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
+                           avoid discarding last frame of audio data
+    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
+                           some more crash fixes when out of memory or with corrupt files 
+    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
+                           some crash fixes when out of memory or with corrupt files
+    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
+    1.04    - 2014/08/27 - fix missing const-correct case in API
+    1.03    - 2014/08/07 - Warning fixes
+    1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
+    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
+    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
+                           (API change) report sample rate for decode-full-file funcs
+    0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
+    0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
+    0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
+    0.99993 - remove assert that fired on legal files with empty tables
+    0.99992 - rewind-to-start
+    0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
+    0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
+    0.9998 - add a full-decode function with a memory source
+    0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
+    0.9996 - query length of vorbis stream in samples/seconds
+    0.9995 - bugfix to another optimization that only happened in certain files
+    0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
+    0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
+    0.9992 - performance improvement of IMDCT; now performs close to reference implementation
+    0.9991 - performance improvement of IMDCT
+    0.999 - (should have been 0.9990) performance improvement of IMDCT
+    0.998 - no-CRT support from Casey Muratori
+    0.997 - bugfixes for bugs found by Terje Mathisen
+    0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
+    0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
+    0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
+    0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
+    0.992 - fixes for MinGW warning
+    0.991 - turn fast-float-conversion on by default
+    0.990 - fix push-mode seek recovery if you seek into the headers
+    0.98b - fix to bad release of 0.98
+    0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
+    0.97 - builds under c++ (typecasting, don't use 'class' keyword)
+    0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
+    0.95 - clamping code for 16-bit functions
+    0.94 - not publicly released
+    0.93 - fixed all-zero-floor case (was decoding garbage)
+    0.92 - fixed a memory leak
+    0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
+    0.90 - first public release
+*/
+
+#endif // STB_VORBIS_HEADER_ONLY
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of 
+this software and associated documentation files (the "Software"), to deal in 
+the Software without restriction, including without limitation the rights to 
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+of the Software, and to permit persons to whom the Software is furnished to do 
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 
+software, either in source code form or as a compiled binary, for any purpose, 
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this 
+software dedicate any and all copyright interest in the software to the public 
+domain. We make this dedication for the benefit of the public at large and to 
+the detriment of our heirs and successors. We intend this dedication to be an 
+overt act of relinquishment in perpetuity of all present and future rights to 
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/Utilities/stb_vorbis.h b/Utilities/stb_vorbis.h
new file mode 100644
index 0000000..a7dcb07
--- /dev/null
+++ b/Utilities/stb_vorbis.h
@@ -0,0 +1,333 @@
+//////////////////////////////////////////////////////////////////////////////
+//
+//  HEADER BEGINS HERE
+//
+
+#ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
+#define STB_VORBIS_INCLUDE_STB_VORBIS_H
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+#define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	///////////   THREAD SAFETY
+
+	// Individual stb_vorbis* handles are not thread-safe; you cannot decode from
+	// them from multiple threads at the same time. However, you can have multiple
+	// stb_vorbis* handles and decode from them independently in multiple threads.
+
+
+	///////////   MEMORY ALLOCATION
+
+	// normally stb_vorbis uses malloc() to allocate memory at startup,
+	// and alloca() to allocate temporary memory during a frame on the
+	// stack. (Memory consumption will depend on the amount of setup
+	// data in the file and how you set the compile flags for speed
+	// vs. size. In my test files the maximal-size usage is ~150KB.)
+	//
+	// You can modify the wrapper functions in the source (setup_malloc,
+	// setup_temp_malloc, temp_malloc) to change this behavior, or you
+	// can use a simpler allocation model: you pass in a buffer from
+	// which stb_vorbis will allocate _all_ its memory (including the
+	// temp memory). "open" may fail with a VORBIS_outofmem if you
+	// do not pass in enough data; there is no way to determine how
+	// much you do need except to succeed (at which point you can
+	// query get_info to find the exact amount required. yes I know
+	// this is lame).
+	//
+	// If you pass in a non-NULL buffer of the type below, allocation
+	// will occur from it as described above. Otherwise just pass NULL
+	// to use malloc()/alloca()
+
+	typedef struct
+	{
+		char *alloc_buffer;   // caller-supplied buffer from which stb_vorbis allocates all its memory
+		int   alloc_buffer_length_in_bytes;   // size of alloc_buffer, in bytes
+	} stb_vorbis_alloc;
+
+
+	///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
+
+	typedef struct stb_vorbis stb_vorbis;
+
+	typedef struct
+	{
+		unsigned int sample_rate;   // presumably in Hz -- confirm against stb_vorbis_get_info
+		int channels;   // number of channels in the stream
+
+		unsigned int setup_memory_required;   // NOTE(review): these three look like the byte counts that the
+		unsigned int setup_temp_memory_required;   // MEMORY ALLOCATION notes say can be queried via get_info
+		unsigned int temp_memory_required;   // after a successful open -- confirm units and meaning
+
+		int max_frame_size;   // presumably in samples per channel -- confirm
+	} stb_vorbis_info;
+
+	// get general information about the file
+	extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
+
+	// get the last error detected (clears it, too)
+	extern int stb_vorbis_get_error(stb_vorbis *f);
+
+	// close an ogg vorbis file and free all memory in use
+	extern void stb_vorbis_close(stb_vorbis *f);
+
+	// this function returns the offset (in samples) from the beginning of the
+	// file that will be returned by the next decode, if it is known, or -1
+	// otherwise. after a flush_pushdata() call, this may take a while before
+	// it becomes valid again.
+	// NOT WORKING YET after a seek with PULLDATA API
+	extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
+
+	// returns the current seek point within the file, or offset from the beginning
+	// of the memory buffer. In pushdata mode it returns 0.
+	extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
+
+	///////////   PUSHDATA API
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+	// this API allows you to get blocks of data from any source and hand
+	// them to stb_vorbis. you have to buffer them; stb_vorbis will tell
+	// you how much it used, and you have to give it the rest next time;
+	// and stb_vorbis may not have enough data to work with and you will
+	// need to give it the same data again PLUS more. Note that the Vorbis
+	// specification does not bound the size of an individual frame.
+
+	extern stb_vorbis *stb_vorbis_open_pushdata(
+		const unsigned char * datablock, int datablock_length_in_bytes,
+		int *datablock_memory_consumed_in_bytes,
+		int *error,
+		const stb_vorbis_alloc *alloc_buffer);
+	// create a vorbis decoder by passing in the initial data block containing
+	//    the ogg&vorbis headers (you don't need to parse them, just provide
+	//    the first N bytes of the file--you're told if it's not enough, see below)
+	// on success, returns an stb_vorbis *, does not set error, returns the amount of
+	//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
+	// on failure, returns NULL and sets *error, does not change *datablock_memory_consumed_in_bytes
+	// if returns NULL and *error is VORBIS_need_more_data, then the input block was
+	//       incomplete and you need to pass in a larger block from the start of the file
+
+	extern int stb_vorbis_decode_frame_pushdata(
+		stb_vorbis *f,
+		const unsigned char *datablock, int datablock_length_in_bytes,
+		int *channels,             // place to write number of float * buffers
+		float ***output,           // place to write float ** array of float * buffers
+		int *samples               // place to write number of output samples
+	);
+	// decode a frame of audio sample data if possible from the passed-in data block
+	//
+	// return value: number of bytes we used from datablock
+	//
+	// possible cases:
+	//     0 bytes used, 0 samples output (need more data)
+	//     N bytes used, 0 samples output (resynching the stream, keep going)
+	//     N bytes used, M samples output (one frame of data)
+	// note that after opening a file, you will ALWAYS get one N-bytes,0-sample
+	// frame, because Vorbis always "discards" the first frame.
+	//
+	// Note that on resynch, stb_vorbis will rarely consume all of the buffer,
+	// instead only datablock_length_in_bytes-3 or less. This is because it wants
+	// to avoid missing parts of a page header if they cross a datablock boundary,
+	// without writing state-machiney code to record a partial detection.
+	//
+	// The number of channels returned are stored in *channels (which can be
+	// NULL--it is always the same as the number of channels reported by
+	// get_info). *output will contain an array of float* buffers, one per
+	// channel. In other words, (*output)[0][0] contains the first sample from
+	// the first channel, and (*output)[1][0] contains the first sample from
+	// the second channel.
+
+	extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
+	// inform stb_vorbis that your next datablock will not be contiguous with
+	// previous ones (e.g. you've seeked in the data); future attempts to decode
+	// frames will cause stb_vorbis to resynchronize (as noted above), and
+	// once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
+	// will begin decoding the _next_ frame.
+	//
+	// if you want to seek using pushdata, you need to seek in your file, then
+	// call stb_vorbis_flush_pushdata(), then start calling decoding, then once
+	// decoding is returning you data, call stb_vorbis_get_sample_offset, and
+	// if you don't like the result, seek your file again and repeat.
+#endif
+
+
+	//////////   PULLING INPUT API
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+	// This API assumes stb_vorbis is allowed to pull data from a source--
+	// either a block of memory containing the _entire_ vorbis stream, or a
+	// FILE * that you or it create, or possibly some other reading mechanism
+	// if you go modify the source to replace the FILE * case with some kind
+	// of callback to your code. (But if you don't support seeking, you may
+	// just want to go ahead and use pushdata.)
+
+#if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+	extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
+#endif
+#if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+	extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
+#endif
+	// decode an entire file and output the data interleaved into a malloc()ed
+	// buffer stored in *output. The return value is the number of samples decoded
+	// per channel, -1 if the file could not be opened or was not an ogg vorbis file,
+	// or -2 if out of memory. When done, just free() the pointer returned in *output.
+
+	extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
+		int *error, const stb_vorbis_alloc *alloc_buffer);
+	// create an ogg vorbis decoder from an ogg vorbis stream in memory (note
+	// this must be the entire stream!). on failure, returns NULL and sets *error
+
+#ifndef STB_VORBIS_NO_STDIO
+	extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
+		int *error, const stb_vorbis_alloc *alloc_buffer);
+	// create an ogg vorbis decoder from a filename via fopen(). on failure,
+	// returns NULL and sets *error (possibly to VORBIS_file_open_failure).
+
+	extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
+		int *error, const stb_vorbis_alloc *alloc_buffer);
+	// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+	// the _current_ seek point (ftell). on failure, returns NULL and sets *error.
+	// note that stb_vorbis must "own" this stream; if you seek it in between
+	// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
+	// perform stb_vorbis_seek_*() operations on this file, it will assume it
+	// owns the _entire_ rest of the file after the start point. Use the next
+	// function, stb_vorbis_open_file_section(), to limit it.
+
+	extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
+		int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
+	// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+	// the _current_ seek point (ftell); the stream will be of length 'len' bytes.
+	// on failure, returns NULL and sets *error. note that stb_vorbis must "own"
+	// this stream; if you seek it in between calls to stb_vorbis, it will become
+	// confused.
+#endif
+
+	extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
+	extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
+	// these functions seek in the Vorbis file to (approximately) 'sample_number'.
+	// after calling seek_frame(), the next call to get_frame_*() will include
+	// the specified sample. after calling stb_vorbis_seek(), the next call to
+	// stb_vorbis_get_samples_* will start with the specified sample. If you
+	// do not need to seek to EXACTLY the target sample when using get_samples_*,
+	// you can also use seek_frame().
+
+	extern int stb_vorbis_seek_start(stb_vorbis *f);
+	// this function is equivalent to stb_vorbis_seek(f,0)
+
+	extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
+	extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
+	// these functions return the total length of the vorbis stream
+
+	extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
+	// decode the next frame and return the number of samples. the number of
+	// channels returned are stored in *channels (which can be NULL--it is always
+	// the same as the number of channels reported by get_info). *output will
+	// contain an array of float* buffers, one per channel. These outputs will
+	// be overwritten on the next call to stb_vorbis_get_frame_*.
+	//
+	// You generally should not intermix calls to stb_vorbis_get_frame_*()
+	// and stb_vorbis_get_samples_*(), since the latter calls the former.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+	extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
+	extern int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples);
+#endif
+	// decode the next frame and return the number of *samples* per channel.
+	// Note that for interleaved data, you pass in the number of shorts (the
+	// size of your array), but the return value is the number of samples per
+	// channel, not the total number of samples.
+	//
+	// The data is coerced to the number of channels you request according to the
+	// channel coercion rules (see below). You must pass in the size of your
+	// buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
+	// The maximum buffer size needed can be gotten from get_info(); however,
+	// the Vorbis I specification implies an absolute maximum of 4096 samples
+	// per channel.
+
+	// Channel coercion rules:
+	//    Let M be the number of channels requested, and N the number of channels present,
+	//    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
+	//    and stereo R be the sum of all R and center channels (channel assignment from the
+	//    vorbis spec).
+	//        M    N       output
+	//        1    k      sum(Ck) for all k
+	//        2    *      stereo L, stereo R
+	//        k    l      k > l, the first l channels, then 0s
+	//        k    l      k <= l, the first k channels
+	//    Note that this is not _good_ surround etc. mixing at all! It's just so
+	//    you get something useful.
+
+	extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
+	extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
+	// gets num_samples samples, not necessarily on a frame boundary--this requires
+	// buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
+	// Returns the number of samples stored per channel; it may be less than requested
+	// at the end of the file. If there are no more samples in the file, returns 0.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+	extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
+	extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
+#endif
+	// gets num_samples samples, not necessarily on a frame boundary--this requires
+	// buffering so you have to supply the buffers. Applies the coercion rules above
+	// to produce 'channels' channels. Returns the number of samples stored per channel;
+	// it may be less than requested at the end of the file. If there are no more
+	// samples in the file, returns 0.
+
+#endif
+
+	////////   ERROR CODES
+
+	enum STBVorbisError
+	{
+		VORBIS__no_error,
+
+		VORBIS_need_more_data = 1,           // not a real error
+
+		VORBIS_invalid_api_mixing,           // can't mix API modes
+		VORBIS_outofmem,                     // not enough memory
+		VORBIS_feature_not_supported,        // uses floor 0
+		VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
+		VORBIS_file_open_failure,            // fopen() failed
+		VORBIS_seek_without_length,          // can't seek in unknown-length file
+
+		VORBIS_unexpected_eof = 10,          // file is truncated?
+		VORBIS_seek_invalid,                 // seek past EOF
+
+		// decoding errors (corrupt/invalid stream) -- you probably
+		// don't care about the exact details of these
+
+		// vorbis errors:
+		VORBIS_invalid_setup = 20,
+		VORBIS_invalid_stream,
+
+		// ogg errors:
+		VORBIS_missing_capture_pattern = 30,
+		VORBIS_invalid_stream_structure_version,
+		VORBIS_continued_packet_flag_invalid,
+		VORBIS_incorrect_stream_serial_number,
+		VORBIS_invalid_first_page,
+		VORBIS_bad_packet_type,
+		VORBIS_cant_find_last_page,
+		VORBIS_seek_failed
+	};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
+//
+//  HEADER ENDS HERE
+//
+//////////////////////////////////////////////////////////////////////////////
+
diff --git a/Utilities/stdafx.cpp b/Utilities/stdafx.cpp
new file mode 100644
index 0000000..6400425
--- /dev/null
+++ b/Utilities/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// Utilities.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/Utilities/stdafx.h b/Utilities/stdafx.h
new file mode 100644
index 0000000..23f1bef
--- /dev/null
+++ b/Utilities/stdafx.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <string>
+#include <stdint.h>
+#include <stdio.h>
+#include <iostream>
+#include <memory>
+#include <vector>
+#include <atomic>
+
+#include "UTF8Util.h"
+
+using std::shared_ptr;
+using utf8::ifstream;
+using utf8::ofstream;
+using std::string;
+using std::vector;
+using std::atomic;
+using std::atomic_flag;
\ No newline at end of file
diff --git a/Utilities/xBRZ/config.h b/Utilities/xBRZ/config.h
new file mode 100644
index 0000000..49a6790
--- /dev/null
+++ b/Utilities/xBRZ/config.h
@@ -0,0 +1,33 @@
+// ****************************************************************************
+// * This file is part of the HqMAME project. It is distributed under         *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0          *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the MAME library (or with modified *
+// * versions of MAME that use the same license as MAME), and distribute      *
+// * linked combinations including the two. You must obey the GNU General     *
+// * Public License in all respects for all of the code used other than MAME. *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#ifndef XBRZ_CONFIG_HEADER_284578425345
+#define XBRZ_CONFIG_HEADER_284578425345
+
+//do NOT include any headers here! used by xBRZ_dll!!!
+
+namespace xbrz
+{
+struct ScalerCfg //tuning parameters handed to the scaler; each member carries its default value
+{
+    double luminanceWeight            = 1.0;  //forwarded as third argument to the color distance function
+    double equalColorTolerance        = 30.0; //two colors are treated as equal when their distance is below this value
+    double dominantDirectionThreshold = 3.6;  //factor one diagonal gradient must exceed the other by to count as dominant
+    double steepDirectionThreshold    = 2.2;  //factor used when classifying a blend line as steep or shallow
+    double newTestAttribute           = 0.0;  //unused; test new parameters
+};
+}
+
+#endif
diff --git a/Utilities/xBRZ/xbrz.cpp b/Utilities/xBRZ/xbrz.cpp
new file mode 100644
index 0000000..c8f4f47
--- /dev/null
+++ b/Utilities/xBRZ/xbrz.cpp
@@ -0,0 +1,1159 @@
+// ****************************************************************************
+// * This file is part of the HqMAME project. It is distributed under         *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0          *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the MAME library (or with modified *
+// * versions of MAME that use the same license as MAME), and distribute      *
+// * linked combinations including the two. You must obey the GNU General     *
+// * Public License in all respects for all of the code used other than MAME. *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#include "../stdafx.h"
+#include "xbrz.h"
+#include <cassert>
+#include <algorithm>
+#include <vector>
+#include <cmath>
+
+namespace
+{
+template <uint32_t N> inline //N-th byte of "val", byte 0 being the least significant one
+unsigned char getByte(uint32_t val) { const uint32_t shifted = val >> (N * 8); return static_cast<unsigned char>(shifted & 0xffu); }
+//channel accessors for pixels laid out as 0xAARRGGBB
+inline unsigned char getAlpha(uint32_t pix) { return static_cast<unsigned char>((pix >> 24) & 0xffu); }
+inline unsigned char getRed  (uint32_t pix) { return static_cast<unsigned char>((pix >> 16) & 0xffu); }
+inline unsigned char getGreen(uint32_t pix) { return static_cast<unsigned char>((pix >>  8) & 0xffu); }
+inline unsigned char getBlue (uint32_t pix) { return static_cast<unsigned char>( pix        & 0xffu); }
+
+//pack 8-bit channels into 0x00RRGGBB / 0xAARRGGBB; channels are widened to uint32_t *before* shifting, because "a << 24" on a promoted int overflows for a >= 128
+inline uint32_t makePixel(                 unsigned char r, unsigned char g, unsigned char b) { return                                    (static_cast<uint32_t>(r) << 16) | (static_cast<uint32_t>(g) << 8) | b; }
+inline uint32_t makePixel(unsigned char a, unsigned char r, unsigned char g, unsigned char b) { return (static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(r) << 16) | (static_cast<uint32_t>(g) << 8) | b; }
+
+template <unsigned int M, unsigned int N> inline
+uint32_t gradientRGB(uint32_t pixFront, uint32_t pixBack) //blend front color with opacity M / N over opaque background (http://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending); result carries no alpha byte
+{
+    static_assert(0 < M && M < N && N <= 1000, "");
+
+    const auto mixChannel = [](unsigned char front, unsigned char back) { return static_cast<unsigned char>((front * M + back * (N - M)) / N); };
+
+    const unsigned char red   = mixChannel(getRed  (pixFront), getRed  (pixBack));
+    const unsigned char green = mixChannel(getGreen(pixFront), getGreen(pixBack));
+    return makePixel(red, green, mixChannel(getBlue(pixFront), getBlue(pixBack)));
+}
+
+
+template <unsigned int M, unsigned int N> inline
+uint32_t gradientARGB(uint32_t pixFront, uint32_t pixBack) //alpha-weighted intermediate color of two ARGB pixels (=> NO alpha blending!!!)
+{
+    static_assert(0 < M && M < N && N <= 1000, "");
+
+    const unsigned int frontWeight = getAlpha(pixFront) * M;
+    const unsigned int backWeight  = getAlpha(pixBack) * (N - M);
+    const unsigned int totalWeight = frontWeight + backWeight;
+    if (totalWeight == 0) //both inputs have alpha 0
+        return 0;
+
+    auto mixChannel = [=](unsigned char front, unsigned char back) -> unsigned char
+    {
+        return static_cast<unsigned char>((front * frontWeight + back * backWeight) / totalWeight);
+    };
+    const unsigned char alpha = static_cast<unsigned char>(totalWeight / N);
+    return makePixel(alpha,
+                     mixChannel(getRed  (pixFront), getRed  (pixBack)),
+                     mixChannel(getGreen(pixFront), getGreen(pixBack)),
+                     mixChannel(getBlue (pixFront), getBlue (pixBack)));
+}
+
+
+//inline
+//double fastSqrt(double n)
+//{
+//    __asm //speeds up xBRZ by about 9% compared to std::sqrt which internally uses the same assembler instructions but adds some "fluff"
+//    {
+//        fld n
+//        fsqrt
+//    }
+//}
+//
+
+
+uint32_t*       byteAdvance(      uint32_t* ptr, int bytes) { char*       raw = reinterpret_cast<      char*>(ptr); return reinterpret_cast<      uint32_t*>(raw + bytes); } //move a pixel pointer by a byte offset
+const uint32_t* byteAdvance(const uint32_t* ptr, int bytes) { const char* raw = reinterpret_cast<const char*>(ptr); return reinterpret_cast<const uint32_t*>(raw + bytes); } //const overload
+
+
+//paint a blockWidth x blockHeight pixel rectangle with "col"; consecutive rows start "pitch" bytes apart
+inline
+void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
+{
+    for (int rowsLeft = blockHeight; rowsLeft > 0; --rowsLeft)
+    {
+        for (int x = 0; x < blockWidth; ++x)
+            trg[x] = col;
+        trg = byteAdvance(trg, pitch); //step to the start of the next row
+    }
+}
+
+inline //square n x n convenience overload
+void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n) { fillBlock(trg, pitch, col, n, n); }
+
+
+#ifdef _MSC_VER
+    #define FORCE_INLINE __forceinline
+#elif defined __GNUC__
+    #define FORCE_INLINE __attribute__((always_inline)) inline
+#else
+    #define FORCE_INLINE inline
+#endif
+
+
+enum RotationDegree //clock-wise, in steps of 90 degrees; values are consecutive so that "rotDeg - 1" names the previous step
+{
+    ROT_0   = 0,
+    ROT_90  = 1,
+    ROT_180 = 2,
+    ROT_270 = 3
+};
+
+//calculate input matrix coordinates after rotation at compile time: maps (I, J) of the rotated N x N matrix back to (I_old, J_old) of the unrotated one
+template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
+struct MatrixRotation; //primary template, only declared here so that the ROT_0 specialization can precede its definition
+
+template <size_t I, size_t J, size_t N>
+struct MatrixRotation<ROT_0, I, J, N> //recursion anchor: no rotation, coordinates stay as they are
+{
+    static const size_t I_old = I;
+    static const size_t J_old = J;
+};
+
+template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix
+struct MatrixRotation //general case: takes the result for "rotDeg - 1" and applies one further 90 degree step
+{
+    static const size_t I_old = N - 1 - MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::J_old; //old coordinates before rotation!
+    static const size_t J_old =         MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::I_old; //
+};
+
+
+template <size_t N, RotationDegree rotDeg>
+class OutputMatrix
+{
+public:
+    //access matrix area, top-left at position "out" for image with given width (in pixels)
+    OutputMatrix(uint32_t* out, int outWidth) : out_(out), outWidth_(outWidth) {}
+
+    //pixel at (row I, column J) of the matrix as seen after rotation by rotDeg; the index mapping is resolved at compile time
+    template <size_t I, size_t J>
+    uint32_t& ref() const
+    {
+        const size_t rowOld = MatrixRotation<rotDeg, I, J, N>::I_old;
+        const size_t colOld = MatrixRotation<rotDeg, I, J, N>::J_old;
+        return out_[colOld + rowOld * outWidth_];
+    }
+
+private:
+    uint32_t* out_;
+    const int outWidth_;
+};
+
+
+template <class T> inline //value multiplied by itself
+T square(T value) { const T product = value * value; return product; }
+
+struct DistYCbCrBuffer //lookup table of YCbCr color distances; 30% perf boost compared to distYCbCr()!
+{
+public:
+    static double dist(uint32_t pix1, uint32_t pix2) //distance between the RGB parts of two pixels (the alpha byte is not read)
+    {
+#if defined _MSC_VER && _MSC_VER < 1900
+#error function scope static initialization is not yet thread-safe!
+#endif
+        static const DistYCbCrBuffer inst; //function-local static: the table is built when this line is first reached
+        return inst.distImpl(pix1, pix2);
+    }
+
+private:
+    DistYCbCrBuffer() : buffer(256 * 256 * 256) //one entry per combination of three halved channel differences
+    {
+        for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores)
+        {
+            const int r_diff = getByte<2>(i) * 2 - 255; //undo the "(diff + 255) / 2" packing that distImpl() applies to build the index
+            const int g_diff = getByte<1>(i) * 2 - 255;
+            const int b_diff = getByte<0>(i) * 2 - 255;
+
+            const double k_b = 0.0593; //ITU-R BT.2020 conversion
+            const double k_r = 0.2627; //
+            const double k_g = 1 - k_b - k_r;
+
+            const double scale_b = 0.5 / (1 - k_b);
+            const double scale_r = 0.5 / (1 - k_r);
+
+            const double y   = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
+            const double c_b = scale_b * (b_diff - y);
+            const double c_r = scale_r * (r_diff - y);
+
+            buffer[i] = static_cast<float>(std::sqrt(square(y) + square(c_b) + square(c_r))); //Euclidean length of (y, c_b, c_r), stored as float
+        }
+    }
+
+    double distImpl(uint32_t pix1, uint32_t pix2) const //table lookup keyed by the three signed channel differences
+    {
+        //if (pix1 == pix2) -> 8% perf degradation!
+        //    return 0;
+        //if (pix1 > pix2)
+        //    std::swap(pix1, pix2); -> 30% perf degradation!!!
+
+        const int r_diff = static_cast<int>(getRed  (pix1)) - getRed  (pix2); //each difference lies in [-255, 255]
+        const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
+        const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
+
+        return buffer[(((r_diff + 255) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte
+                      (((g_diff + 255) / 2) <<  8) |
+                      (( b_diff + 255) / 2)];
+    }
+
+    std::vector<float> buffer; //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB
+};
+
+
+enum BlendType //blend decision for one pixel corner; packed as a 2-bit field by the get*/set* helpers
+{
+    BLEND_NONE     = 0,
+    BLEND_NORMAL   = 1, //a normal indication to blend
+    BLEND_DOMINANT = 2, //a strong indication to blend
+    //attention: BlendType must fit into the value range of 2 bit!!!
+};
+
+struct BlendResult //blend types determined for the four pixels F, G, J, K that meet at one evaluated corner
+{
+    //member order f, g, j, k is kept, so "BlendResult r = {};" still value-initializes all four to BLEND_NONE
+    BlendType blend_f, blend_g;
+    BlendType blend_j, blend_k;
+};
+
+
+struct Kernel_4x4 //4x4 pixel neighbourhood: kernel for preprocessing step
+{
+    //one declaration per kernel row, top to bottom
+    uint32_t a, b, c, d;
+    uint32_t e, f, g, h;
+    uint32_t i, j, k, l;
+    uint32_t m, n, o, p;
+};
+
+/*
+input kernel area naming convention:
+-----------------
+| A | B | C | D |
+----|---|---|---|
+| E | F | G | H |   //evaluate the four corners between F, G, J, K
+----|---|---|---|   //input pixel is at position F
+| I | J | K | L |
+----|---|---|---|
+| M | N | O | P |
+-----------------
+*/
+template <class ColorDistance> //ColorDistance must provide a static dist(pix1, pix2, luminanceWeight)
+FORCE_INLINE //detect blend direction
+BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: F, G, J, K corners of "GradientType"
+{
+    BlendResult result = {}; //all four corners start out as BLEND_NONE
+
+    if ((ker.f == ker.g && //both rows of the 2x2 center are uniform...
+         ker.j == ker.k) ||
+        (ker.f == ker.j && //...or both of its columns are: nothing to blend
+         ker.g == ker.k))
+        return result;
+
+    auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight); };
+
+    const int weight = 4; //extra weight of the distance across the center diagonal itself
+    double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g); //summed distances along the J-G direction
+    double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k); //summed distances along the F-K direction
+
+    if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
+    {
+        const bool dominantGradient = cfg.dominantDirectionThreshold * jg < fk;
+        if (ker.f != ker.g && ker.f != ker.j) //F differs from both of its neighbours inside the 2x2 center
+            result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+
+        if (ker.k != ker.j && ker.k != ker.g)
+            result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+    }
+    else if (fk < jg) //mirror case; when jg == fk no corner is marked
+    {
+        const bool dominantGradient = cfg.dominantDirectionThreshold * fk < jg;
+        if (ker.j != ker.f && ker.j != ker.k)
+            result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+
+        if (ker.g != ker.f && ker.g != ker.k)
+            result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+    }
+    return result;
+}
+
+struct Kernel_3x3 //3x3 pixel neighbourhood; the input pixel sits at position "e"
+{
+    //one declaration per kernel row, top to bottom
+    uint32_t a, b, c;
+    uint32_t d, e, f;
+    uint32_t g, h, i;
+};
+
+#define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
+//generic getters get_a .. get_i: return the unrotated kernel member; we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
+DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c)
+DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f)
+DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i)
+#undef DEF_GETTER
+
+#define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
+DEF_GETTER(b, d) DEF_GETTER(c, a) //ROT_90: get_x returns member y; there is no entry for "a", so get_a<ROT_90> falls back to the generic getter
+DEF_GETTER(d, h) DEF_GETTER(e, e) DEF_GETTER(f, b)
+DEF_GETTER(g, i) DEF_GETTER(h, f) DEF_GETTER(i, c)
+#undef DEF_GETTER
+
+#define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
+DEF_GETTER(b, h) DEF_GETTER(c, g) //ROT_180: again no specialization for "a"
+DEF_GETTER(d, f) DEF_GETTER(e, e) DEF_GETTER(f, d)
+DEF_GETTER(g, c) DEF_GETTER(h, b) DEF_GETTER(i, a)
+#undef DEF_GETTER
+
+#define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
+DEF_GETTER(b, f) DEF_GETTER(c, i) //ROT_270: again no specialization for "a"
+DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h)
+DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i,	g)
+#undef DEF_GETTER
+
+
+//compress four blend types into a single byte: bits 0-1 = top-left, 2-3 = top-right, 4-5 = bottom-right, 6-7 = bottom-left
+inline BlendType getTopR   (unsigned char b) { return static_cast<BlendType>((b >> 2) & 0x3); }
+inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>((b >> 4) & 0x3); }
+inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>((b >> 6) & 0x3); }
+//setters only OR bits in; buffer is assumed to be initialized before preprocessing!
+inline void setTopL   (unsigned char& b, BlendType bt) { b = static_cast<unsigned char>(b | bt); }
+inline void setTopR   (unsigned char& b, BlendType bt) { b = static_cast<unsigned char>(b | (bt << 2)); }
+inline void setBottomR(unsigned char& b, BlendType bt) { b = static_cast<unsigned char>(b | (bt << 4)); }
+inline void setBottomL(unsigned char& b, BlendType bt) { b = static_cast<unsigned char>(b | (bt << 6)); }
+
+inline bool blendingNeeded(unsigned char b) { return !(b == 0); } //true as soon as any of the four corner fields is set
+
+template <RotationDegree rotDeg> inline //rotate the four packed 2-bit corner fields within the byte; generic case leaves it untouched
+unsigned char rotateBlendInfo(unsigned char b) { return b; }
+template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return static_cast<unsigned char>((b << 2) | (b >> 6)); }
+template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return static_cast<unsigned char>((b << 4) | (b >> 4)); }
+template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return static_cast<unsigned char>((b << 6) | (b >> 2)); }
+
+
+/*
+input kernel area naming convention:
+-------------
+| A | B | C |
+----|---|---|
+| D | E | F | //input pixel is at position E
+----|---|---|
+| G | H | I |
+-------------
+*/
+template <class Scaler, class ColorDistance, RotationDegree rotDeg>
+FORCE_INLINE //perf: quite worth it!
+void blendPixel(const Kernel_3x3& ker, //blends the bottom-right corner of pixel "e" as seen after rotating the kernel by rotDeg
+                uint32_t* target, int trgWidth, //top-left of the scale x scale output block and output image width in pixels
+                unsigned char blendInfo, //result of preprocessing all four corners of pixel "e"
+                const xbrz::ScalerCfg& cfg)
+{
+#define a get_a<rotDeg>(ker)
+#define b get_b<rotDeg>(ker)
+#define c get_c<rotDeg>(ker)
+#define d get_d<rotDeg>(ker)
+#define e get_e<rotDeg>(ker)
+#define f get_f<rotDeg>(ker)
+#define g get_g<rotDeg>(ker)
+#define h get_h<rotDeg>(ker)
+#define i get_i<rotDeg>(ker)
+
+
+    const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo); //bring the corner handled by this rotation into the "bottom-right" field
+
+    if (getBottomR(blend) >= BLEND_NORMAL)
+    {
+        auto eq   = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight) < cfg.equalColorTolerance; };
+        auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight); };
+
+        const bool doLineBlend = [&]() -> bool //false: blend the corner only
+        {
+            if (getBottomR(blend) >= BLEND_DOMINANT)
+                return true;
+
+            //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
+            if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90 corners
+                return false;
+            if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
+                return false;
+
+            //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
+            if (!eq(e, i) && eq(g, h) && eq(h , i) && eq(i, f) && eq(f, c))
+                return false;
+
+            return true;
+        }();
+
+        const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color
+
+        OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth); //rotated view onto the output block
+
+        if (doLineBlend)
+        {
+            const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
+            const double hc = dist(h, c); //
+
+            const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
+            const bool haveSteepLine   = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
+
+            if (haveShallowLine)
+            {
+                if (haveSteepLine)
+                    Scaler::blendLineSteepAndShallow(px, out);
+                else
+                    Scaler::blendLineShallow(px, out);
+            }
+            else
+            {
+                if (haveSteepLine)
+                    Scaler::blendLineSteep(px, out);
+                else
+                    Scaler::blendLineDiagonal(px,out);
+            }
+        }
+        else
+            Scaler::blendCorner(px, out);
+    }
+
+#undef a
+#undef b
+#undef c
+#undef d
+#undef e
+#undef f
+#undef g
+#undef h
+#undef i
+}
+
+
+template <class Scaler, class ColorDistance> //scaler policy: see "Scaler2x" reference implementation
+void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast) //scales source rows [yFirst, yLast) by Scaler::scale into trg
+{
+    yFirst = std::max(yFirst, 0); //clamp the requested stripe to the image
+    yLast  = std::min(yLast, srcHeight);
+    if (yFirst >= yLast || srcWidth <= 0)
+        return;
+
+    const int trgWidth = srcWidth * Scaler::scale;
+
+    //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
+    //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
+    const int bufferSize = srcWidth; //one blend-info byte per source column
+    unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize; //last bufferSize bytes of this stripe's output
+    std::fill(preProcBuffer, preProcBuffer + bufferSize, 0);
+    static_assert(BLEND_NONE == 0, "");
+
+    //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending
+    //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
+    if (yFirst > 0)
+    {
+        const int y = yFirst - 1; //row directly above the stripe
+
+        const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0); //row indices are clamped at the image border
+        const uint32_t* s_0  = src + srcWidth * y; //center line
+        const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
+        const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
+
+        for (int x = 0; x < srcWidth; ++x)
+        {
+            const int x_m1 = std::max(x - 1, 0); //column indices are clamped at the image border
+            const int x_p1 = std::min(x + 1, srcWidth - 1);
+            const int x_p2 = std::min(x + 2, srcWidth - 1);
+
+            Kernel_4x4 ker = {}; //perf: initialization is negligible
+            ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
+            ker.b = s_m1[x];
+            ker.c = s_m1[x_p1];
+            ker.d = s_m1[x_p2];
+
+            ker.e = s_0[x_m1];
+            ker.f = s_0[x];
+            ker.g = s_0[x_p1];
+            ker.h = s_0[x_p2];
+
+            ker.i = s_p1[x_m1];
+            ker.j = s_p1[x];
+            ker.k = s_p1[x_p1];
+            ker.l = s_p1[x_p2];
+
+            ker.m = s_p2[x_m1];
+            ker.n = s_p2[x];
+            ker.o = s_p2[x_p1];
+            ker.p = s_p2[x_p2];
+
+            const BlendResult res = preProcessCorners<ColorDistance>(ker, cfg);
+            /*
+            preprocessing blend result:
+            ---------
+            | F | G |   //evaluate corner between F, G, J, K
+            ----|---|   //input pixel is at position F
+            | J | K |
+            ---------
+            */
+            setTopR(preProcBuffer[x], res.blend_j); //J and K lie on row yFirst: record their top corners
+
+            if (x + 1 < bufferSize)
+                setTopL(preProcBuffer[x + 1], res.blend_k);
+        }
+    }
+    //------------------------------------------------------------------------------------
+
+    for (int y = yFirst; y < yLast; ++y)
+    {
+        uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
+
+        const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
+        const uint32_t* s_0  = src + srcWidth * y; //center line
+        const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
+        const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
+
+        unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
+
+        for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
+        {
+            //all those bounds checks have only insignificant impact on performance!
+            const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!
+            const int x_p1 = std::min(x + 1, srcWidth - 1);
+            const int x_p2 = std::min(x + 2, srcWidth - 1);
+
+            Kernel_4x4 ker4 = {}; //perf: initialization is negligible
+
+            ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible
+            ker4.b = s_m1[x];
+            ker4.c = s_m1[x_p1];
+            ker4.d = s_m1[x_p2];
+
+            ker4.e = s_0[x_m1];
+            ker4.f = s_0[x];
+            ker4.g = s_0[x_p1];
+            ker4.h = s_0[x_p2];
+
+            ker4.i = s_p1[x_m1];
+            ker4.j = s_p1[x];
+            ker4.k = s_p1[x_p1];
+            ker4.l = s_p1[x_p2];
+
+            ker4.m = s_p2[x_m1];
+            ker4.n = s_p2[x];
+            ker4.o = s_p2[x_p1];
+            ker4.p = s_p2[x_p2];
+
+            //evaluate the four corners on bottom-right of current pixel
+            unsigned char blend_xy = 0; //for current (x, y) position
+            {
+                const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+                /*
+                preprocessing blend result:
+                ---------
+                | F | G |   //evaluate corner between F, G, J, K
+                ----|---|   //current input pixel is at position F
+                | J | K |
+                ---------
+                */
+                blend_xy = preProcBuffer[x]; //corners of (x, y) collected while processing the previous row and column
+                setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
+
+                setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
+                preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
+
+                blend_xy1 = 0;
+                setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
+
+                if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y)
+                    setBottomL(preProcBuffer[x + 1], res.blend_g);
+            }
+
+            //fill block of size scale * scale with the given color
+            fillBlock(out, trgWidth * sizeof(uint32_t), ker4.f, Scaler::scale); //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
+
+            //blend four corners of current pixel
+            if (blendingNeeded(blend_xy)) //good 5% perf-improvement
+            {
+                Kernel_3x3 ker3 = {}; //perf: initialization is negligible
+
+                ker3.a = ker4.a; //3x3 kernel = upper-left 3x3 part of the 4x4 kernel, centered on the current pixel
+                ker3.b = ker4.b;
+                ker3.c = ker4.c;
+
+                ker3.d = ker4.e;
+                ker3.e = ker4.f;
+                ker3.f = ker4.g;
+
+                ker3.g = ker4.i;
+                ker3.h = ker4.j;
+                ker3.i = ker4.k;
+
+                blendPixel<Scaler, ColorDistance, ROT_0  >(ker3, out, trgWidth, blend_xy, cfg); //one call per corner of the pixel
+                blendPixel<Scaler, ColorDistance, ROT_90 >(ker3, out, trgWidth, blend_xy, cfg);
+                blendPixel<Scaler, ColorDistance, ROT_180>(ker3, out, trgWidth, blend_xy, cfg);
+                blendPixel<Scaler, ColorDistance, ROT_270>(ker3, out, trgWidth, blend_xy, cfg);
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------------------
+
+template <class ColorGradient> //ColorGradient must provide alphaGrad<M, N>(pixBack, pixFront); presumably M / N is the share of pixFront mixed in - confirm against its definition
+struct Scaler2x : public ColorGradient //blend patterns for a 2x2 output block; ref<I, J>() addresses (row, column) of the rotated block
+{
+    static const int scale = 2;
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out) //touches the bottom row only
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out) //touches the right column only (transpose of blendLineShallow)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<0, 1>(), col);
+        alphaGrad<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
+    }
+
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out) //touches the bottom-right pixel only
+    {
+        alphaGrad<1, 2>(out.template ref<1, 1>(), col);
+    }
+
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
+    }
+};
+
+
+template <class ColorGradient> //ColorGradient must provide alphaGrad<M, N>(pixBack, pixFront); presumably M / N is the share of pixFront mixed in - confirm against its definition
+struct Scaler3x : public ColorGradient //blend patterns for a 3x3 output block; ref<I, J>() addresses (row, column) of the rotated block
+{
+    static const int scale = 3;
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        out.template ref<scale - 1, 2>() = col; //bottom-right pixel is overwritten, not blended
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out) //transpose of blendLineShallow
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        out.template ref<2, scale - 1>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<2, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<0, 2>(), col);
+        alphaGrad<3, 4>(out.template ref<2, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<1, 2>(), col);
+        out.template ref<2, 2>() = col; //bottom-right pixel is overwritten, not blended
+    }
+
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 8>(out.template ref<1, 2>(), col); //conflict with other rotations for this odd scale
+        alphaGrad<1, 8>(out.template ref<2, 1>(), col);
+        alphaGrad<7, 8>(out.template ref<2, 2>(), col); //
+    }
+
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
+        //alphaGrad<7, 256>(out.template ref<2, 1>(), col); //0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale
+        //alphaGrad<7, 256>(out.template ref<1, 2>(), col); //0.02826017254
+    }
+};
+
+
+template <class ColorGradient> //ColorGradient must provide alphaGrad<M, N>(pixBack, pixFront); presumably M / N is the share of pixFront mixed in - confirm against its definition
+struct Scaler4x : public ColorGradient //blend patterns for a 4x4 output block; ref<I, J>() addresses (row, column) of the rotated block
+{
+    static const int scale = 4;
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+
+        out.template ref<scale - 1, 2>() = col; //these two pixels are overwritten, not blended
+        out.template ref<scale - 1, 3>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out) //transpose of blendLineShallow
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<3, 4>(out.template ref<3, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<1, 3>(), col);
+        alphaGrad<1, 4>(out.template ref<3, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<0, 3>(), col);
+
+        alphaGrad<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
+
+        out.template ref<3, 3>() = col; //these three pixels are overwritten, not blended
+        out.template ref<3, 2>() = col;
+        out.template ref<2, 3>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 2>(out.template ref<scale - 1, scale / 2    >(), col); //cell (3, 2)
+        alphaGrad<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col); //cell (2, 3)
+        out.template ref<scale - 1, scale - 1>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
+        alphaGrad< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
+        alphaGrad< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
+    }
+};
+
+
+//Blend rules for 5x scaling: every function below paints one edge pattern into the output block
+//that is addressed through OutputMatrix::ref<I, J>() (indices 0..4), using the blend color "col".
+//ColorGradient supplies the actual pixel mixing via alphaGrad<M, N>();
+//M/N is presumably the weight given to "col" (compare the "exact" fractions noted in blendCorner) - TODO confirm against gradientRGB/gradientARGB
+template <class ColorGradient>
+struct Scaler5x : public ColorGradient
+{
+    static const int scale = 5;
+
+    //forwarder to ColorGradient::alphaGrad so the calls below need no "ColorGradient::template" prefix
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    //shallow line: three 1/4 blends, two 3/4 blends, four pixels overwritten with col
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 3, 4>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+        out.template ref<scale - 1, 4>() = col;
+        out.template ref<scale - 2, 4>() = col;
+    }
+
+    //steep line: same pattern as blendLineShallow() with both ref<> indices swapped
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<1, 4>(out.template ref<4, scale - 3>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+        out.template ref<4, scale - 2>() = col;
+    }
+
+    //steep and shallow line meeting in the same block: the first group handles the steep side,
+    //the second group the shallow side, pixel <3, 3> gets a 2/3 blend and the remaining pixels are overwritten
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+
+        alphaGrad<2, 3>(out.template ref<3, 3>(), col);
+
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+    }
+
+    //diagonal line: three weak (1/8) blends along the diagonal, two strong (7/8) blends next to pixel <4, 4>, which is overwritten
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 8>(out.template ref<scale - 1, scale / 2    >(), col); //conflict with other rotations for this odd scale
+        alphaGrad<1, 8>(out.template ref<scale - 2, scale / 2 + 1>(), col);
+        alphaGrad<1, 8>(out.template ref<scale - 3, scale / 2 + 2>(), col); //conflict with other rotations for this odd scale
+
+        alphaGrad<7, 8>(out.template ref<4, 3>(), col);
+        alphaGrad<7, 8>(out.template ref<3, 4>(), col);
+
+        out.template ref<4, 4>() = col;
+    }
+
+    //corner: only blends, no pixel is overwritten; the weights are rounded from the "exact" values in the trailing comments
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
+        alphaGrad<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
+        alphaGrad<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
+        //the two blends below are intentionally disabled:
+        //alphaGrad<1, 64>(out.template ref<4, 2>(), col); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale
+        //alphaGrad<1, 64>(out.template ref<2, 4>(), col); //0.01676812367
+    }
+};
+
+
+//Blend rules for 6x scaling: every function below paints one edge pattern into the output block
+//that is addressed through OutputMatrix::ref<I, J>() (indices 0..5), using the blend color "col".
+//ColorGradient supplies the actual pixel mixing via alphaGrad<M, N>();
+//M/N is presumably the weight given to "col" (compare the "exact" fractions noted in blendCorner) - TODO confirm against gradientRGB/gradientARGB
+template <class ColorGradient>
+struct Scaler6x : public ColorGradient
+{
+    static const int scale = 6;
+
+    //forwarder to ColorGradient::alphaGrad so the calls below need no "ColorGradient::template" prefix
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    //shallow line: three 1/4 blends, three 3/4 blends, six pixels overwritten with col
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 3, 4>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 3, 5>(), col);
+
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+        out.template ref<scale - 1, 4>() = col;
+        out.template ref<scale - 1, 5>() = col;
+
+        out.template ref<scale - 2, 4>() = col;
+        out.template ref<scale - 2, 5>() = col;
+    }
+
+    //steep line: same pattern as blendLineShallow() with both ref<> indices swapped
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<1, 4>(out.template ref<4, scale - 3>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+        alphaGrad<3, 4>(out.template ref<5, scale - 3>(), col);
+
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+        out.template ref<5, scale - 1>() = col;
+
+        out.template ref<4, scale - 2>() = col;
+        out.template ref<5, scale - 2>() = col;
+    }
+
+    //steep and shallow line meeting in the same block: the first group of blends handles the steep side,
+    //the second group the shallow side; the pixels listed afterwards are overwritten with col
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+        out.template ref<5, scale - 1>() = col;
+
+        out.template ref<4, scale - 2>() = col;
+        out.template ref<5, scale - 2>() = col;
+
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+    }
+
+    //diagonal line: three half blends along the diagonal, the three pixels around <scale - 1, scale - 1> are overwritten
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 2>(out.template ref<scale - 1, scale / 2    >(), col);
+        alphaGrad<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
+        alphaGrad<1, 2>(out.template ref<scale - 3, scale / 2 + 2>(), col);
+
+        out.template ref<scale - 2, scale - 1>() = col;
+        out.template ref<scale - 1, scale - 1>() = col;
+        out.template ref<scale - 1, scale - 2>() = col;
+    }
+
+    //corner: only blends, no pixel is overwritten; the weights are rounded from the "exact" values in the trailing comments
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<97, 100>(out.template ref<5, 5>(), col); //exact: 0.9711013910
+        alphaGrad<42, 100>(out.template ref<4, 5>(), col); //0.4236372243
+        alphaGrad<42, 100>(out.template ref<5, 4>(), col); //0.4236372243
+        alphaGrad< 6, 100>(out.template ref<5, 3>(), col); //0.05652034508
+        alphaGrad< 6, 100>(out.template ref<3, 5>(), col); //0.05652034508
+    }
+};
+
+//------------------------------------------------------------------------------------
+
+//Color distance policy for pixels without alpha channel
+struct ColorDistanceRGB
+{
+    //returns the distance delivered by DistYCbCrBuffer for the two pixels;
+    //luminanceWeight is part of the common policy signature but is not consulted here
+    static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
+    {
+        //former direct computation, kept for reference:
+        //  if (pix1 == pix2) //about 4% perf boost
+        //      return 0;
+        //  return distYCbCr(pix1, pix2, luminanceWeight);
+        const double bufferedDistance = DistYCbCrBuffer::dist(pix1, pix2);
+        return bufferedDistance;
+    }
+};
+
+//Color distance policy for pixels carrying an alpha channel
+struct ColorDistanceARGB
+{
+    /*
+    Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
+
+        1. if a1 = a2, distance should be: a1 * distYCbCr()
+        2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
+        3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
+
+    Implemented formula: min(a1, a2) * DistYCbCrBuffer::dist(pix1, pix2) + 255 * abs(a1 - a2),
+    spelled out as two branches because that was measured to be 15% faster.
+    Alternative that was considered:
+        std::sqrt(a1 * a2 * square(DistYCbCrBuffer::dist(pix1, pix2)) + square(255 * (a1 - a2)))
+
+    luminanceWeight is part of the common policy signature but is not consulted here.
+    */
+    static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
+    {
+        const double alpha1 = getAlpha(pix1) / 255.0;
+        const double alpha2 = getAlpha(pix2) / 255.0;
+        const double colorDist = DistYCbCrBuffer::dist(pix1, pix2);
+
+        //weight the color distance with the smaller alpha, add the alpha difference scaled to 255
+        return alpha1 < alpha2 ?
+               alpha1 * colorDist + 255 * (alpha2 - alpha1) :
+               alpha2 * colorDist + 255 * (alpha1 - alpha2);
+    }
+};
+
+
+//Gradient policy for pixels without alpha channel: mixing is delegated to gradientRGB<M, N>()
+struct ColorGradientRGB
+{
+    //replaces pixBack by the gradientRGB<M, N>() mix of pixFront (first argument) and the old pixBack (second argument)
+    template <unsigned int M, unsigned int N>
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront)
+    {
+        const uint32_t mixed = gradientRGB<M, N>(pixFront, pixBack);
+        pixBack = mixed;
+    }
+};
+
+//Gradient policy for pixels with alpha channel: mixing is delegated to gradientARGB<M, N>()
+struct ColorGradientARGB
+{
+    //replaces pixBack by the gradientARGB<M, N>() mix of pixFront (first argument) and the old pixBack (second argument)
+    template <unsigned int M, unsigned int N>
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront)
+    {
+        const uint32_t mixed = gradientARGB<M, N>(pixFront, pixBack);
+        pixBack = mixed;
+    }
+};
+}
+
+
+//Entry point of the scaler: selects the scaleImage<> instantiation matching the color format and the
+//scale factor (2 - 6) and forwards all remaining arguments unchanged.
+//Any other format/factor combination ends up in assert(false) without touching the target.
+void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, ColorFormat colFmt, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
+{
+    if (colFmt == ColorFormat::ARGB)
+    {
+        //alpha-aware gradient and distance policies
+        switch (factor)
+        {
+            case 2: return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 3: return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 4: return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 5: return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 6: return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+        }
+    }
+    else if (colFmt == ColorFormat::RGB)
+    {
+        //policies ignoring the upper 8 bits
+        switch (factor)
+        {
+            case 2: return scaleImage<Scaler2x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 3: return scaleImage<Scaler3x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 4: return scaleImage<Scaler4x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 5: return scaleImage<Scaler5x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            case 6: return scaleImage<Scaler6x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+        }
+    }
+
+    //unsupported color format or factor
+    assert(false);
+}
+
+
+//Returns true if the distance between col1 and col2, measured with the distance policy of the given
+//color format, is strictly below equalColorTolerance. An unknown format asserts and yields false.
+bool xbrz::equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, double luminanceWeight, double equalColorTolerance)
+{
+    if (colFmt == ColorFormat::ARGB)
+        return ColorDistanceARGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
+
+    if (colFmt == ColorFormat::RGB)
+        return ColorDistanceRGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
+
+    assert(false);
+    return false;
+}
+
+
+//Nearest-neighbor scaling of a 32-bit-per-pixel image with explicit row pitches (in bytes).
+//  st = NN_SCALE_SLICE_SOURCE: [yFirst, yLast) selects SOURCE rows; each source pixel is written once as a filled block
+//  st = NN_SCALE_SLICE_TARGET: [yFirst, yLast) selects TARGET rows; each target pixel looks up its source pixel
+//The row range is clamped to the respective image height. A pitch smaller than one row of pixels asserts and returns without writing.
+void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
+                                uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
+                                SliceType st, int yFirst, int yLast)
+{
+    //a pitch must cover at least one full row of pixels
+    if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t))  ||
+        trgPitch < trgWidth * static_cast<int>(sizeof(uint32_t)))
+    {
+        assert(false);
+        return;
+    }
+
+    switch (st)
+    {
+        case NN_SCALE_SLICE_SOURCE:
+            //nearest-neighbor (going over source image - fast for upscaling, since source is read only once)
+            yFirst = std::max(yFirst, 0);
+            yLast  = std::min(yLast, srcHeight);
+            if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return; //empty slice or empty target: nothing to do
+
+            for (int y = yFirst; y < yLast; ++y)
+            {
+                //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
+                // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
+
+                //keep within for loop to support MT input slices!
+                const int yTrg_first = ( y      * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
+                const int yTrg_last  = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
+                const int blockHeight = yTrg_last - yTrg_first; //number of target rows fed by source row y (0 when downscaling skips this row)
+
+                if (blockHeight > 0)
+                {
+                    const uint32_t* srcLine = byteAdvance(src, y * srcPitch);
+                    uint32_t* trgLine  = byteAdvance(trg, yTrg_first * trgPitch);
+                    int xTrg_first = 0; //first target column not yet written in this row
+
+                    for (int x = 0; x < srcWidth; ++x)
+                    {
+                        int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth; //=ceil((x + 1) * trgWidth / srcWidth)
+                        const int blockWidth = xTrg_last - xTrg_first;
+                        if (blockWidth > 0)
+                        {
+                            //source pixel x covers target columns [xTrg_first, xTrg_last): fill them and move on
+                            xTrg_first = xTrg_last;
+                            fillBlock(trgLine, trgPitch, srcLine[x], blockWidth, blockHeight);
+                            trgLine += blockWidth;
+                        }
+                    }
+                }
+            }
+            break;
+
+        case NN_SCALE_SLICE_TARGET:
+            //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
+            yFirst = std::max(yFirst, 0);
+            yLast  = std::min(yLast, trgHeight);
+            if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return; //empty slice or empty source: nothing to do
+
+            for (int y = yFirst; y < yLast; ++y)
+            {
+                uint32_t* trgLine = byteAdvance(trg, y * trgPitch);
+                const int ySrc = srcHeight * y / trgHeight; //source row for target row y (rounded down)
+                const uint32_t* srcLine = byteAdvance(src, ySrc * srcPitch);
+                for (int x = 0; x < trgWidth; ++x)
+                {
+                    const int xSrc = srcWidth * x / trgWidth; //source column for target column x (rounded down)
+                    trgLine[x] = srcLine[xSrc];
+                }
+            }
+            break;
+    }
+}
diff --git a/Utilities/xBRZ/xbrz.h b/Utilities/xBRZ/xbrz.h
new file mode 100644
index 0000000..40dd410
--- /dev/null
+++ b/Utilities/xBRZ/xbrz.h
@@ -0,0 +1,94 @@
+// ****************************************************************************
+// * This file is part of the HqMAME project. It is distributed under         *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0          *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the MAME library (or with modified *
+// * versions of MAME that use the same license as MAME), and distribute      *
+// * linked combinations including the two. You must obey the GNU General     *
+// * Public License in all respects for all of the code used other than MAME. *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#ifndef XBRZ_HEADER_3847894708239054
+#define XBRZ_HEADER_3847894708239054
+
+#include <cstddef> //size_t
+#include <cstdint> //uint32_t
+#include <limits>
+#include "config.h"
+
+namespace xbrz
+{
+/*
+-------------------------------------------------------------------------
+| xBRZ: "Scale by rules" - high quality image upscaling filter by Zenju |
+-------------------------------------------------------------------------
+using a modified approach of xBR:
+http://board.byuu.org/viewtopic.php?f=10&t=2248
+- new rule set preserving small image features
+- highly optimized for performance
+- support alpha channel
+- support multithreading
+- support 64-bit architectures
+- support processing image slices
+- support scaling up to 6xBRZ
+*/
+
+enum class ColorFormat //from high bits -> low bits, 8 bit per channel
+{
+    RGB,  //8 bit for each red, green, blue, upper 8 bits unused
+    ARGB, //including alpha channel, BGRA byte order on little-endian machines
+};
+
+/*
+-> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only
+-> support for source/target pitch in bytes!
+-> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image:
+   Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis)
+   Caveat: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition
+   in the target image data if you are using multiple threads for processing each enlarged slice!
+
+THREAD-SAFETY: - parts of the same image may be scaled by multiple threads as long as the [yFirst, yLast) ranges do not overlap!
+               - there is a minor inefficiency for the first row of a slice, so avoid processing single rows only; suggestion: process 8-16 rows at least
+*/
+void scale(size_t factor, //valid range: 2 - 6
+           const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
+           ColorFormat colFmt,
+           const ScalerCfg& cfg = ScalerCfg(),
+           int yFirst = 0, int yLast = std::numeric_limits<int>::max()); //slice of source image
+
+//nearest-neighbor scaling of a complete, tightly packed image (pitch = width * sizeof(uint32_t)); implemented at the end of this header
+void nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight,
+                          uint32_t* trg, int trgWidth, int trgHeight);
+
+//selects the image whose rows are addressed by [yFirst, yLast) in the overload below
+enum SliceType
+{
+    NN_SCALE_SLICE_SOURCE, //yFirst/yLast are rows of the source image
+    NN_SCALE_SLICE_TARGET, //yFirst/yLast are rows of the target image
+};
+//nearest-neighbor scaling with explicit pitches, restricted to the half-open row slice [yFirst, yLast)
+void nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch, //pitch in bytes!
+                          uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
+                          SliceType st, int yFirst, int yLast);
+
+//parameter tuning: true if the color distance of col1 and col2 is below equalColorTolerance
+bool equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, double luminanceWeight, double equalColorTolerance);
+
+
+
+
+
+//########################### implementation ###########################
+//scales the whole image by iterating over all target rows [0, trgHeight)
+inline
+void nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight,
+                          uint32_t* trg, int trgWidth, int trgHeight)
+{
+    //the pitch parameters are int while "width * sizeof" is size_t: convert explicitly instead of relying on an implicit narrowing conversion
+    const int srcPitch = static_cast<int>(srcWidth * sizeof(uint32_t));
+    const int trgPitch = static_cast<int>(trgWidth * sizeof(uint32_t));
+
+    nearestNeighborScale(src, srcWidth, srcHeight, srcPitch,
+                         trg, trgWidth, trgHeight, trgPitch,
+                         NN_SCALE_SLICE_TARGET, 0, trgHeight);
+}
+}
+
+#endif
diff --git a/Windows/DirectInputManager.cpp b/Windows/DirectInputManager.cpp
new file mode 100644
index 0000000..3655cf9
--- /dev/null
+++ b/Windows/DirectInputManager.cpp
@@ -0,0 +1,447 @@
+#include "stdafx.h"
+
+#define DIRECTINPUT_VERSION 0x0800
+#include <thread>
+#include <windows.h>
+#include <dinput.h>
+#include <dinputd.h>
+#include <wbemidl.h>
+#include <oleauto.h>
+#include "DirectInputManager.h"
+#include <algorithm>
+#include "../Core/MessageManager.h"
+#include "../Core/Console.h"
+
+// DirectInput interface, created by Initialize() through DirectInput8Create
+LPDIRECTINPUT8 DirectInputManager::_directInput = nullptr;
+// Joysticks currently polled; rebuilt by RefreshState()
+vector<DirectInputData> DirectInputManager::_joysticks;
+// Joysticks found by the last enumeration, waiting to be merged into _joysticks by RefreshState()
+vector<DirectInputData> DirectInputManager::_joysticksToAdd;
+// Instance GUIDs of devices that were already examined, so that enumeration does not handle them twice
+std::vector<GUID> DirectInputManager::_processedGuids;
+// Instance GUIDs of devices that IsXInputDevice() reported as XInput devices
+std::vector<GUID> DirectInputManager::_xinputDeviceGuids;
+// Window handle handed to SetCooperativeLevel; NOTE(review): not assigned anywhere in this part of the file - confirm where it is set
+HWND DirectInputManager::_hWnd = nullptr;
+
+// Creates the DirectInput object, checks that the joystick configuration interface can be queried
+// and then runs a first device enumeration. Any failure is logged and aborts the initialization.
+void DirectInputManager::Initialize()
+{
+	// Register with the DirectInput subsystem and get a pointer to a IDirectInput interface we can use.
+	HRESULT result = DirectInput8Create(GetModuleHandle(nullptr), DIRECTINPUT_VERSION, IID_IDirectInput8, (VOID**)&_directInput, nullptr);
+	if(FAILED(result)) {
+		MessageManager::Log("[DInput] DirectInput8Create failed: " + std::to_string(result));
+		return;
+	}
+
+	// The interface itself is not used any further: it is released again right away
+	IDirectInputJoyConfig8* joyConfig = nullptr;
+	result = _directInput->QueryInterface(IID_IDirectInputJoyConfig8, (void**)&joyConfig);
+	if(FAILED(result)) {
+		MessageManager::Log("[DInput] QueryInterface failed: " + std::to_string(result));
+		return;
+	}
+	if(joyConfig != nullptr) {
+		joyConfig->Release();
+	}
+
+	UpdateDeviceList();
+}
+
+// Decides whether an enumerated device should be opened as a DirectInput joystick.
+// Returns false for devices that were processed before and for XInput devices (which are
+// recorded in _xinputDeviceGuids and _processedGuids); returns true otherwise.
+bool DirectInputManager::ProcessDevice(const DIDEVICEINSTANCE* pdidInstance)
+{
+	const GUID& instanceGuid = pdidInstance->guidInstance;
+
+	// Field-by-field GUID comparison against everything seen so far
+	for(const GUID& knownGuid : _processedGuids) {
+		bool sameGuid = knownGuid.Data1 == instanceGuid.Data1 &&
+			knownGuid.Data2 == instanceGuid.Data2 &&
+			knownGuid.Data3 == instanceGuid.Data3 &&
+			memcmp(knownGuid.Data4, instanceGuid.Data4, sizeof(knownGuid.Data4)) == 0;
+		if(sameGuid) {
+			return false;
+		}
+	}
+
+	if(IsXInputDevice(&pdidInstance->guidProduct)) {
+		_xinputDeviceGuids.push_back(instanceGuid);
+		_processedGuids.push_back(instanceGuid);
+		return false;
+	}
+	return true;
+}
+
+//-----------------------------------------------------------------------------
+// Enum each PNP device using WMI and check each device ID to see if it contains 
+// "IG_" (ex. "VID_045E&PID_028E&IG_00").  If it does, then it's an XInput device
+// Unfortunately this information can not be found by just using DirectInput 
+//-----------------------------------------------------------------------------
+// Returns true if a PNP device whose device ID contains "IG_" has the same VID/PID as the given
+// DirectInput product GUID (compared against its Data1 field). Returns false if no such device
+// exists or if any WMI step fails. All COM objects and strings acquired here are released before returning.
+bool DirectInputManager::IsXInputDevice(const GUID* pGuidProductFromDirectInput)
+{
+	IWbemLocator*           pIWbemLocator = NULL;
+	IEnumWbemClassObject*   pEnumDevices = NULL;
+	IWbemClassObject*       pDevices[20] = { 0 };
+	IWbemServices*          pIWbemServices = NULL;
+	BSTR                    bstrNamespace = NULL;
+	BSTR                    bstrDeviceID = NULL;
+	BSTR                    bstrClassName = NULL;
+	DWORD                   uReturned = 0;
+	bool                    bIsXinputDevice = false;
+	UINT                    iDevice = 0;
+	VARIANT                 var;
+	HRESULT                 hr;
+
+	// CoInit if needed
+	hr = CoInitialize(NULL);
+	bool bCleanupCOM = SUCCEEDED(hr);
+
+	// Create WMI
+	hr = CoCreateInstance(__uuidof(WbemLocator), NULL, CLSCTX_INPROC_SERVER, __uuidof(IWbemLocator), (LPVOID*)&pIWbemLocator);
+	if(FAILED(hr) || pIWbemLocator == NULL) {
+		goto LCleanup;
+	}
+
+	bstrNamespace = SysAllocString(L"\\\\.\\root\\cimv2"); 
+	bstrClassName = SysAllocString(L"Win32_PNPEntity");
+	bstrDeviceID = SysAllocString(L"DeviceID");
+
+	// Connect to WMI 
+	hr = pIWbemLocator->ConnectServer(bstrNamespace, NULL, NULL, 0L, 0L, NULL, NULL, &pIWbemServices);
+	if(FAILED(hr) || pIWbemServices == NULL) {
+		goto LCleanup;
+	}
+
+	// Switch security level to IMPERSONATE. 
+	CoSetProxyBlanket(pIWbemServices, RPC_C_AUTHN_WINNT, RPC_C_AUTHZ_NONE, NULL, RPC_C_AUTHN_LEVEL_CALL, RPC_C_IMP_LEVEL_IMPERSONATE, NULL, EOAC_NONE);
+
+	hr = pIWbemServices->CreateInstanceEnum(bstrClassName, 0, NULL, &pEnumDevices);
+	if(FAILED(hr) || pEnumDevices == NULL) {
+		goto LCleanup;
+	}
+
+	// Loop over all devices. The match flag is tested BEFORE fetching the next batch: fetching first
+	// would overwrite the not yet released entries of pDevices that remain after an early exit of the inner loop.
+	while(!bIsXinputDevice) {
+		// Get 20 at a time
+		hr = pEnumDevices->Next(10000, 20, pDevices, &uReturned);
+		if(FAILED(hr) || uReturned == 0) {
+			break;
+		}
+
+		for(iDevice = 0; iDevice < uReturned && !bIsXinputDevice; iDevice++) {
+			// Start from an empty variant, so that VariantClear below is safe even if Get fails
+			VariantInit(&var);
+
+			// For each device, get its device ID
+			hr = pDevices[iDevice]->Get(bstrDeviceID, 0L, &var, NULL, NULL);
+			if(SUCCEEDED(hr) && var.vt == VT_BSTR && var.bstrVal != NULL) {
+				// Check if the device ID contains "IG_".  If it does, then it's an XInput device
+				// This information can not be found from DirectInput 
+				if(wcsstr(var.bstrVal, L"IG_")) {
+					// If it does, then get the VID/PID from var.bstrVal
+					DWORD dwPid = 0, dwVid = 0;
+					WCHAR* strVid = wcsstr(var.bstrVal, L"VID_");
+					if(strVid && swscanf_s(strVid, L"VID_%4X", &dwVid) != 1) {
+						dwVid = 0;
+					}
+					WCHAR* strPid = wcsstr(var.bstrVal, L"PID_");
+					if(strPid && swscanf_s(strPid, L"PID_%4X", &dwPid) != 1) {
+						dwPid = 0;
+					}
+
+					// Compare the VID/PID to the DInput device
+					DWORD dwVidPid = MAKELONG(dwVid, dwPid);
+					if(dwVidPid == pGuidProductFromDirectInput->Data1) {
+						// Ends both loops; the cleanup below still runs for this device
+						bIsXinputDevice = true;
+					}
+				}
+			}
+
+			// Always free the device ID string and the device object, on the match path as well
+			VariantClear(&var);
+			pDevices[iDevice]->Release();
+			pDevices[iDevice] = nullptr;
+		}
+	}
+
+LCleanup:
+	if(bstrNamespace) {
+		SysFreeString(bstrNamespace);
+	}
+	if(bstrDeviceID) {
+		SysFreeString(bstrDeviceID);
+	}
+	if(bstrClassName) {
+		SysFreeString(bstrClassName);
+	}
+	// Releases the devices of the last batch that were not visited because a match was found earlier
+	for(iDevice = 0; iDevice < 20; iDevice++) {
+		if(pDevices[iDevice]) {
+			pDevices[iDevice]->Release();
+		}
+	}
+	if(pEnumDevices) {
+		pEnumDevices->Release();
+	}
+	if(pIWbemLocator) {
+		pIWbemLocator->Release();
+	}
+	if(pIWbemServices) {
+		pIWbemServices->Release();
+	}
+
+	if(bCleanupCOM) {
+		CoUninitialize();
+	}
+
+	return bIsXinputDevice;
+}
+
+// Enumerates the attached game controllers. Newly found joysticks are read twice, 100 ms apart,
+// and the second reading is stored as their default state. Sets _needToUpdate when joysticks
+// were found or when an update had been requested through _requestUpdate.
+void DirectInputManager::UpdateDeviceList()
+{
+	//An update is already pending, skip
+	if(_needToUpdate) {
+		return;
+	}
+
+	// Enumerate devices
+	const HRESULT enumResult = _directInput->EnumDevices(DI8DEVCLASS_GAMECTRL, EnumJoysticksCallback, nullptr, DIEDFL_ALLDEVICES);
+	const bool foundNewJoysticks = SUCCEEDED(enumResult) && !_joysticksToAdd.empty();
+
+	if(foundNewJoysticks) {
+		//Sleeping apparently lets us read accurate "default" values, otherwise a PS4 controller returns all 0s, despite not doing so normally
+		for(DirectInputData &pending : _joysticksToAdd) {
+			UpdateInputState(pending);
+		}
+		std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+		for(DirectInputData &pending : _joysticksToAdd) {
+			UpdateInputState(pending);
+			pending.defaultState = pending.state;
+		}
+		_needToUpdate = true;
+	}
+
+	if(_requestUpdate) {
+		_requestUpdate = false;
+		_needToUpdate = true;
+	}
+}
+
+//-----------------------------------------------------------------------------
+// Name: EnumJoysticksCallback()
+// Desc: Called once for each enumerated joystick. If we find one, create a
+//       device interface on it so we can play with it.
+//-----------------------------------------------------------------------------
+// Called once for each enumerated joystick. Devices accepted by ProcessDevice() are recorded in
+// _processedGuids, opened, configured (data format, cooperative level, axis ranges) and appended
+// to _joysticksToAdd. If any step fails, the failure is logged and the device interface is
+// released again. Always returns DIENUM_CONTINUE, so one failing device does not stop the enumeration.
+// pContext is not used.
+int DirectInputManager::EnumJoysticksCallback(const DIDEVICEINSTANCE* pdidInstance, void* pContext)
+{
+	if(!ProcessDevice(pdidInstance)) {
+		return DIENUM_CONTINUE;
+	}
+	_processedGuids.push_back(pdidInstance->guidInstance);
+
+	// Obtain an interface to the enumerated joystick.
+	LPDIRECTINPUTDEVICE8 pJoystick = nullptr;
+	HRESULT hr = _directInput->CreateDevice(pdidInstance->guidInstance, &pJoystick, nullptr);
+	if(FAILED(hr)) {
+		MessageManager::Log("[DInput] Failed to create directinput device: " + std::to_string(hr));
+		return DIENUM_CONTINUE;
+	}
+
+	DIJOYSTATE2 state;
+	memset(&state, 0, sizeof(state));
+	DirectInputData data{ pJoystick, state, state, false };
+	memcpy(&data.instanceInfo, pdidInstance, sizeof(DIDEVICEINSTANCE));
+
+	// Configuration steps, stopping at the first failure:
+	// 1. Set the data format to "simple joystick" - a predefined data format.
+	//    This tells DInput that we will be passing a DIJOYSTATE2 structure to IDirectInputDevice::GetDeviceState().
+	// 2. Set the cooperative level to let DInput know how this device should interact with the system and with other DInput applications.
+	// 3. Enumerate the joystick objects; the callback sets the min/max values property for discovered axes.
+	const char* failureMessage = nullptr;
+	if(FAILED(hr = data.joystick->SetDataFormat(&c_dfDIJoystick2))) {
+		failureMessage = "[DInput] Failed to set data format: ";
+	} else if(FAILED(hr = data.joystick->SetCooperativeLevel(_hWnd, DISCL_NONEXCLUSIVE | DISCL_BACKGROUND))) {
+		failureMessage = "[DInput] Failed to set cooperative level: ";
+	} else if(FAILED(hr = data.joystick->EnumObjects(EnumObjectsCallback, data.joystick, DIDFT_ALL))) {
+		failureMessage = "[DInput] Failed to enumerate objects: ";
+	}
+
+	if(failureMessage) {
+		MessageManager::Log(failureMessage + std::to_string(hr));
+		// The device is not handed over to _joysticksToAdd, so nobody else would release it
+		pJoystick->Release();
+	} else {
+		_joysticksToAdd.push_back(data);
+	}
+
+	return DIENUM_CONTINUE;
+}
+
+//-----------------------------------------------------------------------------
+// Name: EnumObjectsCallback()
+// Desc: Callback function for enumerating objects (axes, buttons, POVs) on a 
+//       joystick. This function enables user interface elements for objects
+//       that are found to exist, and scales axes min/max values.
+//-----------------------------------------------------------------------------
+// Callback for enumerating the objects (axes, buttons, POVs) of a joystick; pContext is the
+// joystick device. Every axis gets the value range [-1000, +1000]. Returns DIENUM_STOP if
+// setting the range fails, DIENUM_CONTINUE otherwise.
+int DirectInputManager::EnumObjectsCallback(const DIDEVICEOBJECTINSTANCE* pdidoi, void* pContext)
+{
+	// Only axes need a range, everything else is left untouched
+	if((pdidoi->dwType & DIDFT_AXIS) == 0) {
+		return DIENUM_CONTINUE;
+	}
+
+	DIPROPRANGE axisRange;
+	axisRange.diph.dwSize = sizeof(DIPROPRANGE);
+	axisRange.diph.dwHeaderSize = sizeof(DIPROPHEADER);
+	axisRange.diph.dwHow = DIPH_BYID;
+	axisRange.diph.dwObj = pdidoi->dwType; // Specify the enumerated axis
+	axisRange.lMin = -1000;
+	axisRange.lMax = +1000;
+
+	// Set the range for the axis
+	LPDIRECTINPUTDEVICE8 device = (LPDIRECTINPUTDEVICE8)pContext;
+	return FAILED(device->SetProperty(DIPROP_RANGE, &axisRange.diph)) ? DIENUM_STOP : DIENUM_CONTINUE;
+}
+
+
+// Applies a pending device list update (if any) and then polls every joystick.
+// During an update, joysticks whose state is no longer valid are released and removed from
+// _processedGuids, so that a later enumeration can pick them up again.
+void DirectInputManager::RefreshState()
+{
+	if(_needToUpdate) {
+		vector<DirectInputData> keptJoysticks;
+
+		//Keep existing joysticks, if they still work, otherwise remove them from the list
+		for(DirectInputData &existing : _joysticks) {
+			if(existing.stateValid) {
+				keptJoysticks.push_back(existing);
+				continue;
+			}
+
+			MessageManager::Log("[DInput] Device lost, trying to reacquire...");
+
+			//Forget the GUID and release the joystick, we'll try to initialize it again if it still exists
+			const GUID& lostGuid = existing.instanceInfo.guidInstance;
+			auto isLostDevice = [&lostGuid](const GUID& candidate) {
+				return candidate.Data1 == lostGuid.Data1 &&
+					candidate.Data2 == lostGuid.Data2 &&
+					candidate.Data3 == lostGuid.Data3 &&
+					memcmp(candidate.Data4, lostGuid.Data4, sizeof(candidate.Data4)) == 0;
+			};
+			auto firstRemoved = std::remove_if(_processedGuids.begin(), _processedGuids.end(), isLostDevice);
+			_processedGuids.erase(firstRemoved, _processedGuids.end());
+
+			existing.joystick->Unacquire();
+			existing.joystick->Release();
+		}
+
+		//Add the newly-found joysticks
+		keptJoysticks.insert(keptJoysticks.end(), _joysticksToAdd.begin(), _joysticksToAdd.end());
+
+		_joysticks = keptJoysticks;
+		_joysticksToAdd.clear();
+		_needToUpdate = false;
+	}
+
+	for(DirectInputData &active : _joysticks) {
+		UpdateInputState(active);
+	}
+}
+
+int DirectInputManager::GetJoystickCount()
+{
+	return static_cast<int>(_joysticks.size()); //Devices still waiting in _joysticksToAdd are not counted
+}
+
+//Returns true when "button" is active on joystick "port": 0x00-0x0B = axis directions, 0x0C-0x0F = POV hat, 0x10+ = buttons
+bool DirectInputManager::IsPressed(int port, int button)
+{
+	if(port < 0 || port >= (int)_joysticks.size() || !_joysticks[port].stateValid) {
+		return false;
+	}
+
+	DIJOYSTATE2& state = _joysticks[port].state;
+	DIJOYSTATE2& defaultState = _joysticks[port].defaultState;
+	int deadRange = (int)(500 * 1); //TODO: apply _console->GetSettings()->GetControllerDeadzoneRatio()
+
+	//The POV angle is in hundredths of a degree, clockwise from "up": reduce it to 8 sectors of 45 degrees
+	int povDirection = state.rgdwPOV[0] / 4500;
+	bool povCentered = (LOWORD(state.rgdwPOV[0]) == 0xFFFF) || povDirection >= 8;
+
+	switch(button) {
+		case 0x00: return state.lY - defaultState.lY < -deadRange;
+		case 0x01: return state.lY - defaultState.lY > deadRange;
+		case 0x02: return state.lX - defaultState.lX < -deadRange;
+		case 0x03: return state.lX - defaultState.lX > deadRange;
+		case 0x04: return state.lRy - defaultState.lRy < -deadRange;
+		case 0x05: return state.lRy - defaultState.lRy > deadRange;
+		case 0x06: return state.lRx - defaultState.lRx < -deadRange;
+		case 0x07: return state.lRx - defaultState.lRx > deadRange;
+		case 0x08: return state.lZ - defaultState.lZ < -deadRange;
+		case 0x09: return state.lZ - defaultState.lZ > deadRange;
+		case 0x0A: return state.lRz - defaultState.lRz < -deadRange;
+		case 0x0B: return state.lRz - defaultState.lRz > deadRange;
+		case 0x0C: return !povCentered && (povDirection == 7 || povDirection == 0 || povDirection == 1);
+		case 0x0D: return !povCentered && (povDirection >= 3 && povDirection <= 5);
+		case 0x0E: return !povCentered && (povDirection >= 1 && povDirection <= 3);
+		case 0x0F: return !povCentered && (povDirection >= 5 && povDirection <= 7);
+		default: break;
+	}
+	const int buttonCount = (int)(sizeof(state.rgbButtons) / sizeof(state.rgbButtons[0])); //Ids outside of rgbButtons are reported as "not pressed"
+	return button >= 0x10 && button - 0x10 < buttonCount && state.rgbButtons[button - 0x10] != 0;
+}
+
+// Reads the device's current state into data.state; on failure data.stateValid is cleared and _requestUpdate is set
+void DirectInputManager::UpdateInputState(DirectInputData &data)
+{
+	DIJOYSTATE2 newState;
+	HRESULT hr;
+	// Poll the device to read the current state
+	hr = data.joystick->Poll();
+	if(FAILED(hr)) {
+		// DInput is telling us that the input stream has been interrupted. We aren't tracking any state between polls, so
+		// we don't have any special reset that needs to be done. We just re-acquire and try again.
+		hr = data.joystick->Acquire();
+		while(hr == DIERR_INPUTLOST) { // NOTE(review): keeps retrying for as long as Acquire() reports DIERR_INPUTLOST
+			hr = data.joystick->Acquire();
+		}
+
+		// hr may be DIERR_OTHERAPPHASPRIO or other errors. This may occur when the app is minimized or in the process of
+		// switching, so flag the state as invalid and try again later
+		if(FAILED(hr)) {
+			data.stateValid = false;
+			_requestUpdate = true; // presumably consumed by UpdateDeviceList() (not visible here) - TODO confirm
+			return;
+		}
+	}
+
+	// Get the input's device state
+	if(FAILED(hr = data.joystick->GetDeviceState(sizeof(DIJOYSTATE2), &newState))) {
+		MessageManager::Log("[DInput] Failed to get device state: " + std::to_string(hr));
+		data.stateValid = false;
+		_requestUpdate = true;
+		return; // The device should have been acquired during the Poll()
+	}
+
+	data.state = newState;
+	data.stateValid = true;
+}
+
+
+DirectInputManager::DirectInputManager(shared_ptr<Console> console, HWND hWnd)
+	: _console(console)
+{
+	_hWnd = hWnd; //Static member (shared by every instance): cannot be part of the initializer list
+	Initialize();
+}
+
+DirectInputManager::~DirectInputManager()
+{
+	//Devices still waiting in _joysticksToAdd hold a device reference too: release them with the others
+	_joysticks.insert(_joysticks.end(), _joysticksToAdd.begin(), _joysticksToAdd.end());
+	for(DirectInputData &data: _joysticks) {
+		data.joystick->Unacquire();
+		data.joystick->Release();
+	}
+	_needToUpdate = false;
+	_joysticks.clear();
+	_joysticksToAdd.clear();
+	_processedGuids.clear();
+	_xinputDeviceGuids.clear();
+	if(_directInput) {
+		_directInput->Release();
+		_directInput = nullptr;
+	}
+}
diff --git a/Windows/DirectInputManager.h b/Windows/DirectInputManager.h
new file mode 100644
index 0000000..3f8c1d6
--- /dev/null
+++ b/Windows/DirectInputManager.h
@@ -0,0 +1,46 @@
+#pragma once
+#include "stdafx.h"
+#include <dinput.h>
+#include "../Utilities/SimpleLock.h"
+
+class Console;
+
+struct DirectInputData //State tracked for a single DirectInput joystick device
+{
+	LPDIRECTINPUTDEVICE8 joystick; //Device interface; Unacquire()/Release() are called on it when the device is dropped
+	DIJOYSTATE2 state; //Latest state read by DirectInputManager::UpdateInputState()
+	DIJOYSTATE2 defaultState; //Reference state subtracted from the axis values in DirectInputManager::IsPressed()
+	bool stateValid; //Cleared when polling/reading the device fails; RefreshState() drops such devices
+	DIDEVICEINSTANCE instanceInfo; //Instance description; its guidInstance is used to forget lost devices
+};
+
+class DirectInputManager //Tracks DirectInput joysticks and reports their axes, POV hat and buttons as pressed/released inputs
+{
+private:
+	static HWND _hWnd; //Window handle received by the constructor (static: shared by all instances)
+	shared_ptr<Console> _console;
+	bool _needToUpdate = false; //When set, RefreshState() rebuilds _joysticks before polling
+	bool _requestUpdate = false; //Set by UpdateInputState() when a device cannot be read
+	static LPDIRECTINPUT8 _directInput; //Released by the destructor
+	static vector<DirectInputData> _joysticks; //Devices polled by RefreshState()
+	static vector<DirectInputData> _joysticksToAdd; //Newly enumerated devices, moved to _joysticks by RefreshState()
+
+	static std::vector<GUID> _processedGuids; //RefreshState() removes the instance GUID of lost devices from this list
+	static std::vector<GUID> _xinputDeviceGuids;
+
+	void Initialize();
+	void UpdateInputState(DirectInputData& joystick);
+	static bool ProcessDevice(const DIDEVICEINSTANCE* pdidInstance);
+	static bool IsXInputDevice(const GUID* pGuidProductFromDirectInput);
+	static int __stdcall EnumJoysticksCallback(const DIDEVICEINSTANCE* pdidInstance, void* pContext);
+	static int __stdcall EnumObjectsCallback(const DIDEVICEOBJECTINSTANCE* pdidoi, void* pContext); //Sets the range of every axis to [-1000, +1000]
+
+public:
+	DirectInputManager(shared_ptr<Console> console, HWND window);
+	~DirectInputManager();
+
+	void RefreshState(); //Applies pending device list changes, then polls every joystick
+	void UpdateDeviceList();
+	int GetJoystickCount();
+	bool IsPressed(int port, int button); //button: 0x00-0x0B axis directions, 0x0C-0x0F POV hat directions, 0x10+ buttons
+};
diff --git a/Windows/DirectXTK/Audio.h b/Windows/DirectXTK/Audio.h
new file mode 100644
index 0000000..a05bdd7
--- /dev/null
+++ b/Windows/DirectXTK/Audio.h
@@ -0,0 +1,718 @@
+//--------------------------------------------------------------------------------------
+// File: Audio.h
+//
+// DirectXTK for Audio header
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <objbase.h>
+#include <mmreg.h>
+#include <audioclient.h>
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <xma2defs.h>
+#pragma comment(lib,"acphal.lib")
+#endif
+
+#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP
+#pragma comment(lib,"PhoneAudioSes.lib")
+#endif
+
+#ifndef XAUDIO2_HELPER_FUNCTIONS
+#define XAUDIO2_HELPER_FUNCTIONS
+#endif
+
+#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/)
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#error DirectX Tool Kit for Audio does not support VS 2010 without the DirectX SDK 
+#endif
+#include <xaudio2.h>
+#include <xaudio2fx.h>
+#include <x3daudio.h>
+#include <xapofx.h>
+#pragma comment(lib,"xaudio2.lib")
+#else
+// Using XAudio 2.7 requires the DirectX SDK
+#include <C:\Program Files (x86)\Microsoft DirectX SDK (June 2010)\Include\comdecl.h>
+#include <C:\Program Files (x86)\Microsoft DirectX SDK (June 2010)\Include\xaudio2.h>
+#include <C:\Program Files (x86)\Microsoft DirectX SDK (June 2010)\Include\xaudio2fx.h>
+#include <C:\Program Files (x86)\Microsoft DirectX SDK (June 2010)\Include\xapofx.h>
+#pragma warning(push)
+#pragma warning( disable : 4005 )
+#include <C:\Program Files (x86)\Microsoft DirectX SDK (June 2010)\Include\x3daudio.h>
+#pragma warning(pop)
+#pragma comment(lib,"x3daudio.lib")
+#pragma comment(lib,"xapofx.lib")
+#endif
+
+#include <DirectXMath.h>
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+// VS 2010 doesn't support explicit calling convention for std::function
+#ifndef DIRECTX_STD_CALLCONV
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#define DIRECTX_STD_CALLCONV
+#else
+#define DIRECTX_STD_CALLCONV __cdecl
+#endif
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#pragma warning(push)
+#pragma warning(disable : 4481)
+// VS 2010 considers 'override' to be an extension, but it's part of C++11 as of VS 2012
+
+namespace DirectX
+{
+    #if (DIRECTX_MATH_VERSION < 305) && !defined(XM_CALLCONV)
+    #define XM_CALLCONV __fastcall
+    typedef const XMVECTOR& HXMVECTOR;
+    typedef const XMMATRIX& FXMMATRIX;
+    #endif
+
+    class SoundEffectInstance;
+
+    //----------------------------------------------------------------------------------
+    struct AudioStatistics // Counters returned by AudioEngine::GetStatistics() and passed to IVoiceNotify::GatherStatistics()
+    {
+        size_t  playingOneShots;        // Number of one-shot sounds currently playing
+        size_t  playingInstances;       // Number of sound effect instances currently playing
+        size_t  allocatedInstances;     // Number of SoundEffectInstance allocated
+        size_t  allocatedVoices;        // Number of XAudio2 voices allocated (standard, 3D, one-shots, and idle one-shots)
+        size_t  allocatedVoices3d;      // Number of XAudio2 voices allocated for 3D
+        size_t  allocatedVoicesOneShot; // Number of XAudio2 voices allocated for one-shot sounds
+        size_t  allocatedVoicesIdle;    // Number of XAudio2 voices allocated for one-shot sounds but not currently in use
+        size_t  audioBytes;             // Total wave data (in bytes) in SoundEffects and in-memory WaveBanks
+#if defined(_XBOX_ONE) && defined(_TITLE)
+        size_t  xmaAudioBytes;          // Total wave data (in bytes) in SoundEffects and in-memory WaveBanks allocated with ApuAlloc
+#endif
+    };
+
+
+    //----------------------------------------------------------------------------------
+    class IVoiceNotify // Callback interface registered with AudioEngine::RegisterNotify(); every member is pure virtual
+    {
+    public:
+        virtual void __cdecl OnBufferEnd() = 0;
+            // Notification that a voice buffer has finished
+            // Note this is called from XAudio2's worker thread, so it should perform very minimal and thread-safe operations
+
+        virtual void __cdecl OnCriticalError() = 0;
+            // Notification that the audio engine encountered a critical error
+
+        virtual void __cdecl OnReset() = 0;
+            // Notification of an audio engine reset
+
+        virtual void __cdecl OnUpdate() = 0;
+            // Notification of an audio engine per-frame update (opt-in)
+
+        virtual void __cdecl OnDestroyEngine() = 0;
+            // Notification that the audio engine is being destroyed
+
+        virtual void __cdecl OnTrim() = 0;
+            // Notification of a request to trim the voice pool
+
+        virtual void __cdecl GatherStatistics( AudioStatistics& stats ) const = 0;
+            // Contribute to statistics request
+    };
+
+    //----------------------------------------------------------------------------------
+    enum AUDIO_ENGINE_FLAGS // Bit flags accepted by the AudioEngine constructor; combine them with operator| below
+    {
+        AudioEngine_Default             = 0x0,
+
+        AudioEngine_EnvironmentalReverb = 0x1,
+        AudioEngine_ReverbUseFilters    = 0x2,
+        AudioEngine_UseMasteringLimiter = 0x4,
+
+        AudioEngine_Debug               = 0x10000,
+        AudioEngine_ThrowOnNoAudioHW    = 0x20000,
+        AudioEngine_DisableVoiceReuse   = 0x40000,
+    };
+    // Bitwise OR of two flag values, computed on int and cast back to the enum type
+    inline AUDIO_ENGINE_FLAGS operator|(AUDIO_ENGINE_FLAGS a, AUDIO_ENGINE_FLAGS b) { return static_cast<AUDIO_ENGINE_FLAGS>( static_cast<int>(a) | static_cast<int>(b) ); }
+
+    enum SOUND_EFFECT_INSTANCE_FLAGS // Bit flags accepted by the CreateInstance() methods and AudioEngine::AllocateVoice()
+    {
+        SoundEffectInstance_Default             = 0x0,
+
+        SoundEffectInstance_Use3D               = 0x1,
+        SoundEffectInstance_ReverbUseFilters    = 0x2,
+        SoundEffectInstance_NoSetPitch          = 0x4,
+
+        SoundEffectInstance_UseRedirectLFE      = 0x10000,
+    };
+    // Bitwise OR of two flag values, computed on int and cast back to the enum type
+    inline SOUND_EFFECT_INSTANCE_FLAGS operator|(SOUND_EFFECT_INSTANCE_FLAGS a, SOUND_EFFECT_INSTANCE_FLAGS b) { return static_cast<SOUND_EFFECT_INSTANCE_FLAGS>( static_cast<int>(a) | static_cast<int>(b) ); }
+
+    enum AUDIO_ENGINE_REVERB // Reverb presets accepted by AudioEngine::SetReverb( AUDIO_ENGINE_REVERB )
+    {
+        Reverb_Off,
+        Reverb_Default,
+        Reverb_Generic,
+        Reverb_Forest,
+        Reverb_PaddedCell,
+        Reverb_Room,
+        Reverb_Bathroom,
+        Reverb_LivingRoom,
+        Reverb_StoneRoom,
+        Reverb_Auditorium,
+        Reverb_ConcertHall,
+        Reverb_Cave,
+        Reverb_Arena,
+        Reverb_Hangar,
+        Reverb_CarpetedHallway,
+        Reverb_Hallway,
+        Reverb_StoneCorridor,
+        Reverb_Alley,
+        Reverb_City,
+        Reverb_Mountains,
+        Reverb_Quarry,
+        Reverb_Plain,
+        Reverb_ParkingLot,
+        Reverb_SewerPipe,
+        Reverb_Underwater,
+        Reverb_SmallRoom,
+        Reverb_MediumRoom,
+        Reverb_LargeRoom,
+        Reverb_MediumHall,
+        Reverb_LargeHall,
+        Reverb_Plate,
+        Reverb_MAX // Last enumerator: its value equals the number of entries listed above
+    };
+
+    enum SoundState // Returned by SoundEffectInstance::GetState() and DynamicSoundEffectInstance::GetState()
+    {
+        STOPPED = 0,
+        PLAYING,
+        PAUSED
+    };
+
+
+    //----------------------------------------------------------------------------------
+    class AudioEngine // Audio engine entry point; implementation hidden behind the private Impl class, movable but not copyable
+    {
+    public:
+        explicit AudioEngine( AUDIO_ENGINE_FLAGS flags = AudioEngine_Default, _In_opt_ const WAVEFORMATEX* wfx = nullptr, _In_opt_z_ const wchar_t* deviceId = nullptr, 
+                              AUDIO_STREAM_CATEGORY category = AudioCategory_GameEffects );
+
+        AudioEngine(AudioEngine&& moveFrom);
+        AudioEngine& operator= (AudioEngine&& moveFrom);
+        virtual ~AudioEngine();
+
+        bool __cdecl Update();
+            // Performs per-frame processing for the audio engine, returns false if in 'silent mode'
+
+        bool __cdecl Reset( _In_opt_ const WAVEFORMATEX* wfx = nullptr, _In_opt_z_ const wchar_t* deviceId = nullptr );
+            // Reset audio engine from critical error/silent mode using a new device; can also 'migrate' the graph
+            // Returns true if successfully reset, false if in 'silent mode' due to no default device
+            // Note: One shots are lost, all SoundEffectInstances are in the STOPPED state after successful reset
+
+        void __cdecl Suspend();
+        void __cdecl Resume();
+            // Suspend/resumes audio processing (i.e. global pause/resume)
+
+        float __cdecl GetMasterVolume() const;
+        void __cdecl SetMasterVolume( float volume );
+            // Master volume property for all sounds
+
+        void __cdecl SetReverb( AUDIO_ENGINE_REVERB reverb );
+        void __cdecl SetReverb( _In_opt_ const XAUDIO2FX_REVERB_PARAMETERS* native );
+            // Sets environmental reverb for 3D positional audio (if active)
+
+        void __cdecl SetMasteringLimit( int release, int loudness );
+            // Sets the mastering volume limiter properties (if active)
+
+        AudioStatistics __cdecl GetStatistics() const;
+            // Gathers audio engine statistics
+
+        WAVEFORMATEXTENSIBLE __cdecl GetOutputFormat() const;
+            // Returns the format consumed by the mastering voice (which is the same as the device output if defaults are used)
+
+        uint32_t __cdecl GetChannelMask() const;
+            // Returns the output channel mask
+
+        int __cdecl GetOutputChannels() const;
+            // Returns the number of output channels
+
+        bool __cdecl IsAudioDevicePresent() const;
+            // Returns true if the audio graph is operating normally, false if in 'silent mode'
+
+        bool __cdecl IsCriticalError() const;
+            // Returns true if the audio graph is halted due to a critical error (which also places the engine into 'silent mode')
+
+        // Voice pool management.
+        void __cdecl SetDefaultSampleRate( int sampleRate );
+            // Sample rate for voices in the reuse pool (defaults to 44100)
+
+        void __cdecl SetMaxVoicePool( size_t maxOneShots, size_t maxInstances );
+            // Maximum number of voices to allocate for one-shots and instances
+            // Note: one-shots over this limit are ignored; too many instance voices throws an exception
+
+        void __cdecl TrimVoicePool();
+            // Releases any currently unused voices
+
+        // Internal-use functions
+        void __cdecl AllocateVoice( _In_ const WAVEFORMATEX* wfx, SOUND_EFFECT_INSTANCE_FLAGS flags, bool oneshot, _Outptr_result_maybenull_ IXAudio2SourceVoice** voice );
+
+        void __cdecl DestroyVoice( _In_ IXAudio2SourceVoice* voice );
+            // Should only be called for instance voices, not one-shots
+
+        void __cdecl RegisterNotify( _In_ IVoiceNotify* notify, bool usesUpdate );
+        void __cdecl UnregisterNotify( _In_ IVoiceNotify* notify, bool usesOneShots, bool usesUpdate );
+
+        // XAudio2 interface access
+        IXAudio2* __cdecl GetInterface() const;
+        IXAudio2MasteringVoice* __cdecl GetMasterVoice() const;
+        IXAudio2SubmixVoice* __cdecl GetReverbVoice() const;
+        X3DAUDIO_HANDLE& __cdecl Get3DHandle() const;
+
+        // Static functions
+        struct RendererDetail
+        {
+            std::wstring deviceId;
+            std::wstring description;
+        };
+
+        static std::vector<RendererDetail> __cdecl GetRendererDetails();
+            // Returns a list of valid audio endpoint devices
+
+    private:
+        // Private implementation.
+        class Impl;
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        AudioEngine(AudioEngine const&) DIRECTX_CTOR_DELETE
+        AudioEngine& operator= (AudioEngine const&) DIRECTX_CTOR_DELETE
+    };
+
+
+    //----------------------------------------------------------------------------------
+    class WaveBank // Wave bank loaded from wbFileName; entries are addressed by index or by name, movable but not copyable
+    {
+    public:
+        WaveBank( _In_ AudioEngine* engine, _In_z_ const wchar_t* wbFileName );
+
+        WaveBank(WaveBank&& moveFrom);
+        WaveBank& operator= (WaveBank&& moveFrom);
+        virtual ~WaveBank();
+
+        void __cdecl Play( int index );
+        void __cdecl Play( int index, float volume, float pitch, float pan );
+
+        void __cdecl Play( _In_z_ const char* name );
+        void __cdecl Play( _In_z_ const char* name, float volume, float pitch, float pan );
+
+        std::unique_ptr<SoundEffectInstance> __cdecl CreateInstance( int index, SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default );
+        std::unique_ptr<SoundEffectInstance> __cdecl CreateInstance( _In_z_ const char* name, SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default );
+
+        bool __cdecl IsPrepared() const;
+        bool __cdecl IsInUse() const;
+        bool __cdecl IsStreamingBank() const;
+
+        size_t __cdecl GetSampleSizeInBytes( int index ) const;
+            // Returns size of wave audio data
+
+        size_t __cdecl GetSampleDuration( int index ) const;
+            // Returns the duration in samples
+
+        size_t __cdecl GetSampleDurationMS( int index ) const;
+            // Returns the duration in milliseconds
+
+        const WAVEFORMATEX* __cdecl GetFormat( int index, _Out_writes_bytes_(maxsize) WAVEFORMATEX* wfx, size_t maxsize ) const;
+
+        int __cdecl Find( _In_z_ const char* name ) const;
+
+#if defined(_XBOX_ONE) || (_WIN32_WINNT < _WIN32_WINNT_WIN8) || (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/ )
+        bool __cdecl FillSubmitBuffer( int index, _Out_ XAUDIO2_BUFFER& buffer, _Out_ XAUDIO2_BUFFER_WMA& wmaBuffer ) const; // Variant with a WMA buffer: Xbox One, pre-Windows 8 and Windows 10+ targets
+#else
+        void __cdecl FillSubmitBuffer( int index, _Out_ XAUDIO2_BUFFER& buffer ) const; // Variant without WMA buffer: remaining targets
+#endif
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        WaveBank(WaveBank const&) DIRECTX_CTOR_DELETE
+        WaveBank& operator= (WaveBank const&) DIRECTX_CTOR_DELETE
+
+        // Private interface
+        void __cdecl UnregisterInstance( _In_ SoundEffectInstance* instance );
+
+        friend class SoundEffectInstance; // Gives SoundEffectInstance access to UnregisterInstance()
+    };
+
+
+    //----------------------------------------------------------------------------------
+    class SoundEffect // Single sound built from a wave file or from caller-provided wave data, movable but not copyable
+    {
+    public:
+        SoundEffect( _In_ AudioEngine* engine, _In_z_ const wchar_t* waveFileName );
+
+        SoundEffect( _In_ AudioEngine* engine, _Inout_ std::unique_ptr<uint8_t[]>& wavData,
+                     _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes );
+
+        SoundEffect( _In_ AudioEngine* engine, _Inout_ std::unique_ptr<uint8_t[]>& wavData,
+                     _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes,
+                     uint32_t loopStart, uint32_t loopLength );
+
+#if defined(_XBOX_ONE) || (_WIN32_WINNT < _WIN32_WINNT_WIN8) || (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/)
+        // Seek-table constructor: only declared on Xbox One, pre-Windows 8 and Windows 10+ targets
+        SoundEffect( _In_ AudioEngine* engine, _Inout_ std::unique_ptr<uint8_t[]>& wavData,
+                     _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes,
+                     _In_reads_(seekCount) const uint32_t* seekTable, size_t seekCount );
+
+#endif
+
+        SoundEffect(SoundEffect&& moveFrom);
+        SoundEffect& operator= (SoundEffect&& moveFrom);
+        virtual ~SoundEffect();
+
+        void __cdecl Play();
+        void __cdecl Play(float volume, float pitch, float pan);
+
+        std::unique_ptr<SoundEffectInstance> __cdecl CreateInstance( SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default );
+
+        bool __cdecl IsInUse() const;
+
+        size_t __cdecl GetSampleSizeInBytes() const;
+            // Returns size of wave audio data
+
+        size_t __cdecl GetSampleDuration() const;
+            // Returns the duration in samples
+
+        size_t __cdecl GetSampleDurationMS() const;
+            // Returns the duration in milliseconds
+
+        const WAVEFORMATEX* __cdecl GetFormat() const;
+
+#if defined(_XBOX_ONE) || (_WIN32_WINNT < _WIN32_WINNT_WIN8) || (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/)
+        bool __cdecl FillSubmitBuffer( _Out_ XAUDIO2_BUFFER& buffer, _Out_ XAUDIO2_BUFFER_WMA& wmaBuffer ) const; // Variant with a WMA buffer
+#else
+        void __cdecl FillSubmitBuffer( _Out_ XAUDIO2_BUFFER& buffer ) const; // Variant without WMA buffer
+#endif
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        SoundEffect(SoundEffect const&) DIRECTX_CTOR_DELETE
+        SoundEffect& operator= (SoundEffect const&) DIRECTX_CTOR_DELETE
+
+        // Private interface
+        void __cdecl UnregisterInstance( _In_ SoundEffectInstance* instance );
+
+        friend class SoundEffectInstance; // Gives SoundEffectInstance access to UnregisterInstance()
+    };
+
+
+    //----------------------------------------------------------------------------------
+    struct AudioListener : public X3DAUDIO_LISTENER // X3DAUDIO_LISTENER with default values and DirectXMath-based setters
+    {
+        AudioListener()
+        {
+            memset( this, 0, sizeof(X3DAUDIO_LISTENER) ); // Zero the base structure (this type adds no data members)
+
+            OrientFront.z = -1.f; // Default front vector: (0, 0, -1)
+
+            OrientTop.y = 1.f; // Default top vector: (0, 1, 0)
+        }
+
+        void XM_CALLCONV SetPosition( FXMVECTOR v )
+        {
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Position ), v );
+        }
+        void __cdecl SetPosition( const XMFLOAT3& pos )
+        {
+            Position.x = pos.x;
+            Position.y = pos.y;
+            Position.z = pos.z;
+        }
+
+        void XM_CALLCONV SetVelocity( FXMVECTOR v )
+        {
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Velocity ), v );
+        }
+        void __cdecl SetVelocity( const XMFLOAT3& vel )
+        {
+            Velocity.x = vel.x;
+            Velocity.y = vel.y;
+            Velocity.z = vel.z;
+        }
+
+        void XM_CALLCONV SetOrientation( FXMVECTOR forward, FXMVECTOR up )
+        {
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientFront ), forward );
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientTop ), up );
+        }
+        void __cdecl SetOrientation( const XMFLOAT3& forward, const XMFLOAT3& up )
+        {
+            OrientFront.x = forward.x;  OrientTop.x = up.x;
+            OrientFront.y = forward.y;  OrientTop.y = up.y;
+            OrientFront.z = forward.z;  OrientTop.z = up.z;
+        }
+
+        void XM_CALLCONV SetOrientationFromQuaternion( FXMVECTOR quat )
+        {
+            XMVECTOR forward = XMVector3Rotate( g_XMIdentityR2, quat ); // Front = g_XMIdentityR2 rotated by quat
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientFront ), forward );
+
+            XMVECTOR up  = XMVector3Rotate( g_XMIdentityR1, quat ); // Top = g_XMIdentityR1 rotated by quat
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientTop ), up );
+        }
+
+        void XM_CALLCONV Update( FXMVECTOR newPos, XMVECTOR upDir, float dt )
+            // Updates velocity and orientation by tracking changes in position over time...
+        {
+            if ( dt > 0.f ) // Nothing is updated (not even Position) when dt is zero or negative
+            {
+                XMVECTOR lastPos = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( &Position ) );
+
+                XMVECTOR vDelta = ( newPos - lastPos );
+                XMVECTOR v = vDelta / dt; // Velocity = displacement / elapsed time
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Velocity ), v );
+
+                vDelta = XMVector3Normalize( vDelta ); // Front = direction of travel
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientFront ), vDelta );
+
+                v = XMVector3Cross( upDir, vDelta );
+                v = XMVector3Normalize( v );
+
+                v = XMVector3Cross( vDelta, v ); // Top = front x normalize(upDir x front), normalized below
+                v = XMVector3Normalize( v );
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientTop ), v );
+
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Position ), newPos );
+            }
+        }
+    };
+
+
+    //----------------------------------------------------------------------------------
+    struct AudioEmitter : public X3DAUDIO_EMITTER // X3DAUDIO_EMITTER with default values, its own azimuth storage and DirectXMath-based setters
+    {
+        float       EmitterAzimuths[XAUDIO2_MAX_AUDIO_CHANNELS]; // Storage that pChannelAzimuths is pointed at by the constructor
+
+        AudioEmitter()
+        {
+            memset( this, 0, sizeof(X3DAUDIO_EMITTER) ); // Zero the base structure only
+            memset( EmitterAzimuths, 0, sizeof(EmitterAzimuths) );
+
+            OrientFront.z = -1.f; // Default front vector: (0, 0, -1)
+
+            OrientTop.y =
+            ChannelRadius = 
+            CurveDistanceScaler =
+            DopplerScaler = 1.f; // Chained assignment: all four fields above are set to 1
+
+            ChannelCount = 1;
+            pChannelAzimuths = EmitterAzimuths; // NOTE(review): points into this object; a plain copy of the struct keeps the source's address
+
+            InnerRadiusAngle = X3DAUDIO_PI / 4.0f;
+        }
+
+        void XM_CALLCONV SetPosition( FXMVECTOR v )
+        {
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Position ), v );
+        }
+        void __cdecl SetPosition( const XMFLOAT3& pos )
+        {
+            Position.x = pos.x;
+            Position.y = pos.y;
+            Position.z = pos.z;
+        }
+
+        void XM_CALLCONV SetVelocity( FXMVECTOR v )
+        {
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Velocity ), v );
+        }
+        void __cdecl SetVelocity( const XMFLOAT3& vel )
+        {
+            Velocity.x = vel.x;
+            Velocity.y = vel.y;
+            Velocity.z = vel.z;
+        }
+
+        void XM_CALLCONV SetOrientation( FXMVECTOR forward, FXMVECTOR up )
+        {
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientFront ), forward );
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientTop ), up );
+        }
+        void __cdecl SetOrientation( const XMFLOAT3& forward, const XMFLOAT3& up )
+        {
+            OrientFront.x = forward.x;  OrientTop.x = up.x;
+            OrientFront.y = forward.y;  OrientTop.y = up.y;
+            OrientFront.z = forward.z;  OrientTop.z = up.z;
+        }
+
+        void XM_CALLCONV SetOrientationFromQuaternion( FXMVECTOR quat )
+        {
+            XMVECTOR forward = XMVector3Rotate( g_XMIdentityR2, quat ); // Front = g_XMIdentityR2 rotated by quat
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientFront ), forward );
+
+            XMVECTOR up  = XMVector3Rotate( g_XMIdentityR1, quat ); // Top = g_XMIdentityR1 rotated by quat
+            XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientTop ), up );
+        }
+
+        void XM_CALLCONV Update( FXMVECTOR newPos, XMVECTOR upDir, float dt )
+            // Updates velocity and orientation by tracking changes in position over time...
+        {
+            if ( dt > 0.f ) // Nothing is updated (not even Position) when dt is zero or negative
+            {
+                XMVECTOR lastPos = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( &Position ) );
+
+                XMVECTOR vDelta = ( newPos - lastPos );
+                XMVECTOR v = vDelta / dt; // Velocity = displacement / elapsed time
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Velocity ), v );
+
+                vDelta = XMVector3Normalize( vDelta ); // Front = direction of travel
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientFront ), vDelta );
+
+                v = XMVector3Cross( upDir, vDelta );
+                v = XMVector3Normalize( v );
+
+                v = XMVector3Cross( vDelta, v ); // Top = front x normalize(upDir x front), normalized below
+                v = XMVector3Normalize( v );
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &OrientTop ), v );
+
+                XMStoreFloat3( reinterpret_cast<XMFLOAT3*>( &Position ), newPos );
+            }
+        }
+    };
+
+
+    //----------------------------------------------------------------------------------
+    class SoundEffectInstance // Playback instance of a SoundEffect or WaveBank entry; constructors are private, movable but not copyable
+    {
+    public:
+        SoundEffectInstance(SoundEffectInstance&& moveFrom);
+        SoundEffectInstance& operator= (SoundEffectInstance&& moveFrom);
+        virtual ~SoundEffectInstance();
+
+        void __cdecl Play( bool loop = false );
+        void __cdecl Stop( bool immediate = true );
+        void __cdecl Pause();
+        void __cdecl Resume();
+
+        void __cdecl SetVolume( float volume );
+        void __cdecl SetPitch( float pitch );
+        void __cdecl SetPan( float pan );
+
+        void __cdecl Apply3D( const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords = true );
+
+        bool __cdecl IsLooped() const;
+
+        SoundState __cdecl GetState();
+
+        // Notifications.
+        void __cdecl OnDestroyParent();
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Private constructors
+        SoundEffectInstance( _In_ AudioEngine* engine, _In_ SoundEffect* effect, SOUND_EFFECT_INSTANCE_FLAGS flags );
+        SoundEffectInstance( _In_ AudioEngine* engine, _In_ WaveBank* effect, int index, SOUND_EFFECT_INSTANCE_FLAGS flags );
+        // Only these two CreateInstance() overloads are granted access to the private constructors
+        friend std::unique_ptr<SoundEffectInstance> __cdecl SoundEffect::CreateInstance( SOUND_EFFECT_INSTANCE_FLAGS );
+        friend std::unique_ptr<SoundEffectInstance> __cdecl WaveBank::CreateInstance( int, SOUND_EFFECT_INSTANCE_FLAGS );
+
+        // Prevent copying.
+        SoundEffectInstance(SoundEffectInstance const&) DIRECTX_CTOR_DELETE
+        SoundEffectInstance& operator= (SoundEffectInstance const&) DIRECTX_CTOR_DELETE
+    };
+
+
+    //----------------------------------------------------------------------------------
+    class DynamicSoundEffectInstance // Sound instance fed with caller-submitted audio buffers; movable but not copyable
+    {
+    public:
+        DynamicSoundEffectInstance( _In_ AudioEngine* engine,
+                                    _In_opt_ std::function<void DIRECTX_STD_CALLCONV(DynamicSoundEffectInstance*)> bufferNeeded,
+                                    int sampleRate, int channels, int sampleBits = 16,
+                                    SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default );
+        DynamicSoundEffectInstance(DynamicSoundEffectInstance&& moveFrom);
+        DynamicSoundEffectInstance& operator= (DynamicSoundEffectInstance&& moveFrom);
+        virtual ~DynamicSoundEffectInstance();
+
+        void __cdecl Play();
+        void __cdecl Stop( bool immediate = true );
+        void __cdecl Pause();
+        void __cdecl Resume();
+
+        void __cdecl SetVolume( float volume );
+        void __cdecl SetPitch( float pitch );
+        void __cdecl SetPan( float pan );
+
+        void __cdecl Apply3D( const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords = true );
+
+        void __cdecl SubmitBuffer( _In_reads_bytes_(audioBytes) const uint8_t* pAudioData, size_t audioBytes );
+        void __cdecl SubmitBuffer( _In_reads_bytes_(audioBytes) const uint8_t* pAudioData, uint32_t offset, size_t audioBytes );
+
+        SoundState __cdecl GetState();
+
+        size_t __cdecl GetSampleDuration( size_t bytes ) const;
+            // Returns duration in samples of a buffer of a given size
+
+        size_t __cdecl GetSampleDurationMS( size_t bytes ) const;
+            // Returns duration in milliseconds of a buffer of a given size
+
+        size_t __cdecl GetSampleSizeInBytes( uint64_t duration ) const;
+            // Returns size of a buffer for a duration given in milliseconds
+
+        int __cdecl GetPendingBufferCount() const;
+
+        const WAVEFORMATEX* __cdecl GetFormat() const;
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        DynamicSoundEffectInstance(DynamicSoundEffectInstance const&) DIRECTX_CTOR_DELETE
+        DynamicSoundEffectInstance& operator= (DynamicSoundEffectInstance const&) DIRECTX_CTOR_DELETE
+    };
+}
+
+#pragma warning(pop)
\ No newline at end of file
diff --git a/Windows/DirectXTK/CommonStates.h b/Windows/DirectXTK/CommonStates.h
new file mode 100644
index 0000000..cfeb436
--- /dev/null
+++ b/Windows/DirectXTK/CommonStates.h
@@ -0,0 +1,81 @@
+//--------------------------------------------------------------------------------------
+// File: CommonStates.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <memory>
+
+
+namespace DirectX
+{
+    class CommonStates  // Accessors for blend, depth-stencil, rasterizer and sampler state objects.
+    {
+    public:
+        explicit CommonStates(_In_ ID3D11Device* device);
+        CommonStates(CommonStates&& moveFrom);
+        CommonStates& operator= (CommonStates&& moveFrom);
+        virtual ~CommonStates();
+
+        // Blend states (returned as raw ID3D11BlendState pointers).
+        ID3D11BlendState* __cdecl Opaque() const;
+        ID3D11BlendState* __cdecl AlphaBlend() const;
+        ID3D11BlendState* __cdecl Additive() const;
+        ID3D11BlendState* __cdecl NonPremultiplied() const;
+
+        // Depth stencil states (returned as raw ID3D11DepthStencilState pointers).
+        ID3D11DepthStencilState* __cdecl DepthNone() const;
+        ID3D11DepthStencilState* __cdecl DepthDefault() const;
+        ID3D11DepthStencilState* __cdecl DepthRead() const;
+
+        // Rasterizer states (returned as raw ID3D11RasterizerState pointers).
+        ID3D11RasterizerState* __cdecl CullNone() const;
+        ID3D11RasterizerState* __cdecl CullClockwise() const;
+        ID3D11RasterizerState* __cdecl CullCounterClockwise() const;
+        ID3D11RasterizerState* __cdecl Wireframe() const;
+
+        // Sampler states (returned as raw ID3D11SamplerState pointers).
+        ID3D11SamplerState* __cdecl PointWrap() const;
+        ID3D11SamplerState* __cdecl PointClamp() const;
+        ID3D11SamplerState* __cdecl LinearWrap() const;
+        ID3D11SamplerState* __cdecl LinearClamp() const;
+        ID3D11SamplerState* __cdecl AnisotropicWrap() const;
+        ID3D11SamplerState* __cdecl AnisotropicClamp() const;
+
+    private:
+        // Private implementation (pimpl): Impl is only forward-declared here and held by shared_ptr.
+        class Impl;
+
+        std::shared_ptr<Impl> pImpl;
+
+        // Prevent copying. No ';' follows the macro because DIRECTX_CTOR_DELETE supplies its own.
+        CommonStates(CommonStates const&) DIRECTX_CTOR_DELETE
+        CommonStates& operator= (CommonStates const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/DDSTextureLoader.h b/Windows/DirectXTK/DDSTextureLoader.h
new file mode 100644
index 0000000..e2a40d2
--- /dev/null
+++ b/Windows/DirectXTK/DDSTextureLoader.h
@@ -0,0 +1,160 @@
+//--------------------------------------------------------------------------------------
+// File: DDSTextureLoader.h
+//
+// Functions for loading a DDS texture and creating a Direct3D 11 runtime resource for it
+//
+// Note these functions are useful as a light-weight runtime loader for DDS files. For
+// a full-featured DDS file reader, writer, and texture processing pipeline see
+// the 'Texconv' sample and the 'DirectXTex' library.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+namespace DirectX
+{
+    enum DDS_ALPHA_MODE
+    {
+        DDS_ALPHA_MODE_UNKNOWN       = 0,
+        DDS_ALPHA_MODE_STRAIGHT      = 1,
+        DDS_ALPHA_MODE_PREMULTIPLIED = 2,
+        DDS_ALPHA_MODE_OPAQUE        = 3,
+        DDS_ALPHA_MODE_CUSTOM        = 4,
+    };
+
+    // Standard version: source is an in-memory DDS image or a file name; texture, textureView and alphaMode are optional outputs.
+    HRESULT __cdecl CreateDDSTextureFromMemory( _In_ ID3D11Device* d3dDevice,
+                                                _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData,
+                                                _In_ size_t ddsDataSize,
+                                                _Outptr_opt_ ID3D11Resource** texture,
+                                                _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                                _In_ size_t maxsize = 0,
+                                                _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                              );
+
+    HRESULT __cdecl CreateDDSTextureFromFile( _In_ ID3D11Device* d3dDevice,
+                                              _In_z_ const wchar_t* szFileName,
+                                              _Outptr_opt_ ID3D11Resource** texture,
+                                              _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                              _In_ size_t maxsize = 0,
+                                              _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                            );
+
+    // Standard version with optional auto-gen mipmap support (adds an optional d3dContext; Xbox One title builds take the DeviceX/ContextX types)
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateDDSTextureFromMemory( _In_ ID3D11DeviceX* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateDDSTextureFromMemory( _In_ ID3D11Device* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContext* d3dContext,
+    #endif
+                                                _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData,
+                                                _In_ size_t ddsDataSize,
+                                                _Outptr_opt_ ID3D11Resource** texture,
+                                                _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                                _In_ size_t maxsize = 0,
+                                                _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                              );
+
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateDDSTextureFromFile( _In_ ID3D11DeviceX* d3dDevice,
+                                              _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateDDSTextureFromFile( _In_ ID3D11Device* d3dDevice,
+                                              _In_opt_ ID3D11DeviceContext* d3dContext,
+    #endif
+                                              _In_z_ const wchar_t* szFileName,
+                                              _Outptr_opt_ ID3D11Resource** texture,
+                                              _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                              _In_ size_t maxsize = 0,
+                                              _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                            );
+
+    // Extended version: caller also passes usage, bind/CPU-access/misc flags and forceSRGB; maxsize has no default here
+    HRESULT __cdecl CreateDDSTextureFromMemoryEx( _In_ ID3D11Device* d3dDevice,
+                                                  _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData,
+                                                  _In_ size_t ddsDataSize,
+                                                  _In_ size_t maxsize,
+                                                  _In_ D3D11_USAGE usage,
+                                                  _In_ unsigned int bindFlags,
+                                                  _In_ unsigned int cpuAccessFlags,
+                                                  _In_ unsigned int miscFlags,
+                                                  _In_ bool forceSRGB,
+                                                  _Outptr_opt_ ID3D11Resource** texture,
+                                                  _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                                  _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                                );
+
+    HRESULT __cdecl CreateDDSTextureFromFileEx( _In_ ID3D11Device* d3dDevice,
+                                                _In_z_ const wchar_t* szFileName,
+                                                _In_ size_t maxsize,
+                                                _In_ D3D11_USAGE usage,
+                                                _In_ unsigned int bindFlags,
+                                                _In_ unsigned int cpuAccessFlags,
+                                                _In_ unsigned int miscFlags,
+                                                _In_ bool forceSRGB,
+                                                _Outptr_opt_ ID3D11Resource** texture,
+                                                _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                                _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                              );
+
+    // Extended version with optional auto-gen mipmap support (adds an optional d3dContext; Xbox One title builds take the DeviceX/ContextX types)
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateDDSTextureFromMemoryEx( _In_ ID3D11DeviceX* d3dDevice,
+                                                  _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateDDSTextureFromMemoryEx( _In_ ID3D11Device* d3dDevice,
+                                                  _In_opt_ ID3D11DeviceContext* d3dContext,
+    #endif
+                                                  _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData,
+                                                  _In_ size_t ddsDataSize,
+                                                  _In_ size_t maxsize,
+                                                  _In_ D3D11_USAGE usage,
+                                                  _In_ unsigned int bindFlags,
+                                                  _In_ unsigned int cpuAccessFlags,
+                                                  _In_ unsigned int miscFlags,
+                                                  _In_ bool forceSRGB,
+                                                  _Outptr_opt_ ID3D11Resource** texture,
+                                                  _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                                  _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                                );
+
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateDDSTextureFromFileEx( _In_ ID3D11DeviceX* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateDDSTextureFromFileEx( _In_ ID3D11Device* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContext* d3dContext,
+    #endif
+                                                _In_z_ const wchar_t* szFileName,
+                                                _In_ size_t maxsize,
+                                                _In_ D3D11_USAGE usage,
+                                                _In_ unsigned int bindFlags,
+                                                _In_ unsigned int cpuAccessFlags,
+                                                _In_ unsigned int miscFlags,
+                                                _In_ bool forceSRGB,
+                                                _Outptr_opt_ ID3D11Resource** texture,
+                                                _Outptr_opt_ ID3D11ShaderResourceView** textureView,
+                                                _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr
+                                              );
+}
\ No newline at end of file
diff --git a/Windows/DirectXTK/DirectXHelpers.h b/Windows/DirectXTK/DirectXHelpers.h
new file mode 100644
index 0000000..40ce645
--- /dev/null
+++ b/Windows/DirectXTK/DirectXHelpers.h
@@ -0,0 +1,150 @@
+//--------------------------------------------------------------------------------------
+// File: DirectXHelpers.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) )
+#if !defined(_XBOX_ONE) || !defined(_TITLE)
+#pragma comment(lib,"dxguid.lib")
+#endif
+#endif
+
+#include <exception>
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+//
+// The core Direct3D headers provide the following helper C++ classes
+//  CD3D11_RECT
+//  CD3D11_BOX
+//  CD3D11_DEPTH_STENCIL_DESC
+//  CD3D11_BLEND_DESC, CD3D11_BLEND_DESC1
+//  CD3D11_RASTERIZER_DESC, CD3D11_RASTERIZER_DESC1
+//  CD3D11_BUFFER_DESC
+//  CD3D11_TEXTURE1D_DESC
+//  CD3D11_TEXTURE2D_DESC
+//  CD3D11_TEXTURE3D_DESC
+//  CD3D11_SHADER_RESOURCE_VIEW_DESC
+//  CD3D11_RENDER_TARGET_VIEW_DESC
+//  CD3D11_VIEWPORT
+//  CD3D11_DEPTH_STENCIL_VIEW_DESC
+//  CD3D11_UNORDERED_ACCESS_VIEW_DESC
+//  CD3D11_SAMPLER_DESC
+//  CD3D11_QUERY_DESC
+//  CD3D11_COUNTER_DESC
+//
+
+
+namespace DirectX
+{
+    // Scoped Map/Unmap wrapper for a Direct3D 11 resource, similar in spirit to std::lock_guard
+    class MapGuard : public D3D11_MAPPED_SUBRESOURCE
+    {
+    public:
+        MapGuard( _In_ ID3D11DeviceContext* context,
+                  _In_ ID3D11Resource *resource,
+                  _In_ UINT subresource,
+                  _In_ D3D11_MAP mapType,
+                  _In_ UINT mapFlags )
+            : mContext(context), mResource(resource), mSubresource(subresource)
+        {
+            // Map() fills in the D3D11_MAPPED_SUBRESOURCE base sub-object of this guard
+            if (FAILED(mContext->Map( resource, subresource, mapType, mapFlags, this )))
+            {
+                throw std::exception();
+            }
+        }
+
+        ~MapGuard()
+        {
+            mContext->Unmap( mResource, mSubresource );
+        }
+
+        uint8_t* get() const
+        {
+            return static_cast<uint8_t*>( pData );
+        }
+        uint8_t* get(size_t slice) const
+        {
+            return get() + ( slice * DepthPitch );
+        }
+
+        uint8_t* scanline(size_t row) const
+        {
+            return get() + ( row * RowPitch );
+        }
+        uint8_t* scanline(size_t slice, size_t row) const
+        {
+            return get(slice) + ( row * RowPitch );
+        }
+
+    private:
+        ID3D11DeviceContext*    mContext;
+        ID3D11Resource*         mResource;
+        UINT                    mSubresource;
+        // Copy operations are declared private (no definition in this header) so the guard cannot be copied
+        MapGuard(MapGuard const&);
+        MapGuard& operator= (MapGuard const&);
+    };
+
+
+    // Sets a D3D resource name string (used by PIX and debug layer leak reporting); compiles to a no-op unless _DEBUG or PROFILE is defined and NO_D3D11_DEBUG_NAME is not.
+    template<UINT TNameLength>
+    inline void SetDebugObjectName(_In_ ID3D11DeviceChild* resource, _In_z_ const char (&name)[TNameLength])
+    {
+        #if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) )
+            #if defined(_XBOX_ONE) && defined(_TITLE)
+                WCHAR wname[MAX_PATH];  // Xbox One title path: SetName is given a wide string, so convert first
+                int result = MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, name, TNameLength, wname, MAX_PATH );
+                if ( result > 0 )  // the name is simply not set when the conversion reports failure
+                {
+                    resource->SetName( wname );
+                }
+            #else
+                resource->SetPrivateData(WKPDID_D3DDebugObjectName, TNameLength - 1, name);  // array length minus the _In_z_ terminator
+            #endif
+        #else
+            UNREFERENCED_PARAMETER(resource);
+            UNREFERENCED_PARAMETER(name);
+        #endif
+    }
+    // Wide-character overload of SetDebugObjectName; outside Xbox One title builds the name is converted to CP_ACP bytes first.
+    template<UINT TNameLength>
+    inline void SetDebugObjectName(_In_ ID3D11DeviceChild* resource, _In_z_ const wchar_t (&name)[TNameLength])
+    {
+        #if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) )
+            #if defined(_XBOX_ONE) && defined(_TITLE)
+                resource->SetName( name );
+            #else
+                char aname[MAX_PATH];
+                int result = WideCharToMultiByte( CP_ACP, 0, name, TNameLength, aname, MAX_PATH, nullptr, nullptr );
+                if ( result > 0 )  // 0 means the conversion failed (e.g. name does not fit in MAX_PATH bytes)
+                {
+                    resource->SetPrivateData(WKPDID_D3DDebugObjectName, static_cast<UINT>(result - 1), aname);  // bytes written minus terminator; can exceed TNameLength - 1 on multi-byte code pages
+                }
+            #endif
+        #else
+            UNREFERENCED_PARAMETER(resource);
+            UNREFERENCED_PARAMETER(name);
+        #endif
+    }
+}
\ No newline at end of file
diff --git a/Windows/DirectXTK/Effects.h b/Windows/DirectXTK/Effects.h
new file mode 100644
index 0000000..5c594f2
--- /dev/null
+++ b/Windows/DirectXTK/Effects.h
@@ -0,0 +1,612 @@
+//--------------------------------------------------------------------------------------
+// File: Effects.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <DirectXMath.h>
+#include <memory>
+
+#pragma warning(push)
+#pragma warning(disable : 4481)
+// VS 2010 considers 'override' to be an extension, but it's part of C++11 as of VS 2012
+
+namespace DirectX
+{
+    #if (DIRECTX_MATH_VERSION < 305) && !defined(XM_CALLCONV)
+    #define XM_CALLCONV __fastcall
+    typedef const XMVECTOR& HXMVECTOR;
+    typedef const XMMATRIX& FXMMATRIX;
+    #endif
+
+    //----------------------------------------------------------------------------------
+    // Abstract interface representing any effect which can be applied onto a D3D device context.
+    class IEffect
+    {
+    public:
+        virtual ~IEffect() { }  // virtual so an effect can be destroyed through an IEffect pointer
+
+        virtual void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) = 0;
+        // Both arguments are out-parameters: the vertex shader bytecode pointer and its length.
+        virtual void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) = 0;
+    };
+
+
+    // Abstract interface for effects with world, view, and projection matrices (setters are XM_CALLCONV, not __cdecl).
+    class IEffectMatrices
+    {
+    public:
+        virtual ~IEffectMatrices() { }  // virtual so implementations can be destroyed through this interface
+
+        virtual void XM_CALLCONV SetWorld(FXMMATRIX value) = 0;
+        virtual void XM_CALLCONV SetView(FXMMATRIX value) = 0;
+        virtual void XM_CALLCONV SetProjection(FXMMATRIX value) = 0;
+    };
+
+
+    // Abstract interface for effects which support directional lighting.
+    class IEffectLights
+    {
+    public:
+        virtual ~IEffectLights() { }  // virtual so implementations can be destroyed through this interface
+
+        virtual void __cdecl SetLightingEnabled(bool value) = 0;
+        virtual void __cdecl SetPerPixelLighting(bool value) = 0;
+        virtual void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) = 0;
+        // Per-light setters: whichLight selects the light being configured.
+        virtual void __cdecl SetLightEnabled(int whichLight, bool value) = 0;
+        virtual void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) = 0;
+        virtual void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) = 0;
+        virtual void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) = 0;
+
+        virtual void __cdecl EnableDefaultLighting() = 0;
+
+        static const int MaxDirectionalLights = 3;  // presumably whichLight must be in [0, MaxDirectionalLights) -- TODO confirm in the implementation
+    };
+
+
+    // Abstract interface for effects which support fog (enable flag, start/end values and a color).
+    class IEffectFog
+    {
+    public:
+        virtual ~IEffectFog() { }  // virtual so implementations can be destroyed through this interface
+
+        virtual void __cdecl SetFogEnabled(bool value) = 0;
+        virtual void __cdecl SetFogStart(float value) = 0;
+        virtual void __cdecl SetFogEnd(float value) = 0;
+        virtual void XM_CALLCONV SetFogColor(FXMVECTOR value) = 0;
+    };
+
+
+    // Abstract interface for effects which support skinning
+    class IEffectSkinning
+    {
+    public:
+        virtual ~IEffectSkinning() { }  // virtual so implementations can be destroyed through this interface
+
+        virtual void __cdecl SetWeightsPerVertex(int value) = 0;
+        virtual void __cdecl SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) = 0;  // value points to 'count' matrices
+        virtual void __cdecl ResetBoneTransforms() = 0;
+
+        static const int MaxBones = 72;  // presumably the upper bound on 'count' above -- TODO confirm in the implementation
+    };
+
+
+    //----------------------------------------------------------------------------------
+    // Built-in shader supports optional texture mapping, vertex coloring, directional lighting, and fog.
+    class BasicEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog
+    {
+    public:
+        explicit BasicEffect(_In_ ID3D11Device* device);
+        BasicEffect(BasicEffect&& moveFrom);
+        BasicEffect& operator= (BasicEffect&& moveFrom);
+        virtual ~BasicEffect();
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings (IEffectMatrices overrides).
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+
+        // Material settings (class-specific; these are not overrides of any interface method).
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void XM_CALLCONV SetSpecularColor(FXMVECTOR value);
+        void __cdecl SetSpecularPower(float value);
+        void __cdecl DisableSpecular();
+        void __cdecl SetAlpha(float value);
+        
+        // Light settings (IEffectLights overrides).
+        void __cdecl SetLightingEnabled(bool value) override;
+        void __cdecl SetPerPixelLighting(bool value) override;
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // Fog settings (IEffectFog overrides).
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Texture setting (SetTexture is annotated _In_opt_, so a null view is accepted).
+        void __cdecl SetTextureEnabled(bool value);
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        
+    private:
+        // Private implementation (pimpl): Impl is only forward-declared here and owned by unique_ptr.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying. No ';' follows the macro because DIRECTX_CTOR_DELETE supplies its own.
+        BasicEffect(BasicEffect const&) DIRECTX_CTOR_DELETE
+        BasicEffect& operator= (BasicEffect const&) DIRECTX_CTOR_DELETE
+    };
+
+
+
+    // Built-in shader supports per-pixel alpha testing.
+    class AlphaTestEffect : public IEffect, public IEffectMatrices, public IEffectFog
+    {
+    public:
+        explicit AlphaTestEffect(_In_ ID3D11Device* device);
+        AlphaTestEffect(AlphaTestEffect&& moveFrom);
+        AlphaTestEffect& operator= (AlphaTestEffect&& moveFrom);
+        virtual ~AlphaTestEffect();
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings (IEffectMatrices overrides).
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+
+        // Material settings (class-specific; these are not overrides of any interface method).
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void __cdecl SetAlpha(float value);
+        
+        // Fog settings (IEffectFog overrides).
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Texture setting (annotated _In_opt_, so a null view is accepted).
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        
+        // Alpha test settings: a D3D11 comparison function plus an integer reference alpha.
+        void __cdecl SetAlphaFunction(D3D11_COMPARISON_FUNC value);
+        void __cdecl SetReferenceAlpha(int value);
+
+    private:
+        // Private implementation (pimpl): Impl is only forward-declared here and owned by unique_ptr.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying. No ';' follows the macro because DIRECTX_CTOR_DELETE supplies its own.
+        AlphaTestEffect(AlphaTestEffect const&) DIRECTX_CTOR_DELETE
+        AlphaTestEffect& operator= (AlphaTestEffect const&) DIRECTX_CTOR_DELETE
+    };
+
+
+
+    // Built-in shader supports two layer multitexturing (eg. for lightmaps or detail textures).
+    class DualTextureEffect : public IEffect, public IEffectMatrices, public IEffectFog
+    {
+    public:
+        explicit DualTextureEffect(_In_ ID3D11Device* device);
+        DualTextureEffect(DualTextureEffect&& moveFrom);
+        DualTextureEffect& operator= (DualTextureEffect&& moveFrom);
+        virtual ~DualTextureEffect();  // already virtual via the interfaces; spelled out to match the sibling effect classes
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings (IEffectMatrices overrides).
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+
+        // Material settings (class-specific; these are not overrides of any interface method).
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void __cdecl SetAlpha(float value);
+        
+        // Fog settings (IEffectFog overrides).
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Texture settings: one setter per texture layer; both are _In_opt_, so a null view is accepted.
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetTexture2(_In_opt_ ID3D11ShaderResourceView* value);
+        
+    private:
+        // Private implementation (pimpl): Impl is only forward-declared here and owned by unique_ptr.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying. No ';' follows the macro because DIRECTX_CTOR_DELETE supplies its own.
+        DualTextureEffect(DualTextureEffect const&) DIRECTX_CTOR_DELETE
+        DualTextureEffect& operator= (DualTextureEffect const&) DIRECTX_CTOR_DELETE
+    };
+
+
+
+    // Built-in shader supports cubic environment mapping.
+    class EnvironmentMapEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog
+    {
+    public:
+        explicit EnvironmentMapEffect(_In_ ID3D11Device* device);
+        EnvironmentMapEffect(EnvironmentMapEffect&& moveFrom);
+        EnvironmentMapEffect& operator= (EnvironmentMapEffect&& moveFrom);
+        virtual ~EnvironmentMapEffect();
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings (IEffectMatrices overrides).
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+
+        // Material settings (class-specific; these are not overrides of any interface method).
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void __cdecl SetAlpha(float value);
+        
+        // Light settings (the publicly exposed subset of IEffectLights; the rest is private below).
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // Fog settings (IEffectFog overrides).
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Texture setting (annotated _In_opt_, so a null view is accepted).
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+
+        // Environment map settings.
+        void __cdecl SetEnvironmentMap(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetEnvironmentMapAmount(float value);
+        void XM_CALLCONV SetEnvironmentMapSpecular(FXMVECTOR value);
+        void __cdecl SetFresnelFactor(float value);
+        
+    private:
+        // Private implementation (pimpl): Impl is only forward-declared here and owned by unique_ptr.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Unsupported interface methods: overridden privately, so not callable on an EnvironmentMapEffect directly (still reachable through IEffectLights).
+        void __cdecl SetLightingEnabled(bool value) override;
+        void __cdecl SetPerPixelLighting(bool value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        // Prevent copying. No ';' follows the macro because DIRECTX_CTOR_DELETE supplies its own.
+        EnvironmentMapEffect(EnvironmentMapEffect const&) DIRECTX_CTOR_DELETE
+        EnvironmentMapEffect& operator= (EnvironmentMapEffect const&) DIRECTX_CTOR_DELETE
+    };
+
+
+
+    // Built-in shader supports skinned animation.
+    class SkinnedEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog, public IEffectSkinning
+    {
+    public:
+        explicit SkinnedEffect(_In_ ID3D11Device* device);
+        SkinnedEffect(SkinnedEffect&& moveFrom);
+        SkinnedEffect& operator= (SkinnedEffect&& moveFrom);
+        virtual ~SkinnedEffect();
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings (IEffectMatrices overrides).
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+
+        // Material settings (class-specific; these are not overrides of any interface method).
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void XM_CALLCONV SetSpecularColor(FXMVECTOR value);
+        void __cdecl SetSpecularPower(float value);
+        void __cdecl DisableSpecular();
+        void __cdecl SetAlpha(float value);
+        
+        // Light settings (IEffectLights overrides; SetLightingEnabled is private below).
+        void __cdecl SetPerPixelLighting(bool value) override;
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // Fog settings (IEffectFog overrides).
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Texture setting (annotated _In_opt_, so a null view is accepted).
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        
+        // Animation settings (IEffectSkinning overrides).
+        void __cdecl SetWeightsPerVertex(int value) override;
+        void __cdecl SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) override;
+        void __cdecl ResetBoneTransforms() override;
+
+    private:
+        // Private implementation (pimpl): Impl is only forward-declared here and owned by unique_ptr.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Unsupported interface method: overridden privately, so not callable on a SkinnedEffect directly (still reachable through IEffectLights).
+        void __cdecl SetLightingEnabled(bool value) override;
+
+        // Prevent copying. No ';' follows the macro because DIRECTX_CTOR_DELETE supplies its own.
+        SkinnedEffect(SkinnedEffect const&) DIRECTX_CTOR_DELETE
+        SkinnedEffect& operator= (SkinnedEffect const&) DIRECTX_CTOR_DELETE
+    };
+
+    
+
+    //----------------------------------------------------------------------------------
+    // Built-in effect for Visual Studio Shader Designer (DGSL) shaders
+    class DGSLEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectSkinning
+    {
+    public:
+        explicit DGSLEffect( _In_ ID3D11Device* device, _In_opt_ ID3D11PixelShader* pixelShader = nullptr,
+                             _In_ bool enableSkinning = false );
+        DGSLEffect(DGSLEffect&& moveFrom);
+        DGSLEffect& operator= (DGSLEffect&& moveFrom);
+        virtual ~DGSLEffect();
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+
+        // Material settings.
+        void XM_CALLCONV SetAmbientColor(FXMVECTOR value);
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void XM_CALLCONV SetSpecularColor(FXMVECTOR value);
+        void __cdecl SetSpecularPower(float value);
+        void __cdecl DisableSpecular();
+        void __cdecl SetAlpha(float value);
+
+        // Additional settings.
+        void XM_CALLCONV SetUVTransform(FXMMATRIX value);
+        void __cdecl SetViewport( float width, float height );
+        void __cdecl SetTime( float time );
+        void __cdecl SetAlphaDiscardEnable(bool value);
+
+        // Light settings.
+        void __cdecl SetLightingEnabled(bool value) override;
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        static const int MaxDirectionalLights = 4;
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Texture settings.
+        void __cdecl SetTextureEnabled(bool value);
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetTexture2(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetTexture(int whichTexture, _In_opt_ ID3D11ShaderResourceView* value);
+
+        static const int MaxTextures = 8;
+
+        // Animation settings.
+        void __cdecl SetWeightsPerVertex(int value) override;
+        void __cdecl SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) override;
+        void __cdecl ResetBoneTransforms() override;
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Unsupported interface method.
+        void __cdecl SetPerPixelLighting(bool value) override;
+
+        // Prevent copying.
+        DGSLEffect(DGSLEffect const&) DIRECTX_CTOR_DELETE
+        DGSLEffect& operator= (DGSLEffect const&) DIRECTX_CTOR_DELETE
+    };
+
+
+
+    //----------------------------------------------------------------------------------
+    // Abstract interface to factory for sharing effects and texture resources
+    class IEffectFactory
+    {
+    public:
+        virtual ~IEffectFactory() {}
+
+        struct EffectInfo
+        {
+            const WCHAR*        name;
+            bool                perVertexColor;
+            bool                enableSkinning;
+            bool                enableDualTexture;
+            float               specularPower;
+            float               alpha;
+            DirectX::XMFLOAT3   ambientColor;
+            DirectX::XMFLOAT3   diffuseColor;
+            DirectX::XMFLOAT3   specularColor;
+            DirectX::XMFLOAT3   emissiveColor;
+            const WCHAR*        texture;
+            const WCHAR*        texture2;
+
+            EffectInfo() { memset( this, 0, sizeof(EffectInfo) ); };
+        };
+
+        virtual std::shared_ptr<IEffect> __cdecl CreateEffect( _In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext ) = 0;
+
+        virtual void __cdecl CreateTexture( _In_z_ const WCHAR* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView ) = 0;
+    };
+
+
+    // Factory for sharing effects and texture resources
+    class EffectFactory : public IEffectFactory
+    {
+    public:
+        explicit EffectFactory(_In_ ID3D11Device* device);
+        EffectFactory(EffectFactory&& moveFrom);
+        EffectFactory& operator= (EffectFactory&& moveFrom);
+        virtual ~EffectFactory();
+
+        // IEffectFactory methods.
+        virtual std::shared_ptr<IEffect> __cdecl CreateEffect( _In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext ) override;
+        virtual void __cdecl CreateTexture( _In_z_ const WCHAR* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView ) override;
+
+        // Settings.
+        void __cdecl ReleaseCache();
+
+        void __cdecl SetSharing( bool enabled );
+
+        void __cdecl SetDirectory( _In_opt_z_ const WCHAR* path );
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::shared_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        EffectFactory(EffectFactory const&) DIRECTX_CTOR_DELETE
+        EffectFactory& operator= (EffectFactory const&) DIRECTX_CTOR_DELETE
+    };
+
+
+    // Factory for sharing Visual Studio Shader Designer (DGSL) shaders and texture resources
+    class DGSLEffectFactory : public IEffectFactory
+    {
+    public:
+        explicit DGSLEffectFactory(_In_ ID3D11Device* device);
+        DGSLEffectFactory(DGSLEffectFactory&& moveFrom);
+        DGSLEffectFactory& operator= (DGSLEffectFactory&& moveFrom);
+        virtual ~DGSLEffectFactory();
+
+        // IEffectFactory methods.
+        virtual std::shared_ptr<IEffect> __cdecl CreateEffect( _In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext ) override;
+        virtual void __cdecl CreateTexture( _In_z_ const WCHAR* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView ) override;
+
+        // DGSL methods.
+        struct DGSLEffectInfo : public EffectInfo
+        {
+            const WCHAR*        textures[6];
+            const WCHAR*        pixelShader;
+
+            DGSLEffectInfo() { memset( this, 0, sizeof(DGSLEffectInfo) ); };
+        };
+
+        virtual std::shared_ptr<IEffect> __cdecl CreateDGSLEffect( _In_ const DGSLEffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext );
+
+        virtual void __cdecl CreatePixelShader( _In_z_ const WCHAR* shader, _Outptr_ ID3D11PixelShader** pixelShader );
+
+        // Settings.
+        void __cdecl ReleaseCache();
+
+        void __cdecl SetSharing( bool enabled );
+
+        void __cdecl SetDirectory( _In_opt_z_ const WCHAR* path );
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::shared_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        DGSLEffectFactory(DGSLEffectFactory const&) DIRECTX_CTOR_DELETE
+        DGSLEffectFactory& operator= (DGSLEffectFactory const&) DIRECTX_CTOR_DELETE
+    };
+
+}
+
+#pragma warning(pop)
diff --git a/Windows/DirectXTK/GamePad.h b/Windows/DirectXTK/GamePad.h
new file mode 100644
index 0000000..dd1ccd3
--- /dev/null
+++ b/Windows/DirectXTK/GamePad.h
@@ -0,0 +1,244 @@
+//--------------------------------------------------------------------------------------
+// File: GamePad.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if (_WIN32_WINNT < 0x0A00 /*_WIN32_WINNT_WIN10*/)
+#ifndef _XBOX_ONE
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY != WINAPI_FAMILY_PHONE_APP)
+#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/ )
+#pragma comment(lib,"xinput.lib")
+#else
+#pragma comment(lib,"xinput9_1_0.lib")
+#endif
+#endif
+#endif
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <memory>
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#include <intsafe.h>
+#pragma warning(pop)
+
+
+namespace DirectX
+{
+    class GamePad
+    {
+    public:
+        GamePad();
+        GamePad(GamePad&& moveFrom);
+        GamePad& operator= (GamePad&& moveFrom);
+        virtual ~GamePad();
+
+#if (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/ ) || defined(_XBOX_ONE)
+        static const int MAX_PLAYER_COUNT = 8;
+#else
+        static const int MAX_PLAYER_COUNT = 4;
+#endif
+
+        enum DeadZone
+        {
+            DEAD_ZONE_INDEPENDENT_AXES = 0,
+            DEAD_ZONE_CIRCULAR,
+            DEAD_ZONE_NONE,
+        };
+
+        struct Buttons
+        {
+            bool a;
+            bool b;
+            bool x;
+            bool y;
+            bool leftStick;
+            bool rightStick;
+            bool leftShoulder;
+            bool rightShoulder;
+            bool back;
+            bool start;
+        };
+
+        struct DPad
+        {
+            bool up;
+            bool down;
+            bool right;
+            bool left;
+        };
+
+        struct ThumbSticks
+        {
+            float leftX;
+            float leftY;
+            float rightX;
+            float rightY;
+        };
+
+        struct Triggers
+        {
+            float left;
+            float right;
+        };
+        
+        struct State
+        {
+            bool        connected;
+            uint64_t    packet;
+            Buttons     buttons;
+            DPad        dpad;
+            ThumbSticks thumbSticks;
+            Triggers    triggers;
+
+            bool __cdecl IsConnected() const { return connected; }
+
+            // Is the button pressed currently?
+            bool __cdecl IsAPressed() const { return buttons.a; }
+            bool __cdecl IsBPressed() const { return buttons.b; }
+            bool __cdecl IsXPressed() const { return buttons.x; }
+            bool __cdecl IsYPressed() const { return buttons.y; }
+
+            bool __cdecl IsLeftStickPressed() const { return buttons.leftStick; }
+            bool __cdecl IsRightStickPressed() const { return buttons.rightStick; }
+
+            bool __cdecl IsLeftShoulderPressed() const { return buttons.leftShoulder; }
+            bool __cdecl IsRightShoulderPressed() const { return buttons.rightShoulder; }
+
+            bool __cdecl IsBackPressed() const { return buttons.back; }
+            bool __cdecl IsViewPressed() const { return buttons.back; }
+            bool __cdecl IsStartPressed() const { return buttons.start; }
+            bool __cdecl IsMenuPressed() const { return buttons.start; }
+
+            bool __cdecl IsDPadDownPressed() const { return dpad.down; };
+            bool __cdecl IsDPadUpPressed() const { return dpad.up; };
+            bool __cdecl IsDPadLeftPressed() const { return dpad.left; };
+            bool __cdecl IsDPadRightPressed() const { return dpad.right; };
+
+            bool __cdecl IsLeftThumbStickUp() const { return (thumbSticks.leftY > 0.5f) != 0; }
+            bool __cdecl IsLeftThumbStickDown() const { return (thumbSticks.leftY < -0.5f) != 0; }
+            bool __cdecl IsLeftThumbStickLeft() const { return (thumbSticks.leftX < -0.5f) != 0; }
+            bool __cdecl IsLeftThumbStickRight() const { return (thumbSticks.leftX > 0.5f) != 0; }
+
+            bool __cdecl IsRightThumbStickUp() const { return (thumbSticks.rightY > 0.5f ) != 0; }
+            bool __cdecl IsRightThumbStickDown() const { return (thumbSticks.rightY < -0.5f) != 0; }
+            bool __cdecl IsRightThumbStickLeft() const { return (thumbSticks.rightX < -0.5f) != 0; }
+            bool __cdecl IsRightThumbStickRight() const { return (thumbSticks.rightX > 0.5f) != 0; }
+
+            bool __cdecl IsLeftTriggerPressed() const { return (triggers.left > 0.5f) != 0; }
+            bool __cdecl IsRightTriggerPressed() const { return (triggers.right > 0.5f) != 0; }
+        };
+
+        struct Capabilities
+        {
+            enum Type
+            {
+                UNKNOWN = 0,
+                GAMEPAD,
+                WHEEL,
+                ARCADE_STICK,
+                FLIGHT_STICK,
+                DANCE_PAD,
+                GUITAR,
+                GUITAR_ALTERNATE,
+                DRUM_KIT,
+                GUITAR_BASS = 11,
+                ARCADE_PAD = 19,
+            };
+
+            bool        connected;
+            Type        gamepadType;
+            uint64_t    id;
+
+            bool __cdecl IsConnected() const { return connected; }
+        };
+
+        class ButtonStateTracker
+        {
+        public:
+            enum ButtonState
+            {
+                UP = 0,         // Button is up
+                HELD = 1,       // Button is held down
+                RELEASED = 2,   // Button was just released
+                PRESSED = 3,    // Button was just pressed
+            };
+
+            ButtonState a;
+            ButtonState b;
+            ButtonState x;
+            ButtonState y;
+
+            ButtonState leftStick;
+            ButtonState rightStick;
+
+            ButtonState leftShoulder;
+            ButtonState rightShoulder;
+
+            ButtonState back;
+            ButtonState start;
+
+            ButtonState dpadUp;
+            ButtonState dpadDown;
+            ButtonState dpadLeft;
+            ButtonState dpadRight;
+
+            ButtonStateTracker() { Reset(); }
+
+            void __cdecl Update( const State& state );
+
+            void __cdecl Reset();
+
+        private:
+            State lastState;
+        };
+
+        // Retrieve the current state of the gamepad of the associated player index
+        State __cdecl GetState(int player, DeadZone deadZoneMode = DEAD_ZONE_INDEPENDENT_AXES);
+
+        // Retrieve the current capabilities of the gamepad of the associated player index
+        Capabilities __cdecl GetCapabilities(int player);
+
+        // Set the vibration motor speeds of the gamepad
+        bool __cdecl SetVibration( int player, float leftMotor, float rightMotor, float leftTrigger = 0.f, float rightTrigger = 0.f );
+
+        // Handle suspending/resuming
+        void __cdecl Suspend();
+        void __cdecl Resume();
+
+        // Singleton
+        static GamePad& __cdecl Get();
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        GamePad(GamePad const&) DIRECTX_CTOR_DELETE
+        GamePad& operator=(GamePad const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/GeometricPrimitive.h b/Windows/DirectXTK/GeometricPrimitive.h
new file mode 100644
index 0000000..6e0584e
--- /dev/null
+++ b/Windows/DirectXTK/GeometricPrimitive.h
@@ -0,0 +1,110 @@
+//--------------------------------------------------------------------------------------
+// File: GeometricPrimitive.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "VertexTypes.h"
+
+#include <DirectXColors.h>
+#include <functional>
+#include <memory>
+#include <vector>
+
+// VS 2010 doesn't support explicit calling convention for std::function
+#ifndef DIRECTX_STD_CALLCONV
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#define DIRECTX_STD_CALLCONV
+#else
+#define DIRECTX_STD_CALLCONV __cdecl
+#endif
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+
+namespace DirectX
+{
+    #if (DIRECTX_MATH_VERSION < 305) && !defined(XM_CALLCONV)
+    #define XM_CALLCONV __fastcall
+    typedef const XMVECTOR& HXMVECTOR;
+    typedef const XMMATRIX& FXMMATRIX;
+    #endif
+
+    class IEffect;
+
+    class GeometricPrimitive
+    {
+    public:
+        virtual ~GeometricPrimitive();
+        
+        // Factory methods.
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCube         (_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateBox          (_In_ ID3D11DeviceContext* deviceContext, const XMFLOAT3& size, bool rhcoords = true, bool invertn = false);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateSphere       (_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, size_t tessellation = 16, bool rhcoords = true, bool invertn = false);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateGeoSphere    (_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, size_t tessellation = 3, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCylinder     (_In_ ID3D11DeviceContext* deviceContext, float height = 1, float diameter = 1, size_t tessellation = 32, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCone         (_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, float height = 1, size_t tessellation = 32, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateTorus        (_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, float thickness = 0.333f, size_t tessellation = 32, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateTetrahedron  (_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateOctahedron   (_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateDodecahedron (_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateIcosahedron  (_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateTeapot       (_In_ ID3D11DeviceContext* deviceContext, float size = 1, size_t tessellation = 8, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCustom       (_In_ ID3D11DeviceContext* deviceContext, const std::vector<VertexPositionNormalTexture>& vertices, const std::vector<uint16_t>& indices);
+
+        static void __cdecl CreateCube          (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateBox           (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, const XMFLOAT3& size, bool rhcoords = true, bool invertn = false);
+        static void __cdecl CreateSphere        (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float diameter = 1, size_t tessellation = 16, bool rhcoords = true, bool invertn = false);
+        static void __cdecl CreateGeoSphere     (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float diameter = 1, size_t tessellation = 3, bool rhcoords = true);
+        static void __cdecl CreateCylinder      (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float height = 1, float diameter = 1, size_t tessellation = 32, bool rhcoords = true);
+        static void __cdecl CreateCone          (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float diameter = 1, float height = 1, size_t tessellation = 32, bool rhcoords = true);
+        static void __cdecl CreateTorus         (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float diameter = 1, float thickness = 0.333f, size_t tessellation = 32, bool rhcoords = true);
+        static void __cdecl CreateTetrahedron   (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateOctahedron    (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateDodecahedron  (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateIcosahedron   (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateTeapot        (std::vector<VertexPositionNormalTexture>& vertices, std::vector<uint16_t>& indices, float size = 1, size_t tessellation = 8, bool rhcoords = true);
+
+        // Draw the primitive.
+        void XM_CALLCONV Draw(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection, FXMVECTOR color = Colors::White, _In_opt_ ID3D11ShaderResourceView* texture = nullptr, bool wireframe = false,
+                              _In_opt_ std::function<void DIRECTX_STD_CALLCONV()> setCustomState = nullptr );
+
+        // Draw the primitive using a custom effect.
+        void __cdecl Draw( _In_ IEffect* effect, _In_ ID3D11InputLayout* inputLayout, bool alpha = false, bool wireframe = false,
+                           _In_opt_ std::function<void DIRECTX_STD_CALLCONV()> setCustomState = nullptr );
+
+        // Create input layout for drawing with a custom effect.
+        void __cdecl CreateInputLayout( _In_ IEffect* effect, _Outptr_ ID3D11InputLayout** inputLayout );
+        
+    private:
+        GeometricPrimitive();
+
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        GeometricPrimitive(GeometricPrimitive const&) DIRECTX_CTOR_DELETE
+        GeometricPrimitive& operator= (GeometricPrimitive const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/GraphicsMemory.h b/Windows/DirectXTK/GraphicsMemory.h
new file mode 100644
index 0000000..8ba0953
--- /dev/null
+++ b/Windows/DirectXTK/GraphicsMemory.h
@@ -0,0 +1,67 @@
+//--------------------------------------------------------------------------------------
+// File: GraphicsMemory.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <memory>
+
+
+namespace DirectX
+{
+    class GraphicsMemory
+    {
+    public:
+        #if defined(_XBOX_ONE) && defined(_TITLE)
+        GraphicsMemory(_In_ ID3D11DeviceX* device, UINT backBufferCount = 2);
+        #else
+        GraphicsMemory(_In_ ID3D11Device* device, UINT backBufferCount = 2);
+        #endif
+        GraphicsMemory(GraphicsMemory&& moveFrom);
+        GraphicsMemory& operator= (GraphicsMemory&& moveFrom);
+        virtual ~GraphicsMemory();
+
+        void* __cdecl Allocate(_In_opt_ ID3D11DeviceContext* context, size_t size, int alignment);
+
+        void __cdecl Commit();
+
+        // Singleton
+        static GraphicsMemory& __cdecl Get();
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying.
+        GraphicsMemory(GraphicsMemory const&) DIRECTX_CTOR_DELETE
+        GraphicsMemory& operator=(GraphicsMemory const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/Keyboard.h b/Windows/DirectXTK/Keyboard.h
new file mode 100644
index 0000000..3f39f3c
--- /dev/null
+++ b/Windows/DirectXTK/Keyboard.h
@@ -0,0 +1,492 @@
+//--------------------------------------------------------------------------------------
+// File: Keyboard.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+#include <memory>
+
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)
+namespace ABI { namespace Windows { namespace UI { namespace Core { struct ICoreWindow; } } } }
+#endif
+
+
+namespace DirectX
+{
+    class Keyboard // Movable, non-copyable keyboard input class; its state is held through pImpl (Impl is only forward-declared here).
+    {
+    public:
+        Keyboard();
+        Keyboard(Keyboard&& moveFrom);
+        Keyboard& operator= (Keyboard&& moveFrom);
+        virtual ~Keyboard();
+        // Key identifiers. Each value matches the virtual-key code noted next to the same-named State field.
+        enum Keys
+        {
+            None                = 0,
+
+            Back                = 0x8,
+            Tab                 = 0x9,
+
+            Enter               = 0xd,
+
+            Pause               = 0x13,
+            CapsLock            = 0x14,
+            Kana                = 0x15,
+
+            Kanji               = 0x19,
+
+            Escape              = 0x1b,
+            ImeConvert          = 0x1c,
+            ImeNoConvert        = 0x1d,
+
+            Space               = 0x20,
+            PageUp              = 0x21,
+            PageDown            = 0x22,
+            End                 = 0x23,
+            Home                = 0x24,
+            Left                = 0x25,
+            Up                  = 0x26,
+            Right               = 0x27,
+            Down                = 0x28,
+            Select              = 0x29,
+            Print               = 0x2a,
+            Execute             = 0x2b,
+            PrintScreen         = 0x2c,
+            Insert              = 0x2d,
+            Delete              = 0x2e,
+            Help                = 0x2f,
+            D0                  = 0x30,
+            D1                  = 0x31,
+            D2                  = 0x32,
+            D3                  = 0x33,
+            D4                  = 0x34,
+            D5                  = 0x35,
+            D6                  = 0x36,
+            D7                  = 0x37,
+            D8                  = 0x38,
+            D9                  = 0x39,
+
+            A                   = 0x41,
+            B                   = 0x42,
+            C                   = 0x43,
+            D                   = 0x44,
+            E                   = 0x45,
+            F                   = 0x46,
+            G                   = 0x47,
+            H                   = 0x48,
+            I                   = 0x49,
+            J                   = 0x4a,
+            K                   = 0x4b,
+            L                   = 0x4c,
+            M                   = 0x4d,
+            N                   = 0x4e,
+            O                   = 0x4f,
+            P                   = 0x50,
+            Q                   = 0x51,
+            R                   = 0x52,
+            S                   = 0x53,
+            T                   = 0x54,
+            U                   = 0x55,
+            V                   = 0x56,
+            W                   = 0x57,
+            X                   = 0x58,
+            Y                   = 0x59,
+            Z                   = 0x5a,
+            LeftWindows         = 0x5b,
+            RightWindows        = 0x5c,
+            Apps                = 0x5d,
+
+            Sleep               = 0x5f,
+            NumPad0             = 0x60,
+            NumPad1             = 0x61,
+            NumPad2             = 0x62,
+            NumPad3             = 0x63,
+            NumPad4             = 0x64,
+            NumPad5             = 0x65,
+            NumPad6             = 0x66,
+            NumPad7             = 0x67,
+            NumPad8             = 0x68,
+            NumPad9             = 0x69,
+            Multiply            = 0x6a,
+            Add                 = 0x6b,
+            Separator           = 0x6c,
+            Subtract            = 0x6d,
+
+            Decimal             = 0x6e,
+            Divide              = 0x6f,
+            F1                  = 0x70,
+            F2                  = 0x71,
+            F3                  = 0x72,
+            F4                  = 0x73,
+            F5                  = 0x74,
+            F6                  = 0x75,
+            F7                  = 0x76,
+            F8                  = 0x77,
+            F9                  = 0x78,
+            F10                 = 0x79,
+            F11                 = 0x7a,
+            F12                 = 0x7b,
+            F13                 = 0x7c,
+            F14                 = 0x7d,
+            F15                 = 0x7e,
+            F16                 = 0x7f,
+            F17                 = 0x80,
+            F18                 = 0x81,
+            F19                 = 0x82,
+            F20                 = 0x83,
+            F21                 = 0x84,
+            F22                 = 0x85,
+            F23                 = 0x86,
+            F24                 = 0x87,
+
+            NumLock             = 0x90,
+            Scroll              = 0x91,
+
+            LeftShift           = 0xa0,
+            RightShift          = 0xa1,
+            LeftControl         = 0xa2,
+            RightControl        = 0xa3,
+            LeftAlt             = 0xa4,
+            RightAlt            = 0xa5,
+            BrowserBack         = 0xa6,
+            BrowserForward      = 0xa7,
+            BrowserRefresh      = 0xa8,
+            BrowserStop         = 0xa9,
+            BrowserSearch       = 0xaa,
+            BrowserFavorites    = 0xab,
+            BrowserHome         = 0xac,
+            VolumeMute          = 0xad,
+            VolumeDown          = 0xae,
+            VolumeUp            = 0xaf,
+            MediaNextTrack      = 0xb0,
+            MediaPreviousTrack  = 0xb1,
+            MediaStop           = 0xb2,
+            MediaPlayPause      = 0xb3,
+            LaunchMail          = 0xb4,
+            SelectMedia         = 0xb5,
+            LaunchApplication1  = 0xb6,
+            LaunchApplication2  = 0xb7,
+
+            OemSemicolon        = 0xba,
+            OemPlus             = 0xbb,
+            OemComma            = 0xbc,
+            OemMinus            = 0xbd,
+            OemPeriod           = 0xbe,
+            OemQuestion         = 0xbf,
+            OemTilde            = 0xc0,
+
+            OemOpenBrackets     = 0xdb,
+            OemPipe             = 0xdc,
+            OemCloseBrackets    = 0xdd,
+            OemQuotes           = 0xde,
+            Oem8                = 0xdf,
+
+            OemBackslash        = 0xe2,
+
+            ProcessKey          = 0xe5,
+
+            OemCopy             = 0xf2,
+            OemAuto             = 0xf3,
+            OemEnlW             = 0xf4,
+
+            Attn                = 0xf6,
+            Crsel               = 0xf7,
+            Exsel               = 0xf8,
+            EraseEof            = 0xf9,
+            Play                = 0xfa,
+            Zoom                = 0xfb,
+
+            Pa1                 = 0xfd,
+            OemClear            = 0xfe,
+        };
+        // One-bit flag per key. IsKeyDown/IsKeyUp read the struct as uint32_t words and test bit 'key', so they assume the compiler packs these bool bit-fields contiguously from the least-significant bit.
+        struct State
+        {
+            bool Reserved0 : 8;
+            bool Back : 1;              // VK_BACK, 0x8
+            bool Tab : 1;               // VK_TAB, 0x9
+            bool Reserved1 : 3;
+            bool Enter : 1;             // VK_RETURN, 0xD
+            bool Reserved2 : 2;
+            bool Reserved3 : 3;
+            bool Pause : 1;             // VK_PAUSE, 0x13
+            bool CapsLock : 1;          // VK_CAPITAL, 0x14
+            bool Kana : 1;              // VK_KANA, 0x15
+            bool Reserved4 : 2;
+            bool Reserved5 : 1;
+            bool Kanji : 1;             // VK_KANJI, 0x19
+            bool Reserved6 : 1;
+            bool Escape : 1;            // VK_ESCAPE, 0x1B
+            bool ImeConvert : 1;        // VK_CONVERT, 0x1C
+            bool ImeNoConvert : 1;      // VK_NONCONVERT, 0x1D
+            bool Reserved7 : 2;
+            bool Space : 1;             // VK_SPACE, 0x20
+            bool PageUp : 1;            // VK_PRIOR, 0x21
+            bool PageDown : 1;          // VK_NEXT, 0x22
+            bool End : 1;               // VK_END, 0x23
+            bool Home : 1;              // VK_HOME, 0x24
+            bool Left : 1;              // VK_LEFT, 0x25
+            bool Up : 1;                // VK_UP, 0x26
+            bool Right : 1;             // VK_RIGHT, 0x27
+            bool Down : 1;              // VK_DOWN, 0x28
+            bool Select : 1;            // VK_SELECT, 0x29
+            bool Print : 1;             // VK_PRINT, 0x2A
+            bool Execute : 1;           // VK_EXECUTE, 0x2B
+            bool PrintScreen : 1;       // VK_SNAPSHOT, 0x2C
+            bool Insert : 1;            // VK_INSERT, 0x2D
+            bool Delete : 1;            // VK_DELETE, 0x2E
+            bool Help : 1;              // VK_HELP, 0x2F
+            bool D0 : 1;                // 0x30
+            bool D1 : 1;                // 0x31
+            bool D2 : 1;                // 0x32
+            bool D3 : 1;                // 0x33
+            bool D4 : 1;                // 0x34
+            bool D5 : 1;                // 0x35
+            bool D6 : 1;                // 0x36
+            bool D7 : 1;                // 0x37
+            bool D8 : 1;                // 0x38
+            bool D9 : 1;                // 0x39
+            bool Reserved8 : 6;
+            bool Reserved9 : 1;
+            bool A : 1;                 // 0x41
+            bool B : 1;                 // 0x42
+            bool C : 1;                 // 0x43
+            bool D : 1;                 // 0x44
+            bool E : 1;                 // 0x45
+            bool F : 1;                 // 0x46
+            bool G : 1;                 // 0x47
+            bool H : 1;                 // 0x48
+            bool I : 1;                 // 0x49
+            bool J : 1;                 // 0x4A
+            bool K : 1;                 // 0x4B
+            bool L : 1;                 // 0x4C
+            bool M : 1;                 // 0x4D
+            bool N : 1;                 // 0x4E
+            bool O : 1;                 // 0x4F
+            bool P : 1;                 // 0x50
+            bool Q : 1;                 // 0x51
+            bool R : 1;                 // 0x52
+            bool S : 1;                 // 0x53
+            bool T : 1;                 // 0x54
+            bool U : 1;                 // 0x55
+            bool V : 1;                 // 0x56
+            bool W : 1;                 // 0x57
+            bool X : 1;                 // 0x58
+            bool Y : 1;                 // 0x59
+            bool Z : 1;                 // 0x5A
+            bool LeftWindows : 1;       // VK_LWIN, 0x5B
+            bool RightWindows : 1;      // VK_RWIN, 0x5C
+            bool Apps : 1;              // VK_APPS, 0x5D
+            bool Reserved10 : 1;
+            bool Sleep : 1;             // VK_SLEEP, 0x5F
+            bool NumPad0 : 1;           // VK_NUMPAD0, 0x60
+            bool NumPad1 : 1;           // VK_NUMPAD1, 0x61
+            bool NumPad2 : 1;           // VK_NUMPAD2, 0x62
+            bool NumPad3 : 1;           // VK_NUMPAD3, 0x63
+            bool NumPad4 : 1;           // VK_NUMPAD4, 0x64
+            bool NumPad5 : 1;           // VK_NUMPAD5, 0x65
+            bool NumPad6 : 1;           // VK_NUMPAD6, 0x66
+            bool NumPad7 : 1;           // VK_NUMPAD7, 0x67
+            bool NumPad8 : 1;           // VK_NUMPAD8, 0x68
+            bool NumPad9 : 1;           // VK_NUMPAD9, 0x69
+            bool Multiply : 1;          // VK_MULTIPLY, 0x6A
+            bool Add : 1;               // VK_ADD, 0x6B
+            bool Separator : 1;         // VK_SEPARATOR, 0x6C
+            bool Subtract : 1;          // VK_SUBTRACT, 0x6D
+            bool Decimal : 1;           // VK_DECIMAL, 0x6E
+            bool Divide : 1;            // VK_DIVIDE, 0x6F
+            bool F1 : 1;                // VK_F1, 0x70
+            bool F2 : 1;                // VK_F2, 0x71
+            bool F3 : 1;                // VK_F3, 0x72
+            bool F4 : 1;                // VK_F4, 0x73
+            bool F5 : 1;                // VK_F5, 0x74
+            bool F6 : 1;                // VK_F6, 0x75
+            bool F7 : 1;                // VK_F7, 0x76
+            bool F8 : 1;                // VK_F8, 0x77
+            bool F9 : 1;                // VK_F9, 0x78
+            bool F10 : 1;               // VK_F10, 0x79
+            bool F11 : 1;               // VK_F11, 0x7A
+            bool F12 : 1;               // VK_F12, 0x7B
+            bool F13 : 1;               // VK_F13, 0x7C
+            bool F14 : 1;               // VK_F14, 0x7D
+            bool F15 : 1;               // VK_F15, 0x7E
+            bool F16 : 1;               // VK_F16, 0x7F
+            bool F17 : 1;               // VK_F17, 0x80
+            bool F18 : 1;               // VK_F18, 0x81
+            bool F19 : 1;               // VK_F19, 0x82
+            bool F20 : 1;               // VK_F20, 0x83
+            bool F21 : 1;               // VK_F21, 0x84
+            bool F22 : 1;               // VK_F22, 0x85
+            bool F23 : 1;               // VK_F23, 0x86
+            bool F24 : 1;               // VK_F24, 0x87
+            bool Reserved11 : 8;
+            bool NumLock : 1;           // VK_NUMLOCK, 0x90
+            bool Scroll : 1;            // VK_SCROLL, 0x91
+            bool Reserved12 : 6;
+            bool Reserved13 : 8;
+            bool LeftShift : 1;         // VK_LSHIFT, 0xA0
+            bool RightShift : 1;        // VK_RSHIFT, 0xA1
+            bool LeftControl : 1;       // VK_LCONTROL, 0xA2
+            bool RightControl : 1;      // VK_RCONTROL, 0xA3
+            bool LeftAlt : 1;           // VK_LMENU, 0xA4
+            bool RightAlt : 1;          // VK_RMENU, 0xA5
+            bool BrowserBack : 1;       // VK_BROWSER_BACK, 0xA6
+            bool BrowserForward : 1;    // VK_BROWSER_FORWARD, 0xA7
+            bool BrowserRefresh : 1;    // VK_BROWSER_REFRESH, 0xA8
+            bool BrowserStop : 1;       // VK_BROWSER_STOP, 0xA9
+            bool BrowserSearch : 1;     // VK_BROWSER_SEARCH, 0xAA
+            bool BrowserFavorites : 1;  // VK_BROWSER_FAVORITES, 0xAB
+            bool BrowserHome : 1;       // VK_BROWSER_HOME, 0xAC
+            bool VolumeMute : 1;        // VK_VOLUME_MUTE, 0xAD
+            bool VolumeDown : 1;        // VK_VOLUME_DOWN, 0xAE
+            bool VolumeUp : 1;          // VK_VOLUME_UP, 0xAF
+            bool MediaNextTrack : 1;    // VK_MEDIA_NEXT_TRACK, 0xB0
+            bool MediaPreviousTrack : 1;// VK_MEDIA_PREV_TRACK, 0xB1
+            bool MediaStop : 1;         // VK_MEDIA_STOP, 0xB2
+            bool MediaPlayPause : 1;    // VK_MEDIA_PLAY_PAUSE, 0xB3
+            bool LaunchMail : 1;        // VK_LAUNCH_MAIL, 0xB4
+            bool SelectMedia : 1;       // VK_LAUNCH_MEDIA_SELECT, 0xB5
+            bool LaunchApplication1 : 1;// VK_LAUNCH_APP1, 0xB6
+            bool LaunchApplication2 : 1;// VK_LAUNCH_APP2, 0xB7
+            bool Reserved14 : 2;
+            bool OemSemicolon : 1;      // VK_OEM_1, 0xBA
+            bool OemPlus : 1;           // VK_OEM_PLUS, 0xBB
+            bool OemComma : 1;          // VK_OEM_COMMA, 0xBC
+            bool OemMinus : 1;          // VK_OEM_MINUS, 0xBD
+            bool OemPeriod : 1;         // VK_OEM_PERIOD, 0xBE
+            bool OemQuestion : 1;       // VK_OEM_2, 0xBF
+            bool OemTilde : 1;          // VK_OEM_3, 0xC0
+            bool Reserved15 : 7;
+            bool Reserved16 : 8;
+            bool Reserved17 : 8;
+            bool Reserved18 : 3;
+            bool OemOpenBrackets : 1;   // VK_OEM_4, 0xDB
+            bool OemPipe : 1;           // VK_OEM_5, 0xDC
+            bool OemCloseBrackets : 1;  // VK_OEM_6, 0xDD
+            bool OemQuotes : 1;         // VK_OEM_7, 0xDE
+            bool Oem8 : 1;              // VK_OEM_8, 0xDF
+            bool Reserved19 : 2;
+            bool OemBackslash : 1;      // VK_OEM_102, 0xE2
+            bool Reserved20 : 2;
+            bool ProcessKey : 1;        // VK_PROCESSKEY, 0xE5
+            bool Reserved21 : 2;
+            bool Reserved22 : 8;
+            bool Reserved23 : 2;
+            bool OemCopy : 1;           // 0xF2
+            bool OemAuto : 1;           // 0xF3
+            bool OemEnlW : 1;           // 0xF4
+            bool Reserved24 : 1;
+            bool Attn : 1;              // VK_ATTN, 0xF6
+            bool Crsel : 1;             // VK_CRSEL, 0xF7
+            bool Exsel : 1;             // VK_EXSEL, 0xF8
+            bool EraseEof : 1;          // VK_EREOF, 0xF9
+            bool Play : 1;              // VK_PLAY, 0xFA
+            bool Zoom : 1;              // VK_ZOOM, 0xFB
+            bool Reserved25 : 1;
+            bool Pa1 : 1;               // VK_PA1, 0xFD
+            bool OemClear : 1;          // VK_OEM_CLEAR, 0xFE
+            bool Reserved26: 1;
+            // Returns true when the flag for 'key' is set; returns false for any key outside 0..0xfe.
+            bool __cdecl IsKeyDown(Keys key) const
+            {
+                if (key >= 0 && key <= 0xfe) // 0xff is the unused Reserved26 flag; bound kept identical to IsKeyUp
+                {
+                    auto ptr = reinterpret_cast<const uint32_t*>(this);
+                    unsigned int bf = 1u << (key & 0x1f); // bit within the 32-bit word
+                    return (ptr[(key >> 5)] & bf) != 0;   // key >> 5 selects the word
+                }
+                return false;
+            }
+            // Returns true when the flag for 'key' is clear; returns false for any key outside 0..0xfe (so it is not the negation of IsKeyDown there).
+            bool __cdecl IsKeyUp(Keys key) const
+            {
+                if (key >= 0 && key <= 0xfe)
+                {
+                    auto ptr = reinterpret_cast<const uint32_t*>(this);
+                    unsigned int bf = 1u << (key & 0x1f); // bit within the 32-bit word
+                    return (ptr[(key >> 5)] & bf) == 0;   // key >> 5 selects the word
+                }
+                return false;
+            }
+        };
+        // Holds 'released', 'pressed' and 'lastState' State values; Update() and Reset() are defined outside this header.
+        class KeyboardStateTracker
+        {
+        public:
+            State released;
+            State pressed;
+
+            KeyboardStateTracker() { Reset(); }
+
+            void __cdecl Update(const State& state);
+
+            void __cdecl Reset();
+            // These two test the flag for 'key' in 'pressed' / 'released' respectively (via State::IsKeyDown).
+            bool __cdecl IsKeyPressed(Keys key) const { return pressed.IsKeyDown(key); }
+            bool __cdecl IsKeyReleased(Keys key) const { return released.IsKeyDown(key); }
+
+        public:
+            State lastState;
+        };
+
+        // Retrieve the current state of the keyboard
+        State __cdecl GetState() const;
+
+        // Reset the keyboard state
+        void __cdecl Reset();
+
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) && defined(WM_USER)
+        static void __cdecl ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam);
+#endif
+
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)
+        void __cdecl SetWindow(ABI::Windows::UI::Core::ICoreWindow* window);
+#ifdef __cplusplus_winrt
+        void __cdecl SetWindow(Windows::UI::Core::CoreWindow^ window)
+        {
+            // See https://msdn.microsoft.com/en-us/library/hh755802.aspx
+            SetWindow(reinterpret_cast<ABI::Windows::UI::Core::ICoreWindow*>(window));
+        }
+#endif
+#endif
+
+        // Singleton accessor; returns a reference to a Keyboard object.
+        static Keyboard& __cdecl Get();
+
+    private:
+        // Private implementation (class body is not part of this header).
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying (macro expands to '=delete;', or to a bare ';' declaration when _MSC_VER < 1800).
+        Keyboard(Keyboard const&) DIRECTX_CTOR_DELETE
+        Keyboard& operator=(Keyboard const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/Model.h b/Windows/DirectXTK/Model.h
new file mode 100644
index 0000000..75fd90f
--- /dev/null
+++ b/Windows/DirectXTK/Model.h
@@ -0,0 +1,163 @@
+//--------------------------------------------------------------------------------------
+// File: Model.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+#include <DirectXMath.h>
+#include <DirectXCollision.h>
+
+#include <memory>
+#include <functional>
+#include <set>
+#include <string>
+#include <vector>
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#include <intsafe.h>
+#pragma warning(pop)
+
+#include <wrl\client.h>
+
+// VS 2010 doesn't support explicit calling convention for std::function
+#ifndef DIRECTX_STD_CALLCONV
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#define DIRECTX_STD_CALLCONV
+#else
+#define DIRECTX_STD_CALLCONV __cdecl
+#endif
+#endif
+
+namespace DirectX
+{
+    #if (DIRECTX_MATH_VERSION < 305) && !defined(XM_CALLCONV)
+    #define XM_CALLCONV __fastcall
+    typedef const XMVECTOR& HXMVECTOR;
+    typedef const XMMATRIX& FXMMATRIX;
+    #endif
+
+    class IEffect;
+    class IEffectFactory;
+    class CommonStates;
+    class ModelMesh;
+
+    //----------------------------------------------------------------------------------
+    // Each mesh part is a submesh with a single effect
+    class ModelMeshPart
+    {
+    public:
+        ModelMeshPart();
+        virtual ~ModelMeshPart();
+        // Public fields, accessed directly (no accessors are declared in this class).
+        uint32_t                                                indexCount;
+        uint32_t                                                startIndex;
+        uint32_t                                                vertexOffset;
+        uint32_t                                                vertexStride;
+        D3D_PRIMITIVE_TOPOLOGY                                  primitiveType;
+        DXGI_FORMAT                                             indexFormat;
+        Microsoft::WRL::ComPtr<ID3D11InputLayout>               inputLayout;
+        Microsoft::WRL::ComPtr<ID3D11Buffer>                    indexBuffer;
+        Microsoft::WRL::ComPtr<ID3D11Buffer>                    vertexBuffer;
+        std::shared_ptr<IEffect>                                effect;
+        std::shared_ptr<std::vector<D3D11_INPUT_ELEMENT_DESC>>  vbDecl;
+        bool                                                    isAlpha;
+        // Parts are stored through std::unique_ptr, so a Collection can be moved but not copied.
+        typedef std::vector<std::unique_ptr<ModelMeshPart>> Collection;
+
+        // Draw mesh part with a caller-supplied effect and input layout; setCustomState is optional (defaults to nullptr)
+        void __cdecl Draw( _In_ ID3D11DeviceContext* deviceContext, _In_ IEffect* ieffect, _In_ ID3D11InputLayout* iinputLayout,
+                           _In_opt_ std::function<void DIRECTX_STD_CALLCONV()> setCustomState = nullptr ) const;
+
+        // Create input layout for drawing with a custom effect; the result is returned through iinputLayout (_Outptr_).
+        void __cdecl CreateInputLayout( _In_ ID3D11Device* d3dDevice, _In_ IEffect* ieffect, _Outptr_ ID3D11InputLayout** iinputLayout );
+
+        // Change effect used by part and regenerate input layout (be sure to call Model::Modified as well); ieffect is taken by non-const reference
+        void __cdecl ModifyEffect( _In_ ID3D11Device* d3dDevice, _In_ std::shared_ptr<IEffect>& ieffect, bool isalpha = false );
+    };
+
+
+    //----------------------------------------------------------------------------------
+    // A mesh consists of one or more model mesh parts
+    class ModelMesh
+    {
+    public:
+        ModelMesh();
+        virtual ~ModelMesh();
+        // Public fields, accessed directly; 'meshParts' holds its parts through std::unique_ptr (ModelMeshPart::Collection).
+        BoundingSphere              boundingSphere;
+        BoundingBox                 boundingBox;
+        ModelMeshPart::Collection   meshParts;
+        std::wstring                name;
+        bool                        ccw;
+        bool                        pmalpha;
+        // Meshes are stored through std::shared_ptr.
+        typedef std::vector<std::shared_ptr<ModelMesh>> Collection;
+
+        // Setup states for drawing mesh; 'alpha' and 'wireframe' both default to false
+        void __cdecl PrepareForRendering( _In_ ID3D11DeviceContext* deviceContext, CommonStates& states, bool alpha = false, bool wireframe = false ) const;
+
+        // Draw the mesh; setCustomState is optional (defaults to nullptr). NOTE(review): how 'alpha' selects parts is decided in the implementation - confirm.
+        void XM_CALLCONV Draw( _In_ ID3D11DeviceContext* deviceContext, FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection,
+                               bool alpha = false, _In_opt_ std::function<void DIRECTX_STD_CALLCONV()> setCustomState = nullptr ) const;
+    };
+
+
+    //----------------------------------------------------------------------------------
+    // A model consists of one or more meshes
+    class Model
+    {
+    public:
+        virtual ~Model();
+        // No constructor is declared here; the static CreateFrom* functions below return std::unique_ptr<Model>.
+        ModelMesh::Collection   meshes;
+        std::wstring            name;
+
+        // Draw all the meshes in the model; setCustomState is optional (defaults to nullptr)
+        void XM_CALLCONV Draw( _In_ ID3D11DeviceContext* deviceContext, CommonStates& states, FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection,
+                               bool wireframe = false, _In_opt_ std::function<void DIRECTX_STD_CALLCONV()> setCustomState = nullptr ) const;
+
+        // Notify model that effects, parts list, or mesh list has changed (empties mEffectCache)
+        void __cdecl Modified() { mEffectCache.clear(); }
+
+        // Update all effects used by the model; setEffect receives an IEffect* per call
+        void __cdecl UpdateEffects( _In_ std::function<void DIRECTX_STD_CALLCONV(IEffect*)> setEffect );
+
+        // Loads a model from a Visual Studio Starter Kit .CMO file (memory buffer or file name); note ccw defaults to true here
+        static std::unique_ptr<Model> __cdecl CreateFromCMO( _In_ ID3D11Device* d3dDevice, _In_reads_bytes_(dataSize) const uint8_t* meshData, size_t dataSize,
+                                                             _In_ IEffectFactory& fxFactory, bool ccw = true, bool pmalpha = false );
+        static std::unique_ptr<Model> __cdecl CreateFromCMO( _In_ ID3D11Device* d3dDevice, _In_z_ const wchar_t* szFileName,
+                                                             _In_ IEffectFactory& fxFactory, bool ccw = true, bool pmalpha = false );
+
+        // Loads a model from a DirectX SDK .SDKMESH file (memory buffer or file name); ccw defaults to false
+        static std::unique_ptr<Model> __cdecl CreateFromSDKMESH( _In_ ID3D11Device* d3dDevice, _In_reads_bytes_(dataSize) const uint8_t* meshData, _In_ size_t dataSize,
+                                                                 _In_ IEffectFactory& fxFactory, bool ccw = false, bool pmalpha = false );
+        static std::unique_ptr<Model> __cdecl CreateFromSDKMESH( _In_ ID3D11Device* d3dDevice, _In_z_ const wchar_t* szFileName,
+                                                                 _In_ IEffectFactory& fxFactory, bool ccw = false, bool pmalpha = false );
+
+        // Loads a model from a .VBO file (memory buffer or file name); the effect is optional (defaults to nullptr), ccw defaults to false
+        static std::unique_ptr<Model> __cdecl CreateFromVBO( _In_ ID3D11Device* d3dDevice, _In_reads_bytes_(dataSize) const uint8_t* meshData, _In_ size_t dataSize,
+                                                             _In_opt_ std::shared_ptr<IEffect> ieffect = nullptr, bool ccw = false, bool pmalpha = false );
+        static std::unique_ptr<Model> __cdecl CreateFromVBO( _In_ ID3D11Device* d3dDevice, _In_z_ const wchar_t* szFileName, 
+                                                             _In_opt_ std::shared_ptr<IEffect> ieffect = nullptr, bool ccw = false, bool pmalpha = false );
+
+    private:
+        std::set<IEffect*>  mEffectCache; // Raw IEffect pointers; emptied by Modified()
+    };
+ }
\ No newline at end of file
diff --git a/Windows/DirectXTK/Mouse.h b/Windows/DirectXTK/Mouse.h
new file mode 100644
index 0000000..56f02f3
--- /dev/null
+++ b/Windows/DirectXTK/Mouse.h
@@ -0,0 +1,129 @@
+//--------------------------------------------------------------------------------------
+// File: Mouse.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <memory>
+
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)
+namespace ABI { namespace Windows { namespace UI { namespace Core { struct ICoreWindow; } } } }
+#endif
+
+
+namespace DirectX
+{
+    class Mouse // Movable, non-copyable mouse input class; its state is held through pImpl (Impl is only forward-declared here).
+    {
+    public:
+        Mouse();
+        Mouse(Mouse&& moveFrom);
+        Mouse& operator= (Mouse&& moveFrom);
+        virtual ~Mouse();
+        // Position reporting mode; stored in State::positionMode and selected with SetMode().
+        enum Mode
+        {
+            MODE_ABSOLUTE = 0,
+            MODE_RELATIVE,
+        };
+        // Plain snapshot returned by GetState(): five button flags, x/y, scroll wheel value and the mode.
+        struct State
+        {
+            bool    leftButton;
+            bool    middleButton;
+            bool    rightButton;
+            bool    xButton1;
+            bool    xButton2;
+            int     x;
+            int     y;
+            int     scrollWheelValue;
+            Mode    positionMode;
+        };
+        // Keeps one ButtonState per button plus the last State passed in; Update() and Reset() are defined outside this header.
+        class ButtonStateTracker
+        {
+        public:
+            enum ButtonState
+            {
+                UP = 0,         // Button is up
+                HELD = 1,       // Button is held down
+                RELEASED = 2,   // Button was just released
+                PRESSED = 3,    // Button was just pressed
+            };
+
+            ButtonState leftButton;
+            ButtonState middleButton;
+            ButtonState rightButton;
+            ButtonState xButton1;
+            ButtonState xButton2;
+
+            ButtonStateTracker() { Reset(); }
+
+            void __cdecl Update( const State& state );
+
+            void __cdecl Reset();
+
+        private:
+            State lastState;
+        };
+
+        // Retrieve the current state of the mouse
+        State __cdecl GetState() const;
+
+        // Resets the accumulated scroll wheel value
+        void __cdecl ResetScrollWheelValue();
+
+        // Sets mouse mode (defaults to absolute)
+        void __cdecl SetMode(Mode mode);
+        // The HWND-based declarations below are compiled only when the following preprocessor condition holds.
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) && defined(WM_USER)
+        void __cdecl SetWindow(HWND window);
+        static void __cdecl ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam);
+#endif
+
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)
+        void __cdecl SetWindow(ABI::Windows::UI::Core::ICoreWindow* window);
+#ifdef __cplusplus_winrt
+        void __cdecl SetWindow(Windows::UI::Core::CoreWindow^ window)
+        {
+            // See https://msdn.microsoft.com/en-us/library/hh755802.aspx
+            SetWindow(reinterpret_cast<ABI::Windows::UI::Core::ICoreWindow*>(window));
+        }
+#endif
+        static void __cdecl SetDpi(float dpi);
+#endif
+
+        // Singleton accessor; returns a reference to a Mouse object.
+        static Mouse& __cdecl Get();
+
+    private:
+        // Private implementation (class body is not part of this header).
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        // Prevent copying (macro expands to '=delete;', or to a bare ';' declaration when _MSC_VER < 1800).
+        Mouse(Mouse const&) DIRECTX_CTOR_DELETE
+        Mouse& operator=(Mouse const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/PrimitiveBatch.h b/Windows/DirectXTK/PrimitiveBatch.h
new file mode 100644
index 0000000..c2e7965
--- /dev/null
+++ b/Windows/DirectXTK/PrimitiveBatch.h
@@ -0,0 +1,158 @@
+//--------------------------------------------------------------------------------------
+// File: PrimitiveBatch.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <memory.h>
+#include <memory>
+
+#pragma warning(push)
+#pragma warning(disable: 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+
+namespace DirectX
+{
+    namespace Internal
+    {
+        // Untyped base class, not to be used directly: clients should access this via the derived PrimitiveBatch<T>.
+        class PrimitiveBatchBase
+        {
+        protected: // construction, move and destruction are reachable only from derived classes
+            PrimitiveBatchBase(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices, size_t maxVertices, size_t vertexSize);
+            PrimitiveBatchBase(PrimitiveBatchBase&& moveFrom);
+            PrimitiveBatchBase& operator= (PrimitiveBatchBase&& moveFrom);
+            virtual ~PrimitiveBatchBase();
+
+        public:
+            // Begin/End a batch of primitive drawing operations.
+            void __cdecl Begin();
+            void __cdecl End();
+
+        protected:
+            // Internal, untyped drawing method. The typed wrappers in PrimitiveBatch<T> call this and then write their vertices through the pointer returned in *pMappedVertices.
+            void __cdecl Draw(D3D11_PRIMITIVE_TOPOLOGY topology, bool isIndexed, _In_opt_count_(indexCount) uint16_t const* indices, size_t indexCount, size_t vertexCount, _Out_ void** pMappedVertices);
+
+        private:
+            // Private implementation: Impl is only forward-declared in this header.
+            class Impl;
+
+            std::unique_ptr<Impl> pImpl;
+
+            // Prevent copying. DIRECTX_CTOR_DELETE expands to '=delete;' or, for _MSC_VER < 1800, to a bare ';' that leaves these private and undefined.
+            PrimitiveBatchBase(PrimitiveBatchBase const&) DIRECTX_CTOR_DELETE
+            PrimitiveBatchBase& operator= (PrimitiveBatchBase const&) DIRECTX_CTOR_DELETE
+        };
+    }
+
+
+    // Typed front end over Internal::PrimitiveBatchBase, eg. PrimitiveBatch<VertexPositionColor>; sizeof(TVertex) is handed to the base as the vertex size.
+    template<typename TVertex>
+    class PrimitiveBatch : public Internal::PrimitiveBatchBase
+    {
+        static const size_t DefaultBatchSize = 2048;
+
+    public:
+        PrimitiveBatch(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices = DefaultBatchSize * 3, size_t maxVertices = DefaultBatchSize)
+          : PrimitiveBatchBase(deviceContext, maxIndices, maxVertices, sizeof(TVertex))
+        {}
+
+        PrimitiveBatch(PrimitiveBatch&& moveFrom)
+          : PrimitiveBatchBase(std::move(moveFrom))
+        {}
+
+        PrimitiveBatch& __cdecl operator= (PrimitiveBatch&& moveFrom)
+        {
+            this->PrimitiveBatchBase::operator=(std::move(moveFrom));
+            return *this;
+        }
+
+
+        // Non-indexed draw of caller-supplied vertices (similar to the D3D9 API DrawPrimitiveUP).
+        void __cdecl Draw(D3D11_PRIMITIVE_TOPOLOGY topology, _In_reads_(vertexCount) TVertex const* vertices, size_t vertexCount)
+        {
+            const size_t byteCount = vertexCount * sizeof(TVertex);
+            void* destination;
+            PrimitiveBatchBase::Draw(topology, false, nullptr, 0, vertexCount, &destination);
+            // The base call hands back where the vertices go; copy them in one block.
+            memcpy(destination, vertices, byteCount);
+        }
+
+
+        // Indexed draw of caller-supplied vertices (similar to the D3D9 API DrawIndexedPrimitiveUP).
+        void __cdecl DrawIndexed(D3D11_PRIMITIVE_TOPOLOGY topology, _In_reads_(indexCount) uint16_t const* indices, size_t indexCount, _In_reads_(vertexCount) TVertex const* vertices, size_t vertexCount)
+        {
+            const size_t byteCount = vertexCount * sizeof(TVertex);
+            void* destination;
+            PrimitiveBatchBase::Draw(topology, true, indices, indexCount, vertexCount, &destination);
+            // Indices were passed to the base call; only the vertices are copied here.
+            memcpy(destination, vertices, byteCount);
+        }
+
+
+        void __cdecl DrawLine(TVertex const& v1, TVertex const& v2)
+        {
+            TVertex* slots;
+            void** const mapped = reinterpret_cast<void**>(&slots);
+            PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY_LINELIST, false, nullptr, 0, 2, mapped);
+            // Two vertices, no indices.
+            slots[0] = v1;
+            slots[1] = v2;
+        }
+
+
+        void __cdecl DrawTriangle(TVertex const& v1, TVertex const& v2, TVertex const& v3)
+        {
+            TVertex* slots;
+            void** const mapped = reinterpret_cast<void**>(&slots);
+            PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, false, nullptr, 0, 3, mapped);
+            // Three vertices, no indices.
+            slots[0] = v1;
+            slots[1] = v2;
+            slots[2] = v3;
+        }
+
+
+        void __cdecl DrawQuad(TVertex const& v1, TVertex const& v2, TVertex const& v3, TVertex const& v4)
+        {
+            static const uint16_t cornerOrder[] = { 0, 1, 2, 0, 2, 3 };
+            // Four vertices drawn as the two triangles (0,1,2) and (0,2,3): 6 indices.
+            TVertex* slots;
+            void** const mapped = reinterpret_cast<void**>(&slots);
+            PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, true, cornerOrder, 6, 4, mapped);
+
+            slots[0] = v1;
+            slots[1] = v2;
+            slots[2] = v3;
+            slots[3] = v4;
+        }
+    };
+}
diff --git a/Windows/DirectXTK/ScreenGrab.h b/Windows/DirectXTK/ScreenGrab.h
new file mode 100644
index 0000000..2a1b661
--- /dev/null
+++ b/Windows/DirectXTK/ScreenGrab.h
@@ -0,0 +1,64 @@
+//--------------------------------------------------------------------------------------
+// File: ScreenGrab.h
+//
+// Function for capturing a 2D texture and saving it to a file (aka a 'screenshot'
+// when used on a Direct3D 11 Render Target).
+//
+// Note these functions are useful as a light-weight runtime screen grabber. For
+// full-featured texture capture, DDS writer, and texture processing pipeline,
+// see the 'Texconv' sample and the 'DirectXTex' library.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+#include <ocidl.h>
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+#include <functional>
+
+// VS 2010 doesn't support explicit calling convention for std::function
+#ifndef DIRECTX_STD_CALLCONV
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#define DIRECTX_STD_CALLCONV
+#else
+#define DIRECTX_STD_CALLCONV __cdecl
+#endif
+#endif
+
+namespace DirectX
+{
+    HRESULT __cdecl SaveDDSTextureToFile( _In_ ID3D11DeviceContext* pContext,
+                                          _In_ ID3D11Resource* pSource,
+                                          _In_z_ LPCWSTR fileName );
+    // ^ DDS variant: takes only the device context, the source resource and a null-terminated wide-character file name.
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY != WINAPI_FAMILY_PHONE_APP) || (_WIN32_WINNT > _WIN32_WINNT_WIN8)
+    // WIC variant: compiled out only when WINAPI_FAMILY is WINAPI_FAMILY_PHONE_APP and _WIN32_WINNT is not above _WIN32_WINNT_WIN8.
+    HRESULT __cdecl SaveWICTextureToFile( _In_ ID3D11DeviceContext* pContext,
+                                          _In_ ID3D11Resource* pSource,
+                                          _In_ REFGUID guidContainerFormat, 
+                                          _In_z_ LPCWSTR fileName,
+                                          _In_opt_ const GUID* targetFormat = nullptr,
+                                          _In_opt_ std::function<void DIRECTX_STD_CALLCONV(IPropertyBag2*)> setCustomProps = nullptr );
+    // ^ targetFormat and setCustomProps are optional (_In_opt_) and default to nullptr.
+#endif
+}
\ No newline at end of file
diff --git a/Windows/DirectXTK/SimpleMath.h b/Windows/DirectXTK/SimpleMath.h
new file mode 100644
index 0000000..c0cf1c8
--- /dev/null
+++ b/Windows/DirectXTK/SimpleMath.h
@@ -0,0 +1,927 @@
+//-------------------------------------------------------------------------------------
+// SimpleMath.h -- Simplified C++ Math wrapper for DirectXMath
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//  
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+#include <functional>
+#include <memory.h>
+
+#include <DirectXMath.h>
+#include <DirectXPackedVector.h>
+#include <DirectXCollision.h>
+
+namespace DirectX
+{
+
+namespace SimpleMath
+{
+    
+struct Vector4;
+struct Matrix;
+struct Quaternion;
+struct Plane;
+
+//------------------------------------------------------------------------------
+// 2D vector: XMFLOAT2 extended with operators and helpers; converts implicitly from FXMVECTOR / XMFLOAT2 and to XMVECTOR
+struct Vector2 : public XMFLOAT2
+{
+    Vector2() : XMFLOAT2(0.f, 0.f) {}
+    explicit Vector2(float x) : XMFLOAT2( x, x ) {} // same value for both components
+    Vector2(float _x, float _y) : XMFLOAT2(_x, _y) {}
+    explicit Vector2(_In_reads_(2) const float *pArray) : XMFLOAT2(pArray) {}
+    Vector2(FXMVECTOR V) { XMStoreFloat2( this, V ); }
+    Vector2(const XMFLOAT2& V) { this->x = V.x; this->y = V.y; }
+
+    operator XMVECTOR() const { return XMLoadFloat2( this ); }
+
+    // Comparison operators
+    bool operator == ( const Vector2& V ) const;
+    bool operator != ( const Vector2& V ) const;
+
+    // Assignment operators (plain and compound)
+    Vector2& operator= (const Vector2& V) { x = V.x; y = V.y; return *this; }
+    Vector2& operator= (const XMFLOAT2& V) { x = V.x; y = V.y; return *this; }
+    Vector2& operator+= (const Vector2& V);
+    Vector2& operator-= (const Vector2& V);
+    Vector2& operator*= (const Vector2& V);
+    Vector2& operator*= (float S);
+    Vector2& operator/= (float S);
+
+    // Unary operators
+    Vector2 operator+ () const { return *this; }
+    Vector2 operator- () const { return Vector2(-x, -y); }
+
+    // Vector operations
+    bool InBounds( const Vector2& Bounds ) const;
+
+    float Length() const;
+    float LengthSquared() const;
+
+    float Dot( const Vector2& V ) const;
+    void Cross( const Vector2& V, Vector2& result ) const;
+    Vector2 Cross( const Vector2& V ) const;
+    
+    void Normalize();
+    void Normalize( Vector2& result ) const;
+
+    void Clamp( const Vector2& vmin, const Vector2& vmax );
+    void Clamp( const Vector2& vmin, const Vector2& vmax, Vector2& result ) const;
+
+    // Static functions. Most come in two forms: one takes a non-const 'result' reference, the other returns the value.
+    static float Distance( const Vector2& v1, const Vector2& v2 );
+    static float DistanceSquared( const Vector2& v1, const Vector2& v2 );
+
+    static void Min( const Vector2& v1, const Vector2& v2, Vector2& result );
+    static Vector2 Min( const Vector2& v1, const Vector2& v2 );
+
+    static void Max( const Vector2& v1, const Vector2& v2, Vector2& result );
+    static Vector2 Max( const Vector2& v1, const Vector2& v2 );
+
+    static void Lerp( const Vector2& v1, const Vector2& v2, float t, Vector2& result );
+    static Vector2 Lerp( const Vector2& v1, const Vector2& v2, float t );
+
+    static void SmoothStep( const Vector2& v1, const Vector2& v2, float t, Vector2& result );
+    static Vector2 SmoothStep( const Vector2& v1, const Vector2& v2, float t );
+
+    static void Barycentric( const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g, Vector2& result );
+    static Vector2 Barycentric( const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g );
+
+    static void CatmullRom( const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t, Vector2& result );
+    static Vector2 CatmullRom( const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t );
+
+    static void Hermite( const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t, Vector2& result );
+    static Vector2 Hermite( const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t );
+
+    static void Reflect( const Vector2& ivec, const Vector2& nvec, Vector2& result );
+    static Vector2 Reflect( const Vector2& ivec, const Vector2& nvec );
+
+    static void Refract( const Vector2& ivec, const Vector2& nvec, float refractionIndex, Vector2& result );
+    static Vector2 Refract( const Vector2& ivec, const Vector2& nvec, float refractionIndex );
+
+    static void Transform( const Vector2& v, const Quaternion& quat, Vector2& result );
+    static Vector2 Transform( const Vector2& v, const Quaternion& quat );
+
+    static void Transform( const Vector2& v, const Matrix& m, Vector2& result );
+    static Vector2 Transform( const Vector2& v, const Matrix& m );
+    static void Transform( _In_reads_(count) const Vector2* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector2* resultArray );
+
+    static void Transform( const Vector2& v, const Matrix& m, Vector4& result );
+    static void Transform( _In_reads_(count) const Vector2* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector4* resultArray );
+
+    static void TransformNormal( const Vector2& v, const Matrix& m, Vector2& result );
+    static Vector2 TransformNormal( const Vector2& v, const Matrix& m );
+    static void TransformNormal( _In_reads_(count) const Vector2* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector2* resultArray );
+
+    // Constants (only declared here; the definitions are not in this struct)
+    static const Vector2 Zero;
+    static const Vector2 One;
+    static const Vector2 UnitX;
+    static const Vector2 UnitY;
+};
+
+// Binary operators for Vector2 (free functions; operator* accepts the float on either side, operator/ is declared for two vectors only)
+Vector2 operator+ (const Vector2& V1, const Vector2& V2);
+Vector2 operator- (const Vector2& V1, const Vector2& V2);
+Vector2 operator* (const Vector2& V1, const Vector2& V2);
+Vector2 operator* (const Vector2& V, float S);
+Vector2 operator/ (const Vector2& V1, const Vector2& V2);
+Vector2 operator* (float S, const Vector2& V);
+
+//------------------------------------------------------------------------------
+// 3D vector: XMFLOAT3 extended with operators and helpers; converts implicitly from FXMVECTOR / XMFLOAT3 and to XMVECTOR
+struct Vector3 : public XMFLOAT3
+{
+    Vector3() : XMFLOAT3(0.f, 0.f, 0.f) {}
+    explicit Vector3(float x) : XMFLOAT3( x, x, x ) {} // same value for all three components
+    Vector3(float _x, float _y, float _z) : XMFLOAT3(_x, _y, _z) {}
+    explicit Vector3(_In_reads_(3) const float *pArray) : XMFLOAT3(pArray) {}
+    Vector3(FXMVECTOR V) { XMStoreFloat3( this, V ); }
+    Vector3(const XMFLOAT3& V) { this->x = V.x; this->y = V.y; this->z = V.z; }
+
+    operator XMVECTOR() const { return XMLoadFloat3( this ); }
+
+    // Comparison operators
+    bool operator == ( const Vector3& V ) const;
+    bool operator != ( const Vector3& V ) const;
+
+    // Assignment operators (plain and compound)
+    Vector3& operator= (const Vector3& V) { x = V.x; y = V.y; z = V.z; return *this; }
+    Vector3& operator= (const XMFLOAT3& V) { x = V.x; y = V.y; z = V.z; return *this; }
+    Vector3& operator+= (const Vector3& V);
+    Vector3& operator-= (const Vector3& V);
+    Vector3& operator*= (const Vector3& V);
+    Vector3& operator*= (float S);
+    Vector3& operator/= (float S);
+
+    // Unary operators
+    Vector3 operator+ () const { return *this; }
+    Vector3 operator- () const;
+
+    // Vector operations
+    bool InBounds( const Vector3& Bounds ) const;
+
+    float Length() const;
+    float LengthSquared() const;
+
+    float Dot( const Vector3& V ) const;
+    void Cross( const Vector3& V, Vector3& result ) const;
+    Vector3 Cross( const Vector3& V ) const;
+
+    void Normalize();
+    void Normalize( Vector3& result ) const;
+
+    void Clamp( const Vector3& vmin, const Vector3& vmax );
+    void Clamp( const Vector3& vmin, const Vector3& vmax, Vector3& result ) const;
+
+    // Static functions. Most come in two forms: one takes a non-const 'result' reference, the other returns the value.
+    static float Distance( const Vector3& v1, const Vector3& v2 );
+    static float DistanceSquared( const Vector3& v1, const Vector3& v2 );
+
+    static void Min( const Vector3& v1, const Vector3& v2, Vector3& result );
+    static Vector3 Min( const Vector3& v1, const Vector3& v2 );
+
+    static void Max( const Vector3& v1, const Vector3& v2, Vector3& result );
+    static Vector3 Max( const Vector3& v1, const Vector3& v2 );
+
+    static void Lerp( const Vector3& v1, const Vector3& v2, float t, Vector3& result );
+    static Vector3 Lerp( const Vector3& v1, const Vector3& v2, float t );
+
+    static void SmoothStep( const Vector3& v1, const Vector3& v2, float t, Vector3& result );
+    static Vector3 SmoothStep( const Vector3& v1, const Vector3& v2, float t );
+
+    static void Barycentric( const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g, Vector3& result );
+    static Vector3 Barycentric( const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g );
+
+    static void CatmullRom( const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t, Vector3& result );
+    static Vector3 CatmullRom( const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t );
+
+    static void Hermite( const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t, Vector3& result );
+    static Vector3 Hermite( const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t );
+
+    static void Reflect( const Vector3& ivec, const Vector3& nvec, Vector3& result );
+    static Vector3 Reflect( const Vector3& ivec, const Vector3& nvec );
+
+    static void Refract( const Vector3& ivec, const Vector3& nvec, float refractionIndex, Vector3& result );
+    static Vector3 Refract( const Vector3& ivec, const Vector3& nvec, float refractionIndex );
+
+    static void Transform( const Vector3& v, const Quaternion& quat, Vector3& result );
+    static Vector3 Transform( const Vector3& v, const Quaternion& quat );
+
+    static void Transform( const Vector3& v, const Matrix& m, Vector3& result );
+    static Vector3 Transform( const Vector3& v, const Matrix& m );
+    static void Transform( _In_reads_(count) const Vector3* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector3* resultArray );
+
+    static void Transform( const Vector3& v, const Matrix& m, Vector4& result );
+    static void Transform( _In_reads_(count) const Vector3* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector4* resultArray );
+
+    static void TransformNormal( const Vector3& v, const Matrix& m, Vector3& result );
+    static Vector3 TransformNormal( const Vector3& v, const Matrix& m );
+    static void TransformNormal( _In_reads_(count) const Vector3* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector3* resultArray );
+
+    // Constants (only declared here; the definitions are not in this struct)
+    static const Vector3 Zero;
+    static const Vector3 One;
+    static const Vector3 UnitX;
+    static const Vector3 UnitY;
+    static const Vector3 UnitZ;
+    static const Vector3 Up;
+    static const Vector3 Down;
+    static const Vector3 Right;
+    static const Vector3 Left;
+    static const Vector3 Forward;
+    static const Vector3 Backward;
+};
+
+// Binary operators for Vector3 (free functions; operator* accepts the float on either side, operator/ is declared for two vectors only)
+Vector3 operator+ (const Vector3& V1, const Vector3& V2);
+Vector3 operator- (const Vector3& V1, const Vector3& V2);
+Vector3 operator* (const Vector3& V1, const Vector3& V2);
+Vector3 operator* (const Vector3& V, float S);
+Vector3 operator/ (const Vector3& V1, const Vector3& V2);
+Vector3 operator* (float S, const Vector3& V);
+
+//------------------------------------------------------------------------------
+// 4D vector: XMFLOAT4 extended with operators and helpers; converts implicitly from FXMVECTOR / XMFLOAT4 and to XMVECTOR
+struct Vector4 : public XMFLOAT4
+{
+    Vector4() : XMFLOAT4(0.f, 0.f, 0.f, 0.f) {}
+    explicit Vector4(float x) : XMFLOAT4( x, x, x, x ) {} // same value for all four components
+    Vector4(float _x, float _y, float _z, float _w) : XMFLOAT4(_x, _y, _z, _w) {}
+    explicit Vector4(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}
+    Vector4(FXMVECTOR V) { XMStoreFloat4( this, V ); }
+    Vector4(const XMFLOAT4& V) { this->x = V.x; this->y = V.y; this->z = V.z; this->w = V.w; }
+
+    operator XMVECTOR() const { return XMLoadFloat4( this ); }
+
+    // Comparison operators
+    bool operator == ( const Vector4& V ) const;
+    bool operator != ( const Vector4& V ) const;
+
+    // Assignment operators (plain and compound)
+    Vector4& operator= (const Vector4& V) { x = V.x; y = V.y; z = V.z; w = V.w; return *this; }
+    Vector4& operator= (const XMFLOAT4& V) { x = V.x; y = V.y; z = V.z; w = V.w; return *this; }
+    Vector4& operator+= (const Vector4& V);
+    Vector4& operator-= (const Vector4& V);
+    Vector4& operator*= (const Vector4& V);
+    Vector4& operator*= (float S);
+    Vector4& operator/= (float S);
+
+    // Unary operators
+    Vector4 operator+ () const { return *this; }
+    Vector4 operator- () const;
+
+    // Vector operations (note: unlike Vector2/Vector3, Cross here takes two further vectors in addition to *this)
+    bool InBounds( const Vector4& Bounds ) const;
+
+    float Length() const;
+    float LengthSquared() const;
+
+    float Dot( const Vector4& V ) const;
+    void Cross( const Vector4& v1, const Vector4& v2, Vector4& result ) const;
+    Vector4 Cross( const Vector4& v1, const Vector4& v2 ) const;
+
+    void Normalize();
+    void Normalize( Vector4& result ) const;
+
+    void Clamp( const Vector4& vmin, const Vector4& vmax );
+    void Clamp( const Vector4& vmin, const Vector4& vmax, Vector4& result ) const;
+
+    // Static functions. Most come in two forms: one takes a non-const 'result' reference, the other returns the value.
+    static float Distance( const Vector4& v1, const Vector4& v2 );
+    static float DistanceSquared( const Vector4& v1, const Vector4& v2 );
+
+    static void Min( const Vector4& v1, const Vector4& v2, Vector4& result );
+    static Vector4 Min( const Vector4& v1, const Vector4& v2 );
+
+    static void Max( const Vector4& v1, const Vector4& v2, Vector4& result );
+    static Vector4 Max( const Vector4& v1, const Vector4& v2 );
+
+    static void Lerp( const Vector4& v1, const Vector4& v2, float t, Vector4& result );
+    static Vector4 Lerp( const Vector4& v1, const Vector4& v2, float t );
+
+    static void SmoothStep( const Vector4& v1, const Vector4& v2, float t, Vector4& result );
+    static Vector4 SmoothStep( const Vector4& v1, const Vector4& v2, float t );
+
+    static void Barycentric( const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g, Vector4& result );
+    static Vector4 Barycentric( const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g );
+
+    static void CatmullRom( const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t, Vector4& result );
+    static Vector4 CatmullRom( const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t );
+
+    static void Hermite( const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t, Vector4& result );
+    static Vector4 Hermite( const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t );
+
+    static void Reflect( const Vector4& ivec, const Vector4& nvec, Vector4& result );
+    static Vector4 Reflect( const Vector4& ivec, const Vector4& nvec );
+
+    static void Refract( const Vector4& ivec, const Vector4& nvec, float refractionIndex, Vector4& result );
+    static Vector4 Refract( const Vector4& ivec, const Vector4& nvec, float refractionIndex );
+
+    static void Transform( const Vector2& v, const Quaternion& quat, Vector4& result );
+    static Vector4 Transform( const Vector2& v, const Quaternion& quat );
+
+    static void Transform( const Vector3& v, const Quaternion& quat, Vector4& result );
+    static Vector4 Transform( const Vector3& v, const Quaternion& quat );
+
+    static void Transform( const Vector4& v, const Quaternion& quat, Vector4& result );
+    static Vector4 Transform( const Vector4& v, const Quaternion& quat );
+
+    static void Transform( const Vector4& v, const Matrix& m, Vector4& result );
+    static Vector4 Transform( const Vector4& v, const Matrix& m );
+    static void Transform( _In_reads_(count) const Vector4* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector4* resultArray );
+
+    // Constants (only declared here; the definitions are not in this struct)
+    static const Vector4 Zero;
+    static const Vector4 One;
+    static const Vector4 UnitX;
+    static const Vector4 UnitY;
+    static const Vector4 UnitZ;
+    static const Vector4 UnitW;
+};
+
+// Binary operators for Vector4 (free functions; operator* accepts the float on either side, operator/ is declared for two vectors only)
+Vector4 operator+ (const Vector4& V1, const Vector4& V2);
+Vector4 operator- (const Vector4& V1, const Vector4& V2);
+Vector4 operator* (const Vector4& V1, const Vector4& V2);
+Vector4 operator* (const Vector4& V, float S);
+Vector4 operator/ (const Vector4& V1, const Vector4& V2);
+Vector4 operator* (float S, const Vector4& V);
+
+//------------------------------------------------------------------------------
+// 4x4 Matrix (assumes right-handed coordinates)
+struct Matrix : public XMFLOAT4X4
+{
+    Matrix() : XMFLOAT4X4( 1.f,   0,   0,   0,
+                             0, 1.f,   0,   0,
+                             0,   0, 1.f,   0,
+                             0,   0,   0, 1.f ) {} // default-constructs to the identity matrix
+    Matrix(float m00, float m01, float m02, float m03,
+           float m10, float m11, float m12, float m13,
+           float m20, float m21, float m22, float m23,
+           float m30, float m31, float m32, float m33) : XMFLOAT4X4(m00, m01, m02, m03,
+                                                                    m10, m11, m12, m13,
+                                                                    m20, m21, m22, m23,
+                                                                    m30, m31, m32, m33) {}
+    explicit Matrix( const Vector3& r0, const Vector3& r1, const Vector3& r2 ) : XMFLOAT4X4( r0.x, r0.y, r0.z, 0,
+                                                                                             r1.x, r1.y, r1.z, 0,
+                                                                                             r2.x, r2.y, r2.z, 0,
+                                                                                             0,       0,    0, 1.f ) {} // three rows; fourth column 0 and last row (0,0,0,1)
+    explicit Matrix( const Vector4& r0, const Vector4& r1, const Vector4& r2, const Vector4& r3 ) : XMFLOAT4X4( r0.x, r0.y, r0.z, r0.w,
+                                                                                                                r1.x, r1.y, r1.z, r1.w,
+                                                                                                                r2.x, r2.y, r2.z, r2.w,
+                                                                                                                r3.x, r3.y, r3.z, r3.w ) {}
+    Matrix(const XMFLOAT4X4& M) { memcpy_s(this, sizeof(float)*16, &M, sizeof(XMFLOAT4X4)); }
+    Matrix(const XMFLOAT3X3& M);
+    Matrix(const XMFLOAT4X3& M);
+
+    explicit Matrix(_In_reads_(16) const float *pArray) : XMFLOAT4X4(pArray) {}
+    Matrix( CXMMATRIX M ) { XMStoreFloat4x4( this, M ); }
+
+    operator XMMATRIX() const { return XMLoadFloat4x4( this ); }
+
+    // Comparison operators
+    bool operator == ( const Matrix& M ) const;
+    bool operator != ( const Matrix& M ) const;
+
+    // Assignment operators (the two inline ones copy all 16 floats with memcpy_s)
+    Matrix& operator= (const Matrix& M) { memcpy_s( this, sizeof(float)*16, &M, sizeof(float)*16 ); return *this; }
+    Matrix& operator= (const XMFLOAT4X4& M) { memcpy_s( this, sizeof(float)*16, &M, sizeof(XMFLOAT4X4) ); return *this; }
+    Matrix& operator= (const XMFLOAT3X3& M);
+    Matrix& operator= (const XMFLOAT4X3& M);
+    Matrix& operator+= (const Matrix& M);
+    Matrix& operator-= (const Matrix& M);
+    Matrix& operator*= (const Matrix& M);
+    Matrix& operator*= (float S);
+    Matrix& operator/= (float S);
+
+    Matrix& operator/= (const Matrix& M);
+        // Element-wise divide
+
+    // Unary operators
+    Matrix operator+ () const { return *this; }
+    Matrix operator- () const;
+
+    // Properties: named access to matrix rows (row 1 = Right, row 2 = Up, row 3 = Backward, row 4 = Translation); Left, Down and Forward read and write the negated row
+    Vector3 Up() const { return Vector3( _21, _22, _23); }
+    void Up( const Vector3& v ) { _21 = v.x; _22 = v.y; _23 = v.z; }
+
+    Vector3 Down() const { return Vector3( -_21, -_22, -_23); }
+    void Down( const Vector3& v ) { _21 = -v.x; _22 = -v.y; _23 = -v.z; }
+
+    Vector3 Right() const { return Vector3( _11, _12, _13 ); }
+    void Right( const Vector3& v ) { _11 = v.x; _12 = v.y; _13 = v.z; }
+
+    Vector3 Left() const { return Vector3( -_11, -_12, -_13 ); }
+    void Left( const Vector3& v ) { _11 = -v.x; _12 = -v.y; _13 = -v.z; }
+
+    Vector3 Forward() const  { return Vector3( -_31, -_32, -_33 ); }
+    void Forward( const Vector3& v ) { _31 = -v.x; _32 = -v.y; _33 = -v.z; }
+
+    Vector3 Backward() const { return Vector3( _31, _32, _33 ); }
+    void Backward( const Vector3& v ) { _31 = v.x; _32 = v.y; _33 = v.z; }
+
+    Vector3 Translation() const { return Vector3( _41, _42, _43 ); }
+    void Translation( const Vector3& v ) { _41 = v.x; _42 = v.y; _43 = v.z; }
+
+    // Matrix operations
+    bool Decompose( Vector3& scale, Quaternion& rotation, Vector3& translation );
+
+    Matrix Transpose() const;
+    void Transpose( Matrix& result ) const;
+
+    Matrix Invert() const;
+    void Invert( Matrix& result ) const;
+
+    float Determinant() const;
+
+    // Static functions (factory-style Create* functions return the new Matrix by value)
+    static Matrix CreateBillboard( const Vector3& object, const Vector3& cameraPosition, const Vector3& cameraUp, _In_opt_ const Vector3* cameraForward = nullptr );
+
+    static Matrix CreateConstrainedBillboard( const Vector3& object, const Vector3& cameraPosition, const Vector3& rotateAxis,
+                                              _In_opt_ const Vector3* cameraForward = nullptr, _In_opt_ const Vector3* objectForward = nullptr);
+
+    static Matrix CreateTranslation( const Vector3& position );
+    static Matrix CreateTranslation( float x, float y, float z );
+
+    static Matrix CreateScale( const Vector3& scales );
+    static Matrix CreateScale( float xs, float ys, float zs );
+    static Matrix CreateScale( float scale );
+
+    static Matrix CreateRotationX( float radians );
+    static Matrix CreateRotationY( float radians );
+    static Matrix CreateRotationZ( float radians );
+
+    static Matrix CreateFromAxisAngle( const Vector3& axis, float angle );
+
+    static Matrix CreatePerspectiveFieldOfView( float fov, float aspectRatio, float nearPlane, float farPlane );
+    static Matrix CreatePerspective( float width, float height, float nearPlane, float farPlane );
+    static Matrix CreatePerspectiveOffCenter( float left, float right, float bottom, float top, float nearPlane, float farPlane );
+    static Matrix CreateOrthographic( float width, float height, float zNearPlane, float zFarPlane );
+    static Matrix CreateOrthographicOffCenter( float left, float right, float bottom, float top, float zNearPlane, float zFarPlane );
+
+    static Matrix CreateLookAt( const Vector3& position, const Vector3& target, const Vector3& up );
+    static Matrix CreateWorld( const Vector3& position, const Vector3& forward, const Vector3& up );
+
+    static Matrix CreateFromQuaternion( const Quaternion& quat );
+
+    static Matrix CreateFromYawPitchRoll( float yaw, float pitch, float roll );
+
+    static Matrix CreateShadow( const Vector3& lightDir, const Plane& plane );
+
+    static Matrix CreateReflection( const Plane& plane );
+
+    static void Lerp( const Matrix& M1, const Matrix& M2, float t, Matrix& result );
+    static Matrix Lerp( const Matrix& M1, const Matrix& M2, float t );
+
+    static void Transform( const Matrix& M, const Quaternion& rotation, Matrix& result );
+    static Matrix Transform( const Matrix& M, const Quaternion& rotation );
+
+    // Constants (only declared here; the definition is not in this struct)
+    static const Matrix Identity;
+};
+
+// Binary operators for Matrix (free functions; operator* accepts the float on either side)
+Matrix operator+ (const Matrix& M1, const Matrix& M2);
+Matrix operator- (const Matrix& M1, const Matrix& M2);
+Matrix operator* (const Matrix& M1, const Matrix& M2);
+Matrix operator* (const Matrix& M, float S);
+Matrix operator/ (const Matrix& M, float S);
+Matrix operator/ (const Matrix& M1, const Matrix& M2);
+    // Element-wise divide (refers to the Matrix / Matrix overload directly above)
+Matrix operator* (float S, const Matrix& M);
+
+
+//-----------------------------------------------------------------------------
+// Plane
+struct Plane : public XMFLOAT4   // (x, y, z) is exposed through Normal(), w through D()
+{
+    Plane() : XMFLOAT4(0.f, 1.f, 0.f, 0.f) {}   // default: normal (0, 1, 0), D = 0
+    Plane(float _x, float _y, float _z, float _w) : XMFLOAT4(_x, _y, _z, _w) {}
+    Plane(const Vector3& normal, float d) : XMFLOAT4(normal.x, normal.y, normal.z, d) {}
+    Plane(const Vector3& point1, const Vector3& point2, const Vector3& point3 );
+    Plane(const Vector3& point, const Vector3& normal);
+    explicit Plane(const Vector4& v) : XMFLOAT4(v.x, v.y, v.z, v.w) {}
+    explicit Plane(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}
+    Plane(FXMVECTOR V) { XMStoreFloat4( this, V ); }
+    Plane(const XMFLOAT4& p) : XMFLOAT4(p.x, p.y, p.z, p.w) {}
+
+    operator XMVECTOR() const { return XMLoadFloat4( this ); }   // loads all four floats
+
+    // Comparison operators
+    bool operator == ( const Plane& p ) const;
+    bool operator != ( const Plane& p ) const;
+
+    // Assignment operators (the Plane overload forwards to the XMFLOAT4 overload)
+    Plane& operator= (const Plane& p) { return operator=( static_cast<const XMFLOAT4&>(p) ); }
+    Plane& operator= (const XMFLOAT4& p) { x = p.x; y = p.y; z = p.z; w = p.w; return *this; }
+
+    // Properties: Normal() reads/writes (x, y, z); D() reads/writes w
+    Vector3 Normal() const { return Vector3( x, y, z ); }
+    void Normal( const Vector3& normal ) { x = normal.x; y = normal.y; z = normal.z; }
+
+    float D() const { return w; }
+    void D(float d) { w = d; }
+
+    // Plane operations
+    void Normalize();
+    void Normalize( Plane& result ) const;
+
+    float Dot( const Vector4& v ) const;
+    float DotCoordinate( const Vector3& position ) const;
+    float DotNormal( const Vector3& normal ) const;
+
+    // Static functions (each has an out-parameter form and a by-value form)
+    static void Transform( const Plane& plane, const Matrix& M, Plane& result );
+    static Plane Transform( const Plane& plane, const Matrix& M );
+
+    static void Transform( const Plane& plane, const Quaternion& rotation, Plane& result );
+    static Plane Transform( const Plane& plane, const Quaternion& rotation );
+        // Input quaternion must be the inverse transpose of the transformation
+};
+
+//------------------------------------------------------------------------------
+// Quaternion
+struct Quaternion : public XMFLOAT4   // four floats stored in the XMFLOAT4 base (x, y, z, w)
+{
+    Quaternion() : XMFLOAT4(0.f, 0.f, 0.f, 1.f) {}   // default: (0, 0, 0, 1)
+    Quaternion( float _x, float _y, float _z, float _w ) : XMFLOAT4(_x, _y, _z, _w) {}
+    Quaternion( const Vector3& v, float scalar ) : XMFLOAT4( v.x, v.y, v.z, scalar ) {}
+    explicit Quaternion( const Vector4& v ) : XMFLOAT4( v.x, v.y, v.z, v.w ) {}
+    explicit Quaternion(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}
+    Quaternion(FXMVECTOR V) { XMStoreFloat4( this, V ); }
+    Quaternion(const XMFLOAT4& q) : XMFLOAT4(q.x, q.y, q.z, q.w) {}
+
+    operator XMVECTOR() const { return XMLoadFloat4( this ); }   // loads all four floats
+
+    // Comparison operators
+    bool operator == ( const Quaternion& q ) const;
+    bool operator != ( const Quaternion& q ) const;
+
+    // Assignment operators (the Quaternion copy overload forwards to the XMFLOAT4 overload)
+    Quaternion& operator= (const Quaternion& q) { return operator=( static_cast<const XMFLOAT4&>(q) ); }
+    Quaternion& operator= (const XMFLOAT4& q) { x = q.x; y = q.y; z = q.z; w = q.w; return *this; }
+    Quaternion& operator+= (const Quaternion& q);
+    Quaternion& operator-= (const Quaternion& q);
+    Quaternion& operator*= (const Quaternion& q);
+    Quaternion& operator*= (float S);
+    Quaternion& operator/= (const Quaternion& q);
+
+    // Unary operators (unary plus returns a copy of *this)
+    Quaternion operator+ () const { return *this; }
+    Quaternion operator- () const;
+
+    // Quaternion operations
+    float Length() const;
+    float LengthSquared() const;
+
+    void Normalize();
+    void Normalize( Quaternion& result ) const;
+
+    void Conjugate();
+    void Conjugate( Quaternion& result ) const;
+
+    void Inverse( Quaternion& result ) const;
+
+    float Dot( const Quaternion& Q ) const;
+
+    // Static functions (factories, then interpolation/concatenation in out-parameter and by-value forms)
+    static Quaternion CreateFromAxisAngle( const Vector3& axis, float angle );
+    static Quaternion CreateFromYawPitchRoll( float yaw, float pitch, float roll );
+    static Quaternion CreateFromRotationMatrix( const Matrix& M );
+
+    static void Lerp( const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result );
+    static Quaternion Lerp( const Quaternion& q1, const Quaternion& q2, float t );
+
+    static void Slerp( const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result );
+    static Quaternion Slerp( const Quaternion& q1, const Quaternion& q2, float t );
+
+    static void Concatenate( const Quaternion& q1, const Quaternion& q2, Quaternion& result );
+    static Quaternion Concatenate( const Quaternion& q1, const Quaternion& q2 );
+
+    // Constants
+    static const Quaternion Identity;
+};
+
+// Binary operators (non-member overloads for Quaternion; declarations only at this point)
+Quaternion operator+ (const Quaternion& Q1, const Quaternion& Q2);
+Quaternion operator- (const Quaternion& Q1, const Quaternion& Q2);
+Quaternion operator* (const Quaternion& Q1, const Quaternion& Q2);
+Quaternion operator* (const Quaternion& Q, float S);
+Quaternion operator/ (const Quaternion& Q1, const Quaternion& Q2);
+Quaternion operator* (float S, const Quaternion& Q);
+
+//------------------------------------------------------------------------------
+// Color
+struct Color : public XMFLOAT4   // R/G/B/A are stored in x/y/z/w of the XMFLOAT4 base
+{
+    Color() : XMFLOAT4(0.f, 0.f, 0.f, 1.f) {}   // default: (0, 0, 0) with A = 1
+    Color( float _r, float _g, float _b ) : XMFLOAT4(_r, _g, _b, 1.f) {}   // A defaults to 1
+    Color( float _r, float _g, float _b, float _a ) : XMFLOAT4(_r, _g, _b, _a) {}
+    explicit Color( const Vector3& clr ) : XMFLOAT4( clr.x, clr.y, clr.z, 1.f ) {}
+    explicit Color( const Vector4& clr ) : XMFLOAT4( clr.x, clr.y, clr.z, clr.w ) {}
+    explicit Color(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}
+    Color(FXMVECTOR V) { XMStoreFloat4( this, V ); }
+    Color(const XMFLOAT4& c) : XMFLOAT4(c.x, c.y, c.z, c.w) {}
+
+    explicit Color( const DirectX::PackedVector::XMCOLOR& Packed );
+        // BGRA Direct3D 9 D3DCOLOR packed color
+
+    explicit Color( const DirectX::PackedVector::XMUBYTEN4& Packed );
+        // RGBA XNA Game Studio packed color
+
+    operator XMVECTOR() const { return XMLoadFloat4( this ); }
+    operator const float*() const { return reinterpret_cast<const float*>(this); }   // views the object as a float array
+
+    // Comparison operators
+    bool operator == ( const Color& c ) const;
+    bool operator != ( const Color& c ) const;
+
+    // Assignment operators (the Color copy overload forwards to the XMFLOAT4 overload)
+    Color& operator= (const Color& c) { return operator=( static_cast<const XMFLOAT4&>(c) ); }
+    Color& operator= (const XMFLOAT4& c) { x = c.x; y = c.y; z = c.z; w = c.w; return *this; }
+    Color& operator= (const DirectX::PackedVector::XMCOLOR& Packed);
+    Color& operator= (const DirectX::PackedVector::XMUBYTEN4& Packed);
+    Color& operator+= (const Color& c);
+    Color& operator-= (const Color& c);
+    Color& operator*= (const Color& c);
+    Color& operator*= (float S);
+    Color& operator/= (const Color& c);
+
+    // Unary operators (unary plus returns a copy of *this)
+    Color operator+ () const { return *this; }
+    Color operator- () const;
+
+    // Properties: each channel has a getter and a setter over one base member
+    float R() const { return x; }
+    void R(float r) { x = r; }
+
+    float G() const { return y; }
+    void G(float g) { y = g; }
+
+    float B() const { return z; }
+    void B(float b) { z = b; }
+
+    float A() const { return w; }
+    void A(float a) { w = a; }
+
+    // Color operations (conversions first, then in-place / out-parameter pairs)
+    DirectX::PackedVector::XMCOLOR BGRA() const;
+    DirectX::PackedVector::XMUBYTEN4 RGBA() const;
+
+    Vector3 ToVector3() const;
+    Vector4 ToVector4() const;
+
+    void Negate();
+    void Negate( Color& result ) const;
+
+    void Saturate();
+    void Saturate( Color& result ) const;
+
+    void Premultiply();
+    void Premultiply( Color& result ) const;
+
+    void AdjustSaturation( float sat );
+    void AdjustSaturation( float sat, Color& result ) const;
+
+    void AdjustContrast( float contrast );
+    void AdjustContrast( float contrast, Color& result ) const;
+
+    // Static functions (out-parameter form followed by by-value form)
+    static void Modulate( const Color& c1, const Color& c2, Color& result );
+    static Color Modulate( const Color& c1, const Color& c2 );
+
+    static void Lerp( const Color& c1, const Color& c2, float t, Color& result );
+    static Color Lerp( const Color& c1, const Color& c2, float t );
+};
+
+// Binary operators (non-member overloads for Color; declarations only at this point)
+Color operator+ (const Color& C1, const Color& C2);
+Color operator- (const Color& C1, const Color& C2);
+Color operator* (const Color& C1, const Color& C2);
+Color operator* (const Color& C, float S);
+Color operator/ (const Color& C1, const Color& C2);
+Color operator* (float S, const Color& C);
+
+//------------------------------------------------------------------------------
+// Ray
+class Ray   // two public Vector3 fields plus comparison and intersection queries
+{
+public:
+    Vector3 position;
+    Vector3 direction;
+
+    Ray() : position(0,0,0), direction(0,0,1) {}   // default: position (0,0,0), direction (0,0,1)
+    Ray( const Vector3& pos, const Vector3& dir ) : position(pos), direction(dir) {}   // both arguments are stored as given
+
+    // Comparison operators
+    bool operator == ( const Ray& r ) const;
+    bool operator != ( const Ray& r ) const;
+
+    // Ray operations (bool result plus an _Out_ Dist; presumably the hit distance along the ray - confirm in SimpleMath.inl)
+    bool Intersects( const BoundingSphere& sphere, _Out_ float& Dist ) const;
+    bool Intersects( const BoundingBox& box, _Out_ float& Dist ) const;
+    bool Intersects( const Vector3& tri0, const Vector3& tri1, const Vector3& tri2, _Out_ float& Dist ) const;
+    bool Intersects( const Plane& plane, _Out_ float& Dist ) const;
+};
+
+//------------------------------------------------------------------------------
+// Viewport
+class Viewport   // six floats declared in the same order the D3D11_VIEWPORT constructor below reads them
+{
+public:
+    float x;
+    float y;
+    float width;
+    float height;
+    float minDepth;
+    float maxDepth;
+
+    Viewport() :   // default: zero-sized viewport with depth range [0, 1]
+        x(0.f), y(0.f), width(0.f), height(0.f), minDepth(0.f), maxDepth(1.f) {}
+    Viewport( float ix, float iy, float iw, float ih, float iminz = 0.f, float imaxz = 1.f ) :
+        x(ix), y(iy), width(iw), height(ih), minDepth(iminz), maxDepth(imaxz) {}
+    explicit Viewport(const RECT& rct) :   // origin = (left, top); size = right-left by bottom-top; depth [0, 1]
+        x(float(rct.left)), y(float(rct.top)),
+        width(float(rct.right - rct.left)),
+        height(float(rct.bottom - rct.top)),
+        minDepth(0.f), maxDepth(1.f) {}
+    explicit Viewport(const D3D11_VIEWPORT& vp) :   // field-by-field copy
+        x(vp.TopLeftX), y(vp.TopLeftY),
+        width(vp.Width), height(vp.Height),
+        minDepth(vp.MinDepth), maxDepth(vp.MaxDepth) {}
+
+    // Direct3D 11 interop: both members reinterpret *this as a D3D11_VIEWPORT and never modify it, so both are const
+    operator D3D11_VIEWPORT() const { return *reinterpret_cast<const D3D11_VIEWPORT*>(this); }
+    const D3D11_VIEWPORT* Get11() const { return reinterpret_cast<const D3D11_VIEWPORT*>(this); }
+
+    // Comparison operators
+    bool operator == ( const Viewport& vp ) const;
+    bool operator != ( const Viewport& vp ) const;
+
+    // Assignment operators
+    Viewport& operator= (const Viewport& vp);
+    Viewport& operator= (const RECT& rct);
+    Viewport& operator= (const D3D11_VIEWPORT& vp);
+
+    // Viewport operations (Project/Unproject come in by-value and out-parameter forms)
+    float AspectRatio() const;
+
+    Vector3 Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world ) const;
+    void Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result ) const;
+
+    Vector3 Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world ) const;
+    void Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result ) const;
+
+    // Static methods
+    static RECT __cdecl ComputeDisplayArea(DXGI_SCALING scaling, UINT backBufferWidth, UINT backBufferHeight, int outputWidth, int outputHeight);
+    static RECT __cdecl ComputeTitleSafeArea(UINT backBufferWidth, UINT backBufferHeight);
+};
+
+#include "SimpleMath.inl"
+
+}; // namespace SimpleMath
+
+}; // namespace DirectX
+
+//------------------------------------------------------------------------------
+// Support for SimpleMath and Standard C++ Library containers
+namespace std
+{
+
+    template<> struct less<DirectX::SimpleMath::Vector2>   // lexicographic order: x, then y
+    {
+        bool operator()(const DirectX::SimpleMath::Vector2& V1, const DirectX::SimpleMath::Vector2& V2) const
+        {
+            return (V1.x != V2.x) ? (V1.x < V2.x) : (V1.y < V2.y);
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Vector3>   // lexicographic order: x, then y, then z
+    {
+        bool operator()(const DirectX::SimpleMath::Vector3& V1, const DirectX::SimpleMath::Vector3& V2) const
+        {
+            if (V1.x != V2.x) return (V1.x < V2.x);
+            if (V1.y != V2.y) return (V1.y < V2.y);
+            return (V1.z < V2.z);
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Vector4>   // lexicographic order: x, y, z, then w
+    {
+        bool operator()(const DirectX::SimpleMath::Vector4& V1, const DirectX::SimpleMath::Vector4& V2) const
+        {
+            if (V1.x != V2.x) return (V1.x < V2.x);
+            if (V1.y != V2.y) return (V1.y < V2.y);
+            if (V1.z != V2.z) return (V1.z < V2.z);
+            return (V1.w < V2.w);
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Matrix>   // lexicographic order over _11, _12, ... _44
+    {
+        bool operator()(const DirectX::SimpleMath::Matrix& M1, const DirectX::SimpleMath::Matrix& M2) const
+        {
+            if (M1._11 != M2._11) return M1._11 < M2._11;   // the first differing element decides
+            if (M1._12 != M2._12) return M1._12 < M2._12;
+            if (M1._13 != M2._13) return M1._13 < M2._13;
+            if (M1._14 != M2._14) return M1._14 < M2._14;
+            if (M1._21 != M2._21) return M1._21 < M2._21;
+            if (M1._22 != M2._22) return M1._22 < M2._22;
+            if (M1._23 != M2._23) return M1._23 < M2._23;
+            if (M1._24 != M2._24) return M1._24 < M2._24;
+            if (M1._31 != M2._31) return M1._31 < M2._31;
+            if (M1._32 != M2._32) return M1._32 < M2._32;
+            if (M1._33 != M2._33) return M1._33 < M2._33;
+            if (M1._34 != M2._34) return M1._34 < M2._34;
+            if (M1._41 != M2._41) return M1._41 < M2._41;
+            if (M1._42 != M2._42) return M1._42 < M2._42;
+            if (M1._43 != M2._43) return M1._43 < M2._43;
+            if (M1._44 != M2._44) return M1._44 < M2._44;
+            // All 16 elements compare equal: M1 is not ordered before M2.
+            return false;
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Plane>   // lexicographic order: x, y, z, then w
+    {
+        bool operator()(const DirectX::SimpleMath::Plane& P1, const DirectX::SimpleMath::Plane& P2) const
+        {
+            if (P1.x != P2.x) return (P1.x < P2.x);
+            if (P1.y != P2.y) return (P1.y < P2.y);
+            if (P1.z != P2.z) return (P1.z < P2.z);
+            return (P1.w < P2.w);
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Quaternion>   // lexicographic order: x, y, z, then w
+    {
+        bool operator()(const DirectX::SimpleMath::Quaternion& Q1, const DirectX::SimpleMath::Quaternion& Q2) const
+        {
+            if (Q1.x != Q2.x) return (Q1.x < Q2.x);
+            if (Q1.y != Q2.y) return (Q1.y < Q2.y);
+            if (Q1.z != Q2.z) return (Q1.z < Q2.z);
+            return (Q1.w < Q2.w);
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Color>   // lexicographic order: x, y, z, then w
+    {
+        bool operator()(const DirectX::SimpleMath::Color& C1, const DirectX::SimpleMath::Color& C2) const
+        {
+            if (C1.x != C2.x) return (C1.x < C2.x);
+            if (C1.y != C2.y) return (C1.y < C2.y);
+            if (C1.z != C2.z) return (C1.z < C2.z);
+            return (C1.w < C2.w);
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Ray>   // lexicographic order: position x/y/z, then direction x/y/z
+    {
+        bool operator()(const DirectX::SimpleMath::Ray& R1, const DirectX::SimpleMath::Ray& R2) const
+        {
+            if (R1.position.x != R2.position.x) return R1.position.x < R2.position.x;
+            if (R1.position.y != R2.position.y) return R1.position.y < R2.position.y;
+            if (R1.position.z != R2.position.z) return R1.position.z < R2.position.z;
+            // Positions compare equal component-wise; direction breaks the tie.
+            if (R1.direction.x != R2.direction.x) return R1.direction.x < R2.direction.x;
+            if (R1.direction.y != R2.direction.y) return R1.direction.y < R2.direction.y;
+            if (R1.direction.z != R2.direction.z) return R1.direction.z < R2.direction.z;
+            // Every component compares equal: R1 is not ordered before R2.
+            return false;
+        }
+    };
+
+    template<> struct less<DirectX::SimpleMath::Viewport>   // lexicographic order: x, y, width, height, minDepth, maxDepth
+    {
+        bool operator()(const DirectX::SimpleMath::Viewport& vp1, const DirectX::SimpleMath::Viewport& vp2) const
+        {
+            if (vp1.x != vp2.x) return (vp1.x < vp2.x);
+            if (vp1.y != vp2.y) return (vp1.y < vp2.y);
+            // Same origin: compare the extent next.
+            if (vp1.width != vp2.width) return (vp1.width < vp2.width);
+            if (vp1.height != vp2.height) return (vp1.height < vp2.height);
+            // Same origin and extent: compare the depth range last.
+            if (vp1.minDepth != vp2.minDepth) return (vp1.minDepth < vp2.minDepth);
+            if (vp1.maxDepth != vp2.maxDepth) return (vp1.maxDepth < vp2.maxDepth);
+            // All six fields compare equal: vp1 is not ordered before vp2.
+            return false;
+        }
+    };
+
+} // namespace std
diff --git a/Windows/DirectXTK/SimpleMath.inl b/Windows/DirectXTK/SimpleMath.inl
new file mode 100644
index 0000000..0d719ec
--- /dev/null
+++ b/Windows/DirectXTK/SimpleMath.inl
@@ -0,0 +1,3563 @@
+//-------------------------------------------------------------------------------------
+// SimpleMath.inl -- Simplified C++ Math wrapper for DirectXMath
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//  
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+/****************************************************************************
+ *
+ * Vector2
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+inline bool Vector2::operator == ( const Vector2& V ) const
+{
+    // Result of XMVector2Equal applied to *this and V (both loaded with XMLoadFloat2).
+    using namespace DirectX;
+    const XMVECTOR rhs = XMLoadFloat2( &V );
+    return XMVector2Equal( XMLoadFloat2( this ), rhs );
+}
+
+inline bool Vector2::operator != ( const Vector2& V ) const
+{
+    // Result of XMVector2NotEqual applied to *this and V (both loaded with XMLoadFloat2).
+    using namespace DirectX;
+    const XMVECTOR rhs = XMLoadFloat2( &V );
+    return XMVector2NotEqual( XMLoadFloat2( this ), rhs );
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Vector2& Vector2::operator+= (const Vector2& V)
+{
+    // Stores XMVectorAdd(*this, V) back into this vector and returns *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    const XMVECTOR other = XMLoadFloat2( &V );
+    XMStoreFloat2( this, XMVectorAdd( self, other ) );
+    return *this;
+}
+
+inline Vector2& Vector2::operator-= (const Vector2& V)
+{
+    // Stores XMVectorSubtract(*this, V) back into this vector and returns *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    const XMVECTOR other = XMLoadFloat2( &V );
+    XMStoreFloat2( this, XMVectorSubtract( self, other ) );
+    return *this;
+}
+
+inline Vector2& Vector2::operator*= (const Vector2& V)
+{
+    // Stores XMVectorMultiply(*this, V) back into this vector and returns *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    const XMVECTOR other = XMLoadFloat2( &V );
+    XMStoreFloat2( this, XMVectorMultiply( self, other ) );
+    return *this;
+}
+
+inline Vector2& Vector2::operator*= (float S)
+{
+    // Stores XMVectorScale(*this, S) back into this vector and returns *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    XMStoreFloat2( this, XMVectorScale( self, S ) );
+    return *this;
+}
+
+inline Vector2& Vector2::operator/= (float S)
+{
+    // Scales *this by 1/S in place and returns *this; S == 0 trips the assert when asserts are enabled.
+    using namespace DirectX;
+    assert( S != 0.0f );
+    const XMVECTOR self = XMLoadFloat2( this );
+    XMStoreFloat2( this, XMVectorScale( self, 1.f/S ) );
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+inline Vector2 operator+ (const Vector2& V1, const Vector2& V2)
+{
+    // Returns XMVectorAdd(V1, V2) as a new Vector2; neither operand is modified.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2( &V1 );
+    const XMVECTOR rhs = XMLoadFloat2( &V2 );
+    Vector2 sum;
+    XMStoreFloat2( &sum, XMVectorAdd( lhs, rhs ) );
+    return sum;
+}
+
+inline Vector2 operator- (const Vector2& V1, const Vector2& V2)
+{
+    // Returns XMVectorSubtract(V1, V2) as a new Vector2; neither operand is modified.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2( &V1 );
+    const XMVECTOR rhs = XMLoadFloat2( &V2 );
+    Vector2 difference;
+    XMStoreFloat2( &difference, XMVectorSubtract( lhs, rhs ) );
+    return difference;
+}
+
+inline Vector2 operator* (const Vector2& V1, const Vector2& V2)
+{
+    // Returns XMVectorMultiply(V1, V2) as a new Vector2; neither operand is modified.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2( &V1 );
+    const XMVECTOR rhs = XMLoadFloat2( &V2 );
+    Vector2 product;
+    XMStoreFloat2( &product, XMVectorMultiply( lhs, rhs ) );
+    return product;
+}
+
+inline Vector2 operator* (const Vector2& V, float S)
+{
+    // Returns XMVectorScale(V, S) as a new Vector2 (vector on the left, scalar on the right).
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2( &V );
+    Vector2 scaled;
+    XMStoreFloat2( &scaled, XMVectorScale( vec, S ) );
+    return scaled;
+}
+
+inline Vector2 operator/ (const Vector2& V1, const Vector2& V2)
+{
+    // Returns XMVectorDivide(V1, V2) as a new Vector2; the divisor V2 is not checked here.
+    using namespace DirectX;
+    const XMVECTOR numerator = XMLoadFloat2( &V1 );
+    const XMVECTOR denominator = XMLoadFloat2( &V2 );
+    Vector2 quotient;
+    XMStoreFloat2( &quotient, XMVectorDivide( numerator, denominator ) );
+    return quotient;
+}
+
+inline Vector2 operator* (float S, const Vector2& V)
+{
+    // Returns XMVectorScale(V, S) as a new Vector2 (scalar on the left, vector on the right).
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2( &V );
+    Vector2 scaled;
+    XMStoreFloat2( &scaled, XMVectorScale( vec, S ) );
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Vector operations
+//------------------------------------------------------------------------------
+
+inline bool Vector2::InBounds( const Vector2& Bounds ) const
+{
+    // Result of XMVector2InBounds with *this as the vector and Bounds as the limits.
+    using namespace DirectX;
+    const XMVECTOR limits = XMLoadFloat2( &Bounds );
+    return XMVector2InBounds( XMLoadFloat2( this ), limits );
+}
+
+inline float Vector2::Length() const
+{
+    // Returns the X lane of XMVector2Length(*this) as a scalar.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    return XMVectorGetX( XMVector2Length( self ) );
+}
+
+inline float Vector2::LengthSquared() const
+{
+    // Returns the X lane of XMVector2LengthSq(*this) as a scalar.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    return XMVectorGetX( XMVector2LengthSq( self ) );
+}
+
+inline float Vector2::Dot( const Vector2& V ) const
+{
+    // Returns the X lane of XMVector2Dot(*this, V) as a scalar.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2( this );
+    const XMVECTOR rhs = XMLoadFloat2( &V );
+    return XMVectorGetX( XMVector2Dot( lhs, rhs ) );
+}
+
+inline void Vector2::Cross( const Vector2& V, Vector2& result ) const
+{
+    // Out-parameter form: writes XMVector2Cross(*this, V) into result.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2( this );
+    const XMVECTOR rhs = XMLoadFloat2( &V );
+    XMStoreFloat2( &result, XMVector2Cross( lhs, rhs ) );
+}
+
+inline Vector2 Vector2::Cross( const Vector2& V ) const
+{
+    // By-value form: returns XMVector2Cross(*this, V) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2( this );
+    const XMVECTOR rhs = XMLoadFloat2( &V );
+
+    Vector2 crossed;
+    XMStoreFloat2( &crossed, XMVector2Cross( lhs, rhs ) );
+    return crossed;
+}
+
+inline void Vector2::Normalize()
+{
+    // In-place form: overwrites *this with XMVector2Normalize(*this).
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    XMStoreFloat2( this, XMVector2Normalize( self ) );
+}
+
+inline void Vector2::Normalize( Vector2& result ) const
+{
+    // Out-parameter form: writes XMVector2Normalize(*this) into result; *this is untouched.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    XMStoreFloat2( &result, XMVector2Normalize( self ) );
+}
+
+inline void Vector2::Clamp( const Vector2& vmin, const Vector2& vmax )
+{
+    // In-place form: overwrites *this with XMVectorClamp(*this, vmin, vmax).
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    const XMVECTOR lower = XMLoadFloat2( &vmin );
+    const XMVECTOR upper = XMLoadFloat2( &vmax );
+    XMStoreFloat2( this, XMVectorClamp( self, lower, upper ) );
+}
+
+inline void Vector2::Clamp( const Vector2& vmin, const Vector2& vmax, Vector2& result ) const
+{
+    // Out-parameter form: writes XMVectorClamp(*this, vmin, vmax) into result; *this is untouched.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2( this );
+    const XMVECTOR lower = XMLoadFloat2( &vmin );
+    const XMVECTOR upper = XMLoadFloat2( &vmax );
+    XMStoreFloat2( &result, XMVectorClamp( self, lower, upper ) );
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+inline float Vector2::Distance( const Vector2& v1, const Vector2& v2 )
+{
+    // Returns the X lane of XMVector2Length applied to (v2 - v1).
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat2( &v1 );
+    const XMVECTOR to = XMLoadFloat2( &v2 );
+    const XMVECTOR delta = XMVectorSubtract( to, from );
+    return XMVectorGetX( XMVector2Length( delta ) );
+}
+
+inline float Vector2::DistanceSquared( const Vector2& v1, const Vector2& v2 )
+{
+    // Returns the X lane of XMVector2LengthSq applied to (v2 - v1).
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat2( &v1 );
+    const XMVECTOR to = XMLoadFloat2( &v2 );
+    const XMVECTOR delta = XMVectorSubtract( to, from );
+    return XMVectorGetX( XMVector2LengthSq( delta ) );
+}
+
+inline void Vector2::Min( const Vector2& v1, const Vector2& v2, Vector2& result )
+{
+    // Out-parameter form: writes XMVectorMin(v1, v2) into result.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat2( &v1 );
+    const XMVECTOR second = XMLoadFloat2( &v2 );
+    XMStoreFloat2( &result, XMVectorMin( first, second ) );
+}
+
+inline Vector2 Vector2::Min( const Vector2& v1, const Vector2& v2 )
+{
+    // By-value form: returns XMVectorMin(v1, v2) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat2( &v1 );
+    const XMVECTOR second = XMLoadFloat2( &v2 );
+
+    Vector2 smallest;
+    XMStoreFloat2( &smallest, XMVectorMin( first, second ) );
+    return smallest;
+}
+
+inline void Vector2::Max( const Vector2& v1, const Vector2& v2, Vector2& result )
+{
+    // Out-parameter form: writes XMVectorMax(v1, v2) into result.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat2( &v1 );
+    const XMVECTOR second = XMLoadFloat2( &v2 );
+    XMStoreFloat2( &result, XMVectorMax( first, second ) );
+}
+
+inline Vector2 Vector2::Max( const Vector2& v1, const Vector2& v2 )
+{
+    // By-value form: returns XMVectorMax(v1, v2) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat2( &v1 );
+    const XMVECTOR second = XMLoadFloat2( &v2 );
+
+    Vector2 largest;
+    XMStoreFloat2( &largest, XMVectorMax( first, second ) );
+    return largest;
+}
+
+inline void Vector2::Lerp( const Vector2& v1, const Vector2& v2, float t, Vector2& result )
+{
+    // Out-parameter form: writes XMVectorLerp(v1, v2, t) into result; t is passed through unclamped.
+    using namespace DirectX;
+    const XMVECTOR start = XMLoadFloat2( &v1 );
+    const XMVECTOR finish = XMLoadFloat2( &v2 );
+    XMStoreFloat2( &result, XMVectorLerp( start, finish, t ) );
+}
+
+inline Vector2 Vector2::Lerp( const Vector2& v1, const Vector2& v2, float t )
+{
+    // By-value form: returns XMVectorLerp(v1, v2, t); t is passed through unclamped.
+    using namespace DirectX;
+    const XMVECTOR start = XMLoadFloat2( &v1 );
+    const XMVECTOR finish = XMLoadFloat2( &v2 );
+
+    Vector2 blended;
+    XMStoreFloat2( &blended, XMVectorLerp( start, finish, t ) );
+    return blended;
+}
+
+inline void Vector2::SmoothStep( const Vector2& v1, const Vector2& v2, float t, Vector2& result )
+{
+    using namespace DirectX;
+    // Clamp t to [0, 1], remap it with t*t*(3 - 2t), then lerp v1 -> v2 into result.
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);
+    t = t*t*(3.f - 2.f*t);
+    const XMVECTOR start = XMLoadFloat2( &v1 );
+    const XMVECTOR finish = XMLoadFloat2( &v2 );
+    XMStoreFloat2( &result, XMVectorLerp( start, finish, t ) );
+}
+
+inline Vector2 Vector2::SmoothStep( const Vector2& v1, const Vector2& v2, float t )
+{
+    using namespace DirectX;
+    // Clamp t to [0, 1], remap it with t*t*(3 - 2t), then return the lerp v1 -> v2.
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);
+    t = t*t*(3.f - 2.f*t);
+    const XMVECTOR start = XMLoadFloat2( &v1 );
+    const XMVECTOR finish = XMLoadFloat2( &v2 );
+
+    Vector2 blended;
+    XMStoreFloat2( &blended, XMVectorLerp( start, finish, t ) );
+    return blended;
+}
+
+inline void Vector2::Barycentric( const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g, Vector2& result )
+{
+    // Out-parameter form: writes XMVectorBaryCentric(v1, v2, v3, f, g) into result.
+    using namespace DirectX;
+    const XMVECTOR p1 = XMLoadFloat2( &v1 );
+    const XMVECTOR p2 = XMLoadFloat2( &v2 );
+    const XMVECTOR p3 = XMLoadFloat2( &v3 );
+    XMStoreFloat2( &result, XMVectorBaryCentric( p1, p2, p3, f, g ) );
+}
+
+inline Vector2 Vector2::Barycentric( const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g )
+{
+    // By-value form: returns XMVectorBaryCentric(v1, v2, v3, f, g) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR p1 = XMLoadFloat2( &v1 );
+    const XMVECTOR p2 = XMLoadFloat2( &v2 );
+    const XMVECTOR p3 = XMLoadFloat2( &v3 );
+
+    Vector2 point;
+    XMStoreFloat2( &point, XMVectorBaryCentric( p1, p2, p3, f, g ) );
+    return point;
+}
+
+inline void Vector2::CatmullRom( const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t, Vector2& result )
+{
+    // Out-parameter form: writes XMVectorCatmullRom(v1, v2, v3, v4, t) into result.
+    using namespace DirectX;
+    const XMVECTOR p1 = XMLoadFloat2( &v1 );
+    const XMVECTOR p2 = XMLoadFloat2( &v2 );
+    const XMVECTOR p3 = XMLoadFloat2( &v3 );
+    const XMVECTOR p4 = XMLoadFloat2( &v4 );
+    XMStoreFloat2( &result, XMVectorCatmullRom( p1, p2, p3, p4, t ) );
+}
+
+inline Vector2 Vector2::CatmullRom( const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t )
+{
+    // By-value form: returns XMVectorCatmullRom(v1, v2, v3, v4, t) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR p1 = XMLoadFloat2( &v1 );
+    const XMVECTOR p2 = XMLoadFloat2( &v2 );
+    const XMVECTOR p3 = XMLoadFloat2( &v3 );
+    const XMVECTOR p4 = XMLoadFloat2( &v4 );
+
+    Vector2 point;
+    XMStoreFloat2( &point, XMVectorCatmullRom( p1, p2, p3, p4, t ) );
+    return point;
+}
+
+inline void Vector2::Hermite( const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t, Vector2& result )
+{
+    // Out-parameter form: writes XMVectorHermite(v1, t1, v2, t2, t) into result (argument order preserved).
+    using namespace DirectX;
+    const XMVECTOR pos1 = XMLoadFloat2( &v1 );
+    const XMVECTOR tan1 = XMLoadFloat2( &t1 );
+    const XMVECTOR pos2 = XMLoadFloat2( &v2 );
+    const XMVECTOR tan2 = XMLoadFloat2( &t2 );
+    XMStoreFloat2( &result, XMVectorHermite( pos1, tan1, pos2, tan2, t ) );
+}
+
+inline Vector2 Vector2::Hermite( const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t )
+{
+    // By-value form: returns XMVectorHermite(v1, t1, v2, t2, t) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR pos1 = XMLoadFloat2( &v1 );
+    const XMVECTOR tan1 = XMLoadFloat2( &t1 );
+    const XMVECTOR pos2 = XMLoadFloat2( &v2 );
+    const XMVECTOR tan2 = XMLoadFloat2( &t2 );
+
+    Vector2 point;
+    XMStoreFloat2( &point, XMVectorHermite( pos1, tan1, pos2, tan2, t ) );
+    return point;
+}
+
+inline void Vector2::Reflect( const Vector2& ivec, const Vector2& nvec, Vector2& result )
+{
+    // Out-parameter form: writes XMVector2Reflect(ivec, nvec) into result.
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat2( &ivec );
+    const XMVECTOR normal = XMLoadFloat2( &nvec );
+    XMStoreFloat2( &result, XMVector2Reflect( incident, normal ) );
+}
+
+inline Vector2 Vector2::Reflect( const Vector2& ivec, const Vector2& nvec )
+{
+    // By-value form: returns XMVector2Reflect(ivec, nvec) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat2( &ivec );
+    const XMVECTOR normal = XMLoadFloat2( &nvec );
+
+    Vector2 reflected;
+    XMStoreFloat2( &reflected, XMVector2Reflect( incident, normal ) );
+    return reflected;
+}
+
+inline void Vector2::Refract( const Vector2& ivec, const Vector2& nvec, float refractionIndex, Vector2& result )
+{
+    // Out-parameter form: writes XMVector2Refract(ivec, nvec, refractionIndex) into result.
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat2( &ivec );
+    const XMVECTOR normal = XMLoadFloat2( &nvec );
+    XMStoreFloat2( &result, XMVector2Refract( incident, normal, refractionIndex ) );
+}
+
+inline Vector2 Vector2::Refract( const Vector2& ivec, const Vector2& nvec, float refractionIndex )
+{
+    // By-value form: returns XMVector2Refract(ivec, nvec, refractionIndex) stored into a new Vector2.
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat2( &ivec );
+    const XMVECTOR normal = XMLoadFloat2( &nvec );
+
+    Vector2 refracted;
+    XMStoreFloat2( &refracted, XMVector2Refract( incident, normal, refractionIndex ) );
+    return refracted;
+}
+
+inline void Vector2::Transform( const Vector2& v, const Quaternion& quat, Vector2& result )
+{
+    // Rotates v by quat with the 3D rotate helper; only x and y of the outcome are stored.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat2( &v );
+    const XMVECTOR rotation = XMLoadFloat4( &quat );
+    XMStoreFloat2( &result, XMVector3Rotate( source, rotation ) );
+}
+
+inline Vector2 Vector2::Transform( const Vector2& v, const Quaternion& quat )
+{
+    // Returns v rotated by quat (XMVector3Rotate), keeping only the x and y components.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat2( &v );
+    const XMVECTOR rotation = XMLoadFloat4( &quat );
+    const XMVECTOR rotated = XMVector3Rotate( source, rotation );
+    Vector2 result;
+    XMStoreFloat2( &result, rotated );
+    return result;
+}
+
+inline void Vector2::Transform( const Vector2& v, const Matrix& m, Vector2& result )
+{
+    // Coordinate transform of v by m (XMVector2TransformCoord), written to result.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat2( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    XMStoreFloat2( &result, XMVector2TransformCoord( source, xform ) );
+}
+
+inline Vector2 Vector2::Transform( const Vector2& v, const Matrix& m )
+{
+    // Returns the coordinate transform of v by m (XMVector2TransformCoord).
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat2( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    const XMVECTOR transformed = XMVector2TransformCoord( source, xform );
+    Vector2 result;
+    XMStoreFloat2( &result, transformed );
+    return result;
+}
+
+_Use_decl_annotations_
+inline void Vector2::Transform( const Vector2* varray, size_t count, const Matrix& m, Vector2* resultArray )
+{
+    // Batch coordinate transform: count elements, both arrays strided by sizeof(XMFLOAT2).
+    using namespace DirectX;
+    XMVector2TransformCoordStream( resultArray, sizeof(XMFLOAT2), varray, sizeof(XMFLOAT2), count, XMLoadFloat4x4( &m ) );
+}
+
+inline void Vector2::Transform( const Vector2& v, const Matrix& m, Vector4& result )
+{
+    // Transforms v by m with XMVector2Transform and keeps all four output components.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat2( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    XMStoreFloat4( &result, XMVector2Transform( source, xform ) );
+}
+
+_Use_decl_annotations_
+inline void Vector2::Transform( const Vector2* varray, size_t count, const Matrix& m, Vector4* resultArray )
+{
+    // Batch transform: count XMFLOAT2-strided inputs produce XMFLOAT4-strided outputs.
+    using namespace DirectX;
+    XMVector2TransformStream( resultArray, sizeof(XMFLOAT4), varray, sizeof(XMFLOAT2), count, XMLoadFloat4x4( &m ) );
+}
+
+inline void Vector2::TransformNormal( const Vector2& v, const Matrix& m, Vector2& result )
+{
+    // Transforms v as a normal by m (XMVector2TransformNormal), written to result.
+    using namespace DirectX;
+    const XMVECTOR normal = XMLoadFloat2( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    XMStoreFloat2( &result, XMVector2TransformNormal( normal, xform ) );
+}
+
+inline Vector2 Vector2::TransformNormal( const Vector2& v, const Matrix& m )
+{
+    // Returns v transformed as a normal by m (XMVector2TransformNormal).
+    using namespace DirectX;
+    const XMVECTOR normal = XMLoadFloat2( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    const XMVECTOR transformed = XMVector2TransformNormal( normal, xform );
+    Vector2 result;
+    XMStoreFloat2( &result, transformed );
+    return result;
+}
+
+_Use_decl_annotations_
+inline void Vector2::TransformNormal( const Vector2* varray, size_t count, const Matrix& m, Vector2* resultArray )
+{
+    // Batch normal transform: count elements, both arrays strided by sizeof(XMFLOAT2).
+    using namespace DirectX;
+    XMVector2TransformNormalStream( resultArray, sizeof(XMFLOAT2), varray, sizeof(XMFLOAT2), count, XMLoadFloat4x4( &m ) );
+}
+
+
+/****************************************************************************
+ *
+ * Vector3
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+inline bool Vector3::operator == ( const Vector3& V ) const
+{
+    // Equality of *this and V as decided by XMVector3Equal.
+    using namespace DirectX;
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    return XMVector3Equal( XMLoadFloat3( this ), rhs );
+}
+
+inline bool Vector3::operator != ( const Vector3& V ) const
+{
+    // Inequality of *this and V as decided by XMVector3NotEqual.
+    using namespace DirectX;
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    return XMVector3NotEqual( XMLoadFloat3( this ), rhs );
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Vector3& Vector3::operator+= (const Vector3& V)
+{
+    // Adds V to *this component by component.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( this );
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    XMStoreFloat3( this, XMVectorAdd( lhs, rhs ) );
+    return *this;
+}
+
+inline Vector3& Vector3::operator-= (const Vector3& V)
+{
+    // Subtracts V from *this component by component.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( this );
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    XMStoreFloat3( this, XMVectorSubtract( lhs, rhs ) );
+    return *this;
+}
+
+inline Vector3& Vector3::operator*= (const Vector3& V)
+{
+    // Multiplies *this by V component by component.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( this );
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    XMStoreFloat3( this, XMVectorMultiply( lhs, rhs ) );
+    return *this;
+}
+
+inline Vector3& Vector3::operator*= (float S)
+{
+    // Scales *this by the scalar S.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat3( this );
+    XMStoreFloat3( this, XMVectorScale( self, S ) );
+    return *this;
+}
+
+inline Vector3& Vector3::operator/= (float S)
+{
+    // Divides *this by S, implemented as a scale by 1/S; S must be non-zero (asserted).
+    using namespace DirectX;
+    assert( S != 0.0f );
+    const XMVECTOR self = XMLoadFloat3( this );
+    XMStoreFloat3( this, XMVectorScale( self, 1.f/S ) );
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Unary operators
+//------------------------------------------------------------------------------
+
+inline Vector3 Vector3::operator- () const
+{
+    // Returns a copy of *this with every component negated.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat3( this );
+    Vector3 negated;
+    XMStoreFloat3( &negated, XMVectorNegate( self ) );
+    return negated;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+inline Vector3 operator+ (const Vector3& V1, const Vector3& V2)
+{
+    // Component-wise sum V1 + V2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( &V1 );
+    const XMVECTOR rhs = XMLoadFloat3( &V2 );
+    Vector3 sum;
+    XMStoreFloat3( &sum, XMVectorAdd( lhs, rhs ) );
+    return sum;
+}
+
+inline Vector3 operator- (const Vector3& V1, const Vector3& V2)
+{
+    // Component-wise difference V1 - V2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( &V1 );
+    const XMVECTOR rhs = XMLoadFloat3( &V2 );
+    Vector3 difference;
+    XMStoreFloat3( &difference, XMVectorSubtract( lhs, rhs ) );
+    return difference;
+}
+
+inline Vector3 operator* (const Vector3& V1, const Vector3& V2)
+{
+    // Component-wise product V1 * V2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( &V1 );
+    const XMVECTOR rhs = XMLoadFloat3( &V2 );
+    Vector3 product;
+    XMStoreFloat3( &product, XMVectorMultiply( lhs, rhs ) );
+    return product;
+}
+
+inline Vector3 operator* (const Vector3& V, float S)
+{
+    // V scaled by the scalar S.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &V );
+    Vector3 scaled;
+    XMStoreFloat3( &scaled, XMVectorScale( source, S ) );
+    return scaled;
+}
+
+inline Vector3 operator/ (const Vector3& V1, const Vector3& V2)
+{
+    // Component-wise quotient V1 / V2.
+    using namespace DirectX;
+    const XMVECTOR numerator = XMLoadFloat3( &V1 );
+    const XMVECTOR denominator = XMLoadFloat3( &V2 );
+    Vector3 quotient;
+    XMStoreFloat3( &quotient, XMVectorDivide( numerator, denominator ) );
+    return quotient;
+}
+
+inline Vector3 operator* (float S, const Vector3& V)
+{
+    // Scalar-on-the-left form: V scaled by S.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &V );
+    Vector3 scaled;
+    XMStoreFloat3( &scaled, XMVectorScale( source, S ) );
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Vector operations
+//------------------------------------------------------------------------------
+
+inline bool Vector3::InBounds( const Vector3& Bounds ) const
+{
+    // Bounds test of *this against Bounds, as decided by XMVector3InBounds.
+    using namespace DirectX;
+    const XMVECTOR limits = XMLoadFloat3( &Bounds );
+    return XMVector3InBounds( XMLoadFloat3( this ), limits );
+}
+
+inline float Vector3::Length() const
+{
+    // Length from XMVector3Length; the scalar is read from the X lane of its result.
+    using namespace DirectX;
+    const XMVECTOR length = XMVector3Length( XMLoadFloat3( this ) );
+    return XMVectorGetX( length );
+}
+
+inline float Vector3::LengthSquared() const
+{
+    // Squared length from XMVector3LengthSq; the scalar is read from the X lane.
+    using namespace DirectX;
+    const XMVECTOR lengthSq = XMVector3LengthSq( XMLoadFloat3( this ) );
+    return XMVectorGetX( lengthSq );
+}
+
+inline float Vector3::Dot( const Vector3& V ) const
+{
+    // Dot product of *this and V; the scalar is read from the X lane of XMVector3Dot.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( this );
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    return XMVectorGetX( XMVector3Dot( lhs, rhs ) );
+}
+
+inline void Vector3::Cross( const Vector3& V, Vector3& result ) const
+{
+    // Cross product (*this x V), written to result.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( this );
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    XMStoreFloat3( &result, XMVector3Cross( lhs, rhs ) );
+}
+
+inline Vector3 Vector3::Cross( const Vector3& V ) const
+{
+    // Returns the cross product (*this x V).
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat3( this );
+    const XMVECTOR rhs = XMLoadFloat3( &V );
+    const XMVECTOR crossed = XMVector3Cross( lhs, rhs );
+    Vector3 result;
+    XMStoreFloat3( &result, crossed );
+    return result;
+}
+
+inline void Vector3::Normalize()
+{
+    // Normalizes *this in place via XMVector3Normalize.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat3( this );
+    XMStoreFloat3( this, XMVector3Normalize( self ) );
+}
+
+inline void Vector3::Normalize( Vector3& result ) const
+{
+    // Writes the normalized form of *this to result; *this is left untouched.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat3( this );
+    XMStoreFloat3( &result, XMVector3Normalize( self ) );
+}
+
+inline void Vector3::Clamp( const Vector3& vmin, const Vector3& vmax )
+{
+    // Clamps each component of *this into [vmin, vmax] in place.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat3( this );
+    const XMVECTOR lower = XMLoadFloat3( &vmin );
+    const XMVECTOR upper = XMLoadFloat3( &vmax );
+    XMStoreFloat3( this, XMVectorClamp( self, lower, upper ) );
+}
+
+inline void Vector3::Clamp( const Vector3& vmin, const Vector3& vmax, Vector3& result ) const
+{
+    // Writes *this clamped per component into [vmin, vmax] to result.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat3( this );
+    const XMVECTOR lower = XMLoadFloat3( &vmin );
+    const XMVECTOR upper = XMLoadFloat3( &vmax );
+    XMStoreFloat3( &result, XMVectorClamp( self, lower, upper ) );
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+inline float Vector3::Distance( const Vector3& v1, const Vector3& v2 )
+{
+    // Length of (v2 - v1), read from the X lane of XMVector3Length.
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat3( &v1 );
+    const XMVECTOR to = XMLoadFloat3( &v2 );
+    const XMVECTOR delta = XMVectorSubtract( to, from );
+    return XMVectorGetX( XMVector3Length( delta ) );
+}
+
+inline float Vector3::DistanceSquared( const Vector3& v1, const Vector3& v2 )
+{
+    // Squared length of (v2 - v1), read from the X lane of XMVector3LengthSq.
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat3( &v1 );
+    const XMVECTOR to = XMLoadFloat3( &v2 );
+    const XMVECTOR delta = XMVectorSubtract( to, from );
+    return XMVectorGetX( XMVector3LengthSq( delta ) );
+}
+
+inline void Vector3::Min( const Vector3& v1, const Vector3& v2, Vector3& result )
+{
+    // Per-component minimum of v1 and v2, written to result.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat3( &v1 );
+    const XMVECTOR b = XMLoadFloat3( &v2 );
+    XMStoreFloat3( &result, XMVectorMin( a, b ) );
+}
+
+inline Vector3 Vector3::Min( const Vector3& v1, const Vector3& v2 )
+{
+    // Returns the per-component minimum of v1 and v2.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat3( &v1 );
+    const XMVECTOR b = XMLoadFloat3( &v2 );
+    const XMVECTOR smallest = XMVectorMin( a, b );
+    Vector3 result;
+    XMStoreFloat3( &result, smallest );
+    return result;
+}
+
+inline void Vector3::Max( const Vector3& v1, const Vector3& v2, Vector3& result )
+{
+    // Per-component maximum of v1 and v2, written to result.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat3( &v1 );
+    const XMVECTOR b = XMLoadFloat3( &v2 );
+    XMStoreFloat3( &result, XMVectorMax( a, b ) );
+}
+
+inline Vector3 Vector3::Max( const Vector3& v1, const Vector3& v2 )
+{
+    // Returns the per-component maximum of v1 and v2.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat3( &v1 );
+    const XMVECTOR b = XMLoadFloat3( &v2 );
+    const XMVECTOR largest = XMVectorMax( a, b );
+    Vector3 result;
+    XMStoreFloat3( &result, largest );
+    return result;
+}
+
+inline void Vector3::Lerp( const Vector3& v1, const Vector3& v2, float t, Vector3& result )
+{
+    // Linear interpolation from v1 to v2 by factor t (t is not clamped here).
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat3( &v1 );
+    const XMVECTOR to = XMLoadFloat3( &v2 );
+    XMStoreFloat3( &result, XMVectorLerp( from, to, t ) );
+}
+
+inline Vector3 Vector3::Lerp( const Vector3& v1, const Vector3& v2, float t )
+{
+    // Returns the linear interpolation from v1 to v2 by factor t (t is not clamped here).
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat3( &v1 );
+    const XMVECTOR to = XMLoadFloat3( &v2 );
+    const XMVECTOR blended = XMVectorLerp( from, to, t );
+    Vector3 result;
+    XMStoreFloat3( &result, blended );
+    return result;
+}
+
+inline void Vector3::SmoothStep( const Vector3& v1, const Vector3& v2, float t, Vector3& result )
+{
+    // Lerp from v1 to v2 using the cubic weight 3t^2 - 2t^3 of the clamped t.
+    using namespace DirectX;
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);  // saturate t to [0, 1]
+    const float weight = t*t*(3.f - 2.f*t);
+    const XMVECTOR from = XMLoadFloat3( &v1 );
+    const XMVECTOR to = XMLoadFloat3( &v2 );
+    XMStoreFloat3( &result, XMVectorLerp( from, to, weight ) );
+}
+
+inline Vector3 Vector3::SmoothStep( const Vector3& v1, const Vector3& v2, float t )
+{
+    // Returns the lerp from v1 to v2 using the cubic weight 3t^2 - 2t^3 of the clamped t.
+    using namespace DirectX;
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);  // saturate t to [0, 1]
+    const float weight = t*t*(3.f - 2.f*t);
+    const XMVECTOR from = XMLoadFloat3( &v1 );
+    const XMVECTOR to = XMLoadFloat3( &v2 );
+    const XMVECTOR blended = XMVectorLerp( from, to, weight );
+    Vector3 result;
+    XMStoreFloat3( &result, blended );
+    return result;
+}
+
+inline void Vector3::Barycentric( const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g, Vector3& result )
+{
+    // Barycentric combination of v1, v2, v3 with weights f and g (XMVectorBaryCentric).
+    using namespace DirectX;
+    const XMVECTOR p0 = XMLoadFloat3( &v1 );
+    const XMVECTOR p1 = XMLoadFloat3( &v2 );
+    const XMVECTOR p2 = XMLoadFloat3( &v3 );
+    XMStoreFloat3( &result, XMVectorBaryCentric( p0, p1, p2, f, g ) );
+}
+
+inline Vector3 Vector3::Barycentric( const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g )
+{
+    // Returns the barycentric combination of v1, v2, v3 with weights f and g.
+    using namespace DirectX;
+    const XMVECTOR p0 = XMLoadFloat3( &v1 );
+    const XMVECTOR p1 = XMLoadFloat3( &v2 );
+    const XMVECTOR p2 = XMLoadFloat3( &v3 );
+    const XMVECTOR combined = XMVectorBaryCentric( p0, p1, p2, f, g );
+    Vector3 result;
+    XMStoreFloat3( &result, combined );
+    return result;
+}
+
+inline void Vector3::CatmullRom( const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t, Vector3& result )
+{
+    // Catmull-Rom interpolation over the four control points v1..v4 at parameter t.
+    using namespace DirectX;
+    const XMVECTOR p0 = XMLoadFloat3( &v1 );
+    const XMVECTOR p1 = XMLoadFloat3( &v2 );
+    const XMVECTOR p2 = XMLoadFloat3( &v3 );
+    const XMVECTOR p3 = XMLoadFloat3( &v4 );
+    XMStoreFloat3( &result, XMVectorCatmullRom( p0, p1, p2, p3, t ) );
+}
+
+inline Vector3 Vector3::CatmullRom( const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t )
+{
+    // Returns the Catmull-Rom interpolation over control points v1..v4 at parameter t.
+    using namespace DirectX;
+    const XMVECTOR p0 = XMLoadFloat3( &v1 );
+    const XMVECTOR p1 = XMLoadFloat3( &v2 );
+    const XMVECTOR p2 = XMLoadFloat3( &v3 );
+    const XMVECTOR p3 = XMLoadFloat3( &v4 );
+    const XMVECTOR interpolated = XMVectorCatmullRom( p0, p1, p2, p3, t );
+    Vector3 result;
+    XMStoreFloat3( &result, interpolated );
+    return result;
+}
+
+inline void Vector3::Hermite( const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t, Vector3& result )
+{
+    // Hermite interpolation at t; operands reach XMVectorHermite in (v1, t1, v2, t2) order.
+    using namespace DirectX;
+    const XMVECTOR pos0 = XMLoadFloat3( &v1 );
+    const XMVECTOR tan0 = XMLoadFloat3( &t1 );
+    const XMVECTOR pos1 = XMLoadFloat3( &v2 );
+    const XMVECTOR tan1 = XMLoadFloat3( &t2 );
+    XMStoreFloat3( &result, XMVectorHermite( pos0, tan0, pos1, tan1, t ) );
+}
+
+inline Vector3 Vector3::Hermite( const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t )
+{
+    // Value-returning Hermite interpolation; operands reach XMVectorHermite in (v1, t1, v2, t2) order.
+    using namespace DirectX;
+    const XMVECTOR pos0 = XMLoadFloat3( &v1 );
+    const XMVECTOR tan0 = XMLoadFloat3( &t1 );
+    const XMVECTOR pos1 = XMLoadFloat3( &v2 );
+    const XMVECTOR tan1 = XMLoadFloat3( &t2 );
+    const XMVECTOR interpolated = XMVectorHermite( pos0, tan0, pos1, tan1, t );
+    Vector3 result;
+    XMStoreFloat3( &result, interpolated );
+    return result;
+}
+
+inline void Vector3::Reflect( const Vector3& ivec, const Vector3& nvec, Vector3& result )
+{
+    // Reflects the incident vector ivec across the normal nvec via XMVector3Reflect.
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat3( &ivec );
+    const XMVECTOR normal = XMLoadFloat3( &nvec );
+    XMStoreFloat3( &result, XMVector3Reflect( incident, normal ) );
+}
+
+inline Vector3 Vector3::Reflect( const Vector3& ivec, const Vector3& nvec )
+{
+    // Returns ivec reflected across the normal nvec (XMVector3Reflect).
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat3( &ivec );
+    const XMVECTOR normal = XMLoadFloat3( &nvec );
+    const XMVECTOR reflected = XMVector3Reflect( incident, normal );
+    Vector3 result;
+    XMStoreFloat3( &result, reflected );
+    return result;
+}
+
+inline void Vector3::Refract( const Vector3& ivec, const Vector3& nvec, float refractionIndex, Vector3& result )
+{
+    // Refracts the incident vector ivec about the normal nvec using refractionIndex.
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat3( &ivec );
+    const XMVECTOR normal = XMLoadFloat3( &nvec );
+    XMStoreFloat3( &result, XMVector3Refract( incident, normal, refractionIndex ) );
+}
+
+inline Vector3 Vector3::Refract( const Vector3& ivec, const Vector3& nvec, float refractionIndex )
+{
+    // Returns ivec refracted about the normal nvec using refractionIndex (XMVector3Refract).
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat3( &ivec );
+    const XMVECTOR normal = XMLoadFloat3( &nvec );
+    const XMVECTOR refracted = XMVector3Refract( incident, normal, refractionIndex );
+    Vector3 result;
+    XMStoreFloat3( &result, refracted );
+    return result;
+}
+
+inline void Vector3::Transform( const Vector3& v, const Quaternion& quat, Vector3& result )
+{
+    // Rotates v by the quaternion quat (XMVector3Rotate), written to result.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &v );
+    const XMVECTOR rotation = XMLoadFloat4( &quat );
+    XMStoreFloat3( &result, XMVector3Rotate( source, rotation ) );
+}
+
+inline Vector3 Vector3::Transform( const Vector3& v, const Quaternion& quat )
+{
+    // Returns v rotated by the quaternion quat (XMVector3Rotate).
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &v );
+    const XMVECTOR rotation = XMLoadFloat4( &quat );
+    const XMVECTOR rotated = XMVector3Rotate( source, rotation );
+    Vector3 result;
+    XMStoreFloat3( &result, rotated );
+    return result;
+}
+
+inline void Vector3::Transform( const Vector3& v, const Matrix& m, Vector3& result )
+{
+    // Coordinate transform of v by m (XMVector3TransformCoord), written to result.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    XMStoreFloat3( &result, XMVector3TransformCoord( source, xform ) );
+}
+
+inline Vector3 Vector3::Transform( const Vector3& v, const Matrix& m )
+{
+    // Returns the coordinate transform of v by m (XMVector3TransformCoord).
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    const XMVECTOR transformed = XMVector3TransformCoord( source, xform );
+    Vector3 result;
+    XMStoreFloat3( &result, transformed );
+    return result;
+}
+
+_Use_decl_annotations_
+inline void Vector3::Transform( const Vector3* varray, size_t count, const Matrix& m, Vector3* resultArray )
+{
+    // Batch coordinate transform: count elements, both arrays strided by sizeof(XMFLOAT3).
+    using namespace DirectX;
+    XMVector3TransformCoordStream( resultArray, sizeof(XMFLOAT3), varray, sizeof(XMFLOAT3), count, XMLoadFloat4x4( &m ) );
+}
+
+inline void Vector3::Transform( const Vector3& v, const Matrix& m, Vector4& result )
+{
+    // Transforms v by m with XMVector3Transform and keeps all four output components.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat3( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    XMStoreFloat4( &result, XMVector3Transform( source, xform ) );
+}
+
+_Use_decl_annotations_
+inline void Vector3::Transform( const Vector3* varray, size_t count, const Matrix& m, Vector4* resultArray )
+{
+    // Batch transform: count XMFLOAT3-strided inputs produce XMFLOAT4-strided outputs.
+    using namespace DirectX;
+    XMVector3TransformStream( resultArray, sizeof(XMFLOAT4), varray, sizeof(XMFLOAT3), count, XMLoadFloat4x4( &m ) );
+}
+
+inline void Vector3::TransformNormal( const Vector3& v, const Matrix& m, Vector3& result )
+{
+    // Transforms v as a normal by m (XMVector3TransformNormal), written to result.
+    using namespace DirectX;
+    const XMVECTOR normal = XMLoadFloat3( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    XMStoreFloat3( &result, XMVector3TransformNormal( normal, xform ) );
+}
+
+inline Vector3 Vector3::TransformNormal( const Vector3& v, const Matrix& m )
+{
+    // Returns v transformed as a normal by m (XMVector3TransformNormal).
+    using namespace DirectX;
+    const XMVECTOR normal = XMLoadFloat3( &v );
+    const XMMATRIX xform = XMLoadFloat4x4( &m );
+    const XMVECTOR transformed = XMVector3TransformNormal( normal, xform );
+    Vector3 result;
+    XMStoreFloat3( &result, transformed );
+    return result;
+}
+
+_Use_decl_annotations_
+inline void Vector3::TransformNormal( const Vector3* varray, size_t count, const Matrix& m, Vector3* resultArray )
+{
+    // Batch normal transform: count elements, both arrays strided by sizeof(XMFLOAT3).
+    using namespace DirectX;
+    XMVector3TransformNormalStream( resultArray, sizeof(XMFLOAT3), varray, sizeof(XMFLOAT3), count, XMLoadFloat4x4( &m ) );
+}
+
+
+/****************************************************************************
+ *
+ * Vector4
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+inline bool Vector4::operator == ( const Vector4& V ) const
+{
+    // Equality of *this and V as decided by XMVector4Equal.
+    using namespace DirectX;
+    const XMVECTOR rhs = XMLoadFloat4( &V );
+    return XMVector4Equal( XMLoadFloat4( this ), rhs );
+}
+
+inline bool Vector4::operator != ( const Vector4& V ) const
+{
+    // Inequality of *this and V as decided by XMVector4NotEqual.
+    using namespace DirectX;
+    const XMVECTOR rhs = XMLoadFloat4( &V );
+    return XMVector4NotEqual( XMLoadFloat4( this ), rhs );
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Vector4& Vector4::operator+= (const Vector4& V)
+{
+    // Adds V to *this component by component.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    const XMVECTOR rhs = XMLoadFloat4( &V );
+    XMStoreFloat4( this, XMVectorAdd( lhs, rhs ) );
+    return *this;
+}
+
+inline Vector4& Vector4::operator-= (const Vector4& V)
+{
+    // Subtracts V from *this component by component.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    const XMVECTOR rhs = XMLoadFloat4( &V );
+    XMStoreFloat4( this, XMVectorSubtract( lhs, rhs ) );
+    return *this;
+}
+
+inline Vector4& Vector4::operator*= (const Vector4& V)
+{
+    // Multiplies *this by V component by component.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    const XMVECTOR rhs = XMLoadFloat4( &V );
+    XMStoreFloat4( this, XMVectorMultiply( lhs, rhs ) );
+    return *this;
+}
+
+inline Vector4& Vector4::operator*= (float S)
+{
+    // Scales *this by the scalar S.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorScale( self, S ) );
+    return *this;
+}
+
+inline Vector4& Vector4::operator/= (float S)
+{
+    // Divides *this by S, implemented as a scale by 1/S; S must be non-zero (asserted).
+    using namespace DirectX;
+    assert( S != 0.0f );
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorScale( self, 1.f/S ) );
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Unary operators
+//------------------------------------------------------------------------------
+
+inline Vector4 Vector4::operator- () const
+{
+    // Returns a copy of *this with every component negated.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    Vector4 negated;
+    XMStoreFloat4( &negated, XMVectorNegate( self ) );
+    return negated;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+inline Vector4 operator+ (const Vector4& V1, const Vector4& V2)
+{
+    // Component-wise sum V1 + V2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &V1 );
+    const XMVECTOR rhs = XMLoadFloat4( &V2 );
+    Vector4 sum;
+    XMStoreFloat4( &sum, XMVectorAdd( lhs, rhs ) );
+    return sum;
+}
+
+inline Vector4 operator- (const Vector4& V1, const Vector4& V2)
+{
+    // Component-wise difference V1 - V2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &V1 );
+    const XMVECTOR rhs = XMLoadFloat4( &V2 );
+    Vector4 difference;
+    XMStoreFloat4( &difference, XMVectorSubtract( lhs, rhs ) );
+    return difference;
+}
+
+inline Vector4 operator* (const Vector4& V1, const Vector4& V2)
+{
+    // Component-wise product V1 * V2.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &V1 );
+    const XMVECTOR rhs = XMLoadFloat4( &V2 );
+    Vector4 product;
+    XMStoreFloat4( &product, XMVectorMultiply( lhs, rhs ) );
+    return product;
+}
+
+inline Vector4 operator* (const Vector4& V, float S)
+{
+    // V scaled by the scalar S.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat4( &V );
+    Vector4 scaled;
+    XMStoreFloat4( &scaled, XMVectorScale( source, S ) );
+    return scaled;
+}
+
+inline Vector4 operator/ (const Vector4& V1, const Vector4& V2)
+{
+    // Component-wise quotient V1 / V2.
+    using namespace DirectX;
+    const XMVECTOR numerator = XMLoadFloat4( &V1 );
+    const XMVECTOR denominator = XMLoadFloat4( &V2 );
+    Vector4 quotient;
+    XMStoreFloat4( &quotient, XMVectorDivide( numerator, denominator ) );
+    return quotient;
+}
+
+inline Vector4 operator* (float S, const Vector4& V)
+{
+    // Scalar-on-the-left form: V scaled by S.
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat4( &V );
+    Vector4 scaled;
+    XMStoreFloat4( &scaled, XMVectorScale( source, S ) );
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Vector operations
+//------------------------------------------------------------------------------
+
+inline bool Vector4::InBounds( const Vector4& Bounds ) const
+{
+    // Bounds test of *this against Bounds, as decided by XMVector4InBounds.
+    using namespace DirectX;
+    const XMVECTOR limits = XMLoadFloat4( &Bounds );
+    return XMVector4InBounds( XMLoadFloat4( this ), limits );
+}
+
+inline float Vector4::Length() const
+{
+    // Length from XMVector4Length; the scalar is read from the X lane of its result.
+    using namespace DirectX;
+    const XMVECTOR length = XMVector4Length( XMLoadFloat4( this ) );
+    return XMVectorGetX( length );
+}
+
+inline float Vector4::LengthSquared() const
+{
+    // Squared length from XMVector4LengthSq; the scalar is read from the X lane.
+    using namespace DirectX;
+    const XMVECTOR lengthSq = XMVector4LengthSq( XMLoadFloat4( this ) );
+    return XMVectorGetX( lengthSq );
+}
+
+inline float Vector4::Dot( const Vector4& V ) const
+{
+    // Dot product of *this and V; the scalar is read from the X lane of XMVector4Dot.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    const XMVECTOR rhs = XMLoadFloat4( &V );
+    return XMVectorGetX( XMVector4Dot( lhs, rhs ) );
+}
+
+inline void Vector4::Cross( const Vector4& v1, const Vector4& v2, Vector4& result ) const
+{
+    // Three-operand 4D cross product of (*this, v1, v2), written to result.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    const XMVECTOR second = XMLoadFloat4( &v1 );
+    const XMVECTOR third = XMLoadFloat4( &v2 );
+    XMStoreFloat4( &result, XMVector4Cross( self, second, third ) );
+}
+
+inline Vector4 Vector4::Cross( const Vector4& v1, const Vector4& v2 ) const
+{
+    // Returns the three-operand 4D cross product of (*this, v1, v2).
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    const XMVECTOR second = XMLoadFloat4( &v1 );
+    const XMVECTOR third = XMLoadFloat4( &v2 );
+    const XMVECTOR crossed = XMVector4Cross( self, second, third );
+    Vector4 result;
+    XMStoreFloat4( &result, crossed );
+    return result;
+}
+
+inline void Vector4::Normalize()
+{
+    // Normalizes *this in place via XMVector4Normalize.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVector4Normalize( self ) );
+}
+
+inline void Vector4::Normalize( Vector4& result ) const
+{
+    // Writes the normalized form of *this to result; *this is left untouched.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( &result, XMVector4Normalize( self ) );
+}
+
+inline void Vector4::Clamp( const Vector4& vmin, const Vector4& vmax )
+{
+    // Clamps each component of *this into [vmin, vmax] in place.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    const XMVECTOR lower = XMLoadFloat4( &vmin );
+    const XMVECTOR upper = XMLoadFloat4( &vmax );
+    XMStoreFloat4( this, XMVectorClamp( self, lower, upper ) );
+}
+
+inline void Vector4::Clamp( const Vector4& vmin, const Vector4& vmax, Vector4& result ) const
+{
+    // Writes *this clamped per component into [vmin, vmax] to result.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    const XMVECTOR lower = XMLoadFloat4( &vmin );
+    const XMVECTOR upper = XMLoadFloat4( &vmax );
+    XMStoreFloat4( &result, XMVectorClamp( self, lower, upper ) );
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+inline float Vector4::Distance( const Vector4& v1, const Vector4& v2 )
+{
+    // Length of (v2 - v1), read from the X lane of XMVector4Length.
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat4( &v1 );
+    const XMVECTOR to = XMLoadFloat4( &v2 );
+    const XMVECTOR delta = XMVectorSubtract( to, from );
+    return XMVectorGetX( XMVector4Length( delta ) );
+}
+
+inline float Vector4::DistanceSquared( const Vector4& v1, const Vector4& v2 )
+{
+    // Squared length of (v2 - v1), read from the X lane of XMVector4LengthSq.
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat4( &v1 );
+    const XMVECTOR to = XMLoadFloat4( &v2 );
+    const XMVECTOR delta = XMVectorSubtract( to, from );
+    return XMVectorGetX( XMVector4LengthSq( delta ) );
+}
+
+inline void Vector4::Min( const Vector4& v1, const Vector4& v2, Vector4& result )
+{
+    // Per-component minimum of v1 and v2, written to result.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat4( &v1 );
+    const XMVECTOR b = XMLoadFloat4( &v2 );
+    XMStoreFloat4( &result, XMVectorMin( a, b ) );
+}
+
+inline Vector4 Vector4::Min( const Vector4& v1, const Vector4& v2 )
+{
+    // Returns the per-component minimum of v1 and v2.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat4( &v1 );
+    const XMVECTOR b = XMLoadFloat4( &v2 );
+    const XMVECTOR smallest = XMVectorMin( a, b );
+    Vector4 result;
+    XMStoreFloat4( &result, smallest );
+    return result;
+}
+
+inline void Vector4::Max( const Vector4& v1, const Vector4& v2, Vector4& result )
+{
+    // Per-component maximum of v1 and v2, written to result.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat4( &v1 );
+    const XMVECTOR b = XMLoadFloat4( &v2 );
+    XMStoreFloat4( &result, XMVectorMax( a, b ) );
+}
+
+inline Vector4 Vector4::Max( const Vector4& v1, const Vector4& v2 )
+{
+    // Returns the per-component maximum of v1 and v2.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat4( &v1 );
+    const XMVECTOR b = XMLoadFloat4( &v2 );
+    const XMVECTOR largest = XMVectorMax( a, b );
+    Vector4 result;
+    XMStoreFloat4( &result, largest );
+    return result;
+}
+
+inline void Vector4::Lerp( const Vector4& v1, const Vector4& v2, float t, Vector4& result )
+{
+    // Linear interpolation from v1 to v2 by factor t (t is not clamped here).
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat4( &v1 );
+    const XMVECTOR to = XMLoadFloat4( &v2 );
+    XMStoreFloat4( &result, XMVectorLerp( from, to, t ) );
+}
+
+inline Vector4 Vector4::Lerp( const Vector4& v1, const Vector4& v2, float t )
+{
+    // Returns the linear interpolation from v1 to v2 by factor t (t is not clamped here).
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat4( &v1 );
+    const XMVECTOR to = XMLoadFloat4( &v2 );
+    const XMVECTOR blended = XMVectorLerp( from, to, t );
+    Vector4 result;
+    XMStoreFloat4( &result, blended );
+    return result;
+}
+
+inline void Vector4::SmoothStep( const Vector4& v1, const Vector4& v2, float t, Vector4& result )
+{
+    // Lerp from v1 to v2 using the cubic weight 3t^2 - 2t^3 of the clamped t.
+    using namespace DirectX;
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);  // saturate t to [0, 1]
+    const float weight = t*t*(3.f - 2.f*t);
+    const XMVECTOR from = XMLoadFloat4( &v1 );
+    const XMVECTOR to = XMLoadFloat4( &v2 );
+    XMStoreFloat4( &result, XMVectorLerp( from, to, weight ) );
+}
+
+inline Vector4 Vector4::SmoothStep( const Vector4& v1, const Vector4& v2, float t )
+{
+    // Returns the lerp from v1 to v2 using the cubic weight 3t^2 - 2t^3 of the clamped t.
+    using namespace DirectX;
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);  // saturate t to [0, 1]
+    const float weight = t*t*(3.f - 2.f*t);
+    const XMVECTOR from = XMLoadFloat4( &v1 );
+    const XMVECTOR to = XMLoadFloat4( &v2 );
+    const XMVECTOR blended = XMVectorLerp( from, to, weight );
+    Vector4 result;
+    XMStoreFloat4( &result, blended );
+    return result;
+}
+
+inline void Vector4::Barycentric( const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g, Vector4& result )
+{
+    // Barycentric combination of v1, v2, v3 with weights f and g (XMVectorBaryCentric).
+    using namespace DirectX;
+    const XMVECTOR p0 = XMLoadFloat4( &v1 );
+    const XMVECTOR p1 = XMLoadFloat4( &v2 );
+    const XMVECTOR p2 = XMLoadFloat4( &v3 );
+    XMStoreFloat4( &result, XMVectorBaryCentric( p0, p1, p2, f, g ) );
+}
+
+inline Vector4 Vector4::Barycentric( const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g )
+{
+    // Returns the barycentric combination of v1, v2, v3 with weights f and g.
+    using namespace DirectX;
+    const XMVECTOR p0 = XMLoadFloat4( &v1 );
+    const XMVECTOR p1 = XMLoadFloat4( &v2 );
+    const XMVECTOR p2 = XMLoadFloat4( &v3 );
+    const XMVECTOR combined = XMVectorBaryCentric( p0, p1, p2, f, g );
+    Vector4 result;
+    XMStoreFloat4( &result, combined );
+    return result;
+}
+
+// Evaluates XMVectorCatmullRom over the four control points at parameter t; writes into result.
+inline void Vector4::CatmullRom( const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR p1 = XMLoadFloat4(&v1);
+    const XMVECTOR p2 = XMLoadFloat4(&v2);
+    const XMVECTOR p3 = XMLoadFloat4(&v3);
+    const XMVECTOR p4 = XMLoadFloat4(&v4);
+    XMStoreFloat4(&result, XMVectorCatmullRom(p1, p2, p3, p4, t));
+}
+
+// Returns XMVectorCatmullRom evaluated over the four control points at parameter t.
+inline Vector4 Vector4::CatmullRom( const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t )
+{
+    using namespace DirectX;
+    const XMVECTOR p1 = XMLoadFloat4(&v1);
+    const XMVECTOR p2 = XMLoadFloat4(&v2);
+    const XMVECTOR p3 = XMLoadFloat4(&v3);
+    const XMVECTOR p4 = XMLoadFloat4(&v4);
+    const XMVECTOR point = XMVectorCatmullRom(p1, p2, p3, p4, t);
+    Vector4 out;
+    XMStoreFloat4(&out, point);
+    return out;
+}
+
+// Evaluates XMVectorHermite for positions v1, v2 and tangents t1, t2 at parameter t; writes into result.
+inline void Vector4::Hermite( const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR pos1 = XMLoadFloat4(&v1);
+    const XMVECTOR tan1 = XMLoadFloat4(&t1);
+    const XMVECTOR pos2 = XMLoadFloat4(&v2);
+    const XMVECTOR tan2 = XMLoadFloat4(&t2);
+    XMStoreFloat4(&result, XMVectorHermite(pos1, tan1, pos2, tan2, t));
+}
+
+// Returns XMVectorHermite for positions v1, v2 and tangents t1, t2 at parameter t.
+inline Vector4 Vector4::Hermite( const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t )
+{
+    using namespace DirectX;
+    const XMVECTOR pos1 = XMLoadFloat4(&v1);
+    const XMVECTOR tan1 = XMLoadFloat4(&t1);
+    const XMVECTOR pos2 = XMLoadFloat4(&v2);
+    const XMVECTOR tan2 = XMLoadFloat4(&t2);
+    const XMVECTOR point = XMVectorHermite(pos1, tan1, pos2, tan2, t);
+    Vector4 out;
+    XMStoreFloat4(&out, point);
+    return out;
+}
+
+// Reflects ivec about nvec with XMVector4Reflect and writes the outcome into result.
+inline void Vector4::Reflect( const Vector4& ivec, const Vector4& nvec, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat4(&ivec);
+    const XMVECTOR normal = XMLoadFloat4(&nvec);
+    XMStoreFloat4(&result, XMVector4Reflect(incident, normal));
+}
+
+// Returns ivec reflected about nvec as computed by XMVector4Reflect.
+inline Vector4 Vector4::Reflect( const Vector4& ivec, const Vector4& nvec )
+{
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat4(&ivec);
+    const XMVECTOR normal = XMLoadFloat4(&nvec);
+    const XMVECTOR reflected = XMVector4Reflect(incident, normal);
+    Vector4 out;
+    XMStoreFloat4(&out, reflected);
+    return out;
+}
+
+// Refracts ivec through nvec with XMVector4Refract using refractionIndex; writes into result.
+inline void Vector4::Refract( const Vector4& ivec, const Vector4& nvec, float refractionIndex, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat4(&ivec);
+    const XMVECTOR normal = XMLoadFloat4(&nvec);
+    XMStoreFloat4(&result, XMVector4Refract(incident, normal, refractionIndex));
+}
+
+// Returns ivec refracted through nvec by XMVector4Refract with the given refractionIndex.
+inline Vector4 Vector4::Refract( const Vector4& ivec, const Vector4& nvec, float refractionIndex )
+{
+    using namespace DirectX;
+    const XMVECTOR incident = XMLoadFloat4(&ivec);
+    const XMVECTOR normal = XMLoadFloat4(&nvec);
+    const XMVECTOR refracted = XMVector4Refract(incident, normal, refractionIndex);
+    Vector4 out;
+    XMStoreFloat4(&out, refracted);
+    return out;
+}
+
+// Rotates the 2D vector v by quat (XMVector3Rotate) and stores it in result with w forced to 1.
+inline void Vector4::Transform( const Vector2& v, const Quaternion& quat, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2(&v);
+    const XMVECTOR rot = XMLoadFloat4(&quat);
+    const XMVECTOR rotated = XMVector3Rotate(vec, rot);
+    XMStoreFloat4(&result, XMVectorSelect(g_XMIdentityR3, rotated, g_XMSelect1110)); // result.w = 1.f
+}
+
+// Returns the 2D vector v rotated by quat (XMVector3Rotate) with w forced to 1.
+inline Vector4 Vector4::Transform( const Vector2& v, const Quaternion& quat )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2(&v);
+    const XMVECTOR rot = XMLoadFloat4(&quat);
+    const XMVECTOR rotated = XMVector3Rotate(vec, rot);
+    const XMVECTOR withW = XMVectorSelect(g_XMIdentityR3, rotated, g_XMSelect1110); // result.w = 1.f
+    Vector4 out;
+    XMStoreFloat4(&out, withW);
+    return out;
+}
+
+// Rotates the 3D vector v by quat (XMVector3Rotate) and stores it in result with w forced to 1.
+inline void Vector4::Transform( const Vector3& v, const Quaternion& quat, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat3(&v);
+    const XMVECTOR rot = XMLoadFloat4(&quat);
+    const XMVECTOR rotated = XMVector3Rotate(vec, rot);
+    XMStoreFloat4(&result, XMVectorSelect(g_XMIdentityR3, rotated, g_XMSelect1110)); // result.w = 1.f
+}
+
+// Returns the 3D vector v rotated by quat (XMVector3Rotate) with w forced to 1.
+inline Vector4 Vector4::Transform( const Vector3& v, const Quaternion& quat )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat3(&v);
+    const XMVECTOR rot = XMLoadFloat4(&quat);
+    const XMVECTOR rotated = XMVector3Rotate(vec, rot);
+    const XMVECTOR withW = XMVectorSelect(g_XMIdentityR3, rotated, g_XMSelect1110); // result.w = 1.f
+    Vector4 out;
+    XMStoreFloat4(&out, withW);
+    return out;
+}
+
+// Rotates the xyz part of v by quat (XMVector3Rotate); result keeps the original v.w.
+inline void Vector4::Transform( const Vector4& v, const Quaternion& quat, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat4(&v);
+    const XMVECTOR rot = XMLoadFloat4(&quat);
+    const XMVECTOR rotated = XMVector3Rotate(vec, rot);
+    XMStoreFloat4(&result, XMVectorSelect(vec, rotated, g_XMSelect1110)); // result.w = v.w
+}
+
+// Returns v with its xyz part rotated by quat (XMVector3Rotate) and its w left as it was.
+inline Vector4 Vector4::Transform( const Vector4& v, const Quaternion& quat )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat4(&v);
+    const XMVECTOR rot = XMLoadFloat4(&quat);
+    const XMVECTOR rotated = XMVector3Rotate(vec, rot);
+    const XMVECTOR withW = XMVectorSelect(vec, rotated, g_XMSelect1110); // result.w = v.w
+    Vector4 out;
+    XMStoreFloat4(&out, withW);
+    return out;
+}
+
+// Transforms the full 4D vector v by matrix m (XMVector4Transform) and writes into result.
+inline void Vector4::Transform( const Vector4& v, const Matrix& m, Vector4& result )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat4(&v);
+    const XMMATRIX xform = XMLoadFloat4x4(&m);
+    XMStoreFloat4(&result, XMVector4Transform(vec, xform));
+}
+
+// Returns the full 4D vector v transformed by matrix m (XMVector4Transform).
+inline Vector4 Vector4::Transform( const Vector4& v, const Matrix& m )
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat4(&v);
+    const XMMATRIX xform = XMLoadFloat4x4(&m);
+    const XMVECTOR moved = XMVector4Transform(vec, xform);
+    Vector4 out;
+    XMStoreFloat4(&out, moved);
+    return out;
+}
+
+// Transforms count vectors from varray by m into resultArray; both arrays use a sizeof(XMFLOAT4) stride.
+_Use_decl_annotations_
+inline void Vector4::Transform( const Vector4* varray, size_t count, const Matrix& m, Vector4* resultArray )
+{
+    using namespace DirectX;
+    XMVector4TransformStream(resultArray, sizeof(XMFLOAT4), varray, sizeof(XMFLOAT4), count, XMLoadFloat4x4(&m));
+}
+
+
+/****************************************************************************
+ *
+ * Matrix
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+// Exact comparison: true only when every row of this matrix equals the same row of M (XMVector4Equal).
+inline bool Matrix::operator == ( const Matrix& M ) const
+{
+    using namespace DirectX;
+    const XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_11));
+    const XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_21));
+    const XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_31));
+    const XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+    return ( XMVector4Equal(lhs0, rhs0)
+             && XMVector4Equal(lhs1, rhs1)
+             && XMVector4Equal(lhs2, rhs2)
+             && XMVector4Equal(lhs3, rhs3) ) != 0;
+}
+
+// Exact comparison: true as soon as any row of this matrix differs from the same row of M (XMVector4NotEqual).
+inline bool Matrix::operator != ( const Matrix& M ) const
+{
+    using namespace DirectX;
+    const XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_11));
+    const XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_21));
+    const XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_31));
+    const XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+    return ( XMVector4NotEqual(lhs0, rhs0)
+             || XMVector4NotEqual(lhs1, rhs1)
+             || XMVector4NotEqual(lhs2, rhs2)
+             || XMVector4NotEqual(lhs3, rhs3) ) != 0;
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Matrix::Matrix(const XMFLOAT3X3& M)
+{
+    // The upper-left 3x3 comes from M; every remaining element is taken from the identity matrix.
+    _11 = M._11; _12 = M._12; _13 = M._13;  _21 = M._21; _22 = M._22; _23 = M._23;
+    _31 = M._31; _32 = M._32; _33 = M._33;
+    _14 = _24 = _34 = _41 = _42 = _43 = 0.f;  _44 = 1.f;
+}
+
+inline Matrix::Matrix(const XMFLOAT4X3& M)
+{
+    // The first three columns of all four rows come from M; the fourth column is (0, 0, 0, 1).
+    _11 = M._11; _12 = M._12; _13 = M._13;  _21 = M._21; _22 = M._22; _23 = M._23;
+    _31 = M._31; _32 = M._32; _33 = M._33;  _41 = M._41; _42 = M._42; _43 = M._43;
+    _14 = _24 = _34 = 0.f;  _44 = 1.f;
+}
+
+inline Matrix& Matrix::operator= (const XMFLOAT3X3& M)
+{
+    // The upper-left 3x3 comes from M; every remaining element is taken from the identity matrix.
+    _11 = M._11; _12 = M._12; _13 = M._13;  _21 = M._21; _22 = M._22; _23 = M._23;
+    _31 = M._31; _32 = M._32; _33 = M._33;
+    _14 = _24 = _34 = _41 = _42 = _43 = 0.f;  _44 = 1.f;
+    return *this;
+}
+
+inline Matrix& Matrix::operator= (const XMFLOAT4X3& M)
+{
+    // The first three columns of all four rows come from M; the fourth column is (0, 0, 0, 1).
+    _11 = M._11; _12 = M._12; _13 = M._13;  _21 = M._21; _22 = M._22; _23 = M._23;
+    _31 = M._31; _32 = M._32; _33 = M._33;  _41 = M._41; _42 = M._42; _43 = M._43;
+    _14 = _24 = _34 = 0.f;  _44 = 1.f;
+    return *this;
+}
+
+// Adds M to this matrix element by element, handling one four-float row at a time.
+inline Matrix& Matrix::operator+= (const Matrix& M)
+{
+    using namespace DirectX;
+    XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_11));
+    XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_21));
+    XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_31));
+    XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+
+    lhs0 = XMVectorAdd(lhs0, rhs0);
+    lhs1 = XMVectorAdd(lhs1, rhs1);
+    lhs2 = XMVectorAdd(lhs2, rhs2);
+    lhs3 = XMVectorAdd(lhs3, rhs3);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_11), lhs0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_21), lhs1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_31), lhs2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_41), lhs3);
+    return *this;
+}
+
+// Subtracts M from this matrix element by element, handling one four-float row at a time.
+inline Matrix& Matrix::operator-= (const Matrix& M)
+{
+    using namespace DirectX;
+    XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_11));
+    XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_21));
+    XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_31));
+    XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+
+    lhs0 = XMVectorSubtract(lhs0, rhs0);
+    lhs1 = XMVectorSubtract(lhs1, rhs1);
+    lhs2 = XMVectorSubtract(lhs2, rhs2);
+    lhs3 = XMVectorSubtract(lhs3, rhs3);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_11), lhs0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_21), lhs1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_31), lhs2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_41), lhs3);
+    return *this;
+}
+
+// Replaces this matrix with the matrix product (this * M) computed by XMMatrixMultiply.
+inline Matrix& Matrix::operator*= (const Matrix& M)
+{
+    using namespace DirectX;
+    const XMMATRIX lhs = XMLoadFloat4x4(this);
+    const XMMATRIX rhs = XMLoadFloat4x4(&M);
+    XMStoreFloat4x4(this, XMMatrixMultiply(lhs, rhs));
+    return *this;
+}
+
+// Multiplies every element of this matrix by the scalar S, one row at a time.
+inline Matrix& Matrix::operator*= (float S)
+{
+    using namespace DirectX;
+    XMVECTOR row0 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_11));
+    XMVECTOR row1 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_21));
+    XMVECTOR row2 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_31));
+    XMVECTOR row3 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_41));
+
+    row0 = XMVectorScale(row0, S);
+    row1 = XMVectorScale(row1, S);
+    row2 = XMVectorScale(row2, S);
+    row3 = XMVectorScale(row3, S);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_11), row0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_21), row1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_31), row2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_41), row3);
+    return *this;
+}
+
+// Divides every element by S by scaling each row with 1/S; asserts that S is not zero.
+inline Matrix& Matrix::operator/= (float S)
+{
+    using namespace DirectX;
+    assert(S != 0.f);
+    XMVECTOR row0 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_11));
+    XMVECTOR row1 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_21));
+    XMVECTOR row2 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_31));
+    XMVECTOR row3 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_41));
+
+    const float reciprocal = 1.f / S;
+
+    row0 = XMVectorScale(row0, reciprocal);
+    row1 = XMVectorScale(row1, reciprocal);
+    row2 = XMVectorScale(row2, reciprocal);
+    row3 = XMVectorScale(row3, reciprocal);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_11), row0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_21), row1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_31), row2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_41), row3);
+    return *this;
+}
+
+// Divides each element of this matrix by the matching element of M (element-wise, not an inverse multiply).
+inline Matrix& Matrix::operator/= (const Matrix& M)
+{
+    using namespace DirectX;
+    XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_11));
+    XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_21));
+    XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_31));
+    XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<XMFLOAT4*>(&_41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+
+    lhs0 = XMVectorDivide(lhs0, rhs0);
+    lhs1 = XMVectorDivide(lhs1, rhs1);
+    lhs2 = XMVectorDivide(lhs2, rhs2);
+    lhs3 = XMVectorDivide(lhs3, rhs3);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_11), lhs0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_21), lhs1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_31), lhs2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&_41), lhs3);
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Unary operators
+//------------------------------------------------------------------------------
+
+// Returns a copy of this matrix with every element negated (XMVectorNegate per row).
+inline Matrix Matrix::operator- () const
+{
+    using namespace DirectX;
+    XMVECTOR row0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_11));
+    XMVECTOR row1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_21));
+    XMVECTOR row2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_31));
+    XMVECTOR row3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&_41));
+
+    row0 = XMVectorNegate(row0);
+    row1 = XMVectorNegate(row1);
+    row2 = XMVectorNegate(row2);
+    row3 = XMVectorNegate(row3);
+    Matrix negated;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&negated._11), row0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&negated._21), row1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&negated._31), row2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&negated._41), row3);
+    return negated;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+// Returns the element-wise sum of M1 and M2, computed one four-float row at a time.
+inline Matrix operator+ (const Matrix& M1, const Matrix& M2)
+{
+    using namespace DirectX;
+    XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._11));
+    XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._21));
+    XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._31));
+    XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._41));
+
+    lhs0 = XMVectorAdd(lhs0, rhs0);
+    lhs1 = XMVectorAdd(lhs1, rhs1);
+    lhs2 = XMVectorAdd(lhs2, rhs2);
+    lhs3 = XMVectorAdd(lhs3, rhs3);
+    Matrix sum;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&sum._11), lhs0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&sum._21), lhs1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&sum._31), lhs2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&sum._41), lhs3);
+    return sum;
+}
+
+// Returns the element-wise difference M1 - M2, computed one four-float row at a time.
+inline Matrix operator- (const Matrix& M1, const Matrix& M2)
+{
+    using namespace DirectX;
+    XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._11));
+    XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._21));
+    XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._31));
+    XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._41));
+
+    lhs0 = XMVectorSubtract(lhs0, rhs0);
+    lhs1 = XMVectorSubtract(lhs1, rhs1);
+    lhs2 = XMVectorSubtract(lhs2, rhs2);
+    lhs3 = XMVectorSubtract(lhs3, rhs3);
+    Matrix difference;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&difference._11), lhs0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&difference._21), lhs1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&difference._31), lhs2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&difference._41), lhs3);
+    return difference;
+}
+
+// Returns the matrix product M1 * M2 as computed by XMMatrixMultiply.
+inline Matrix operator* (const Matrix& M1, const Matrix& M2)
+{
+    using namespace DirectX;
+    const XMMATRIX lhs = XMLoadFloat4x4(&M1);
+    const XMMATRIX rhs = XMLoadFloat4x4(&M2);
+    const XMMATRIX product = XMMatrixMultiply(lhs, rhs);
+    Matrix out;
+    XMStoreFloat4x4(&out, product);
+    return out;
+}
+
+// Returns M with every element multiplied by the scalar S.
+inline Matrix operator* (const Matrix& M, float S)
+{
+    using namespace DirectX;
+    XMVECTOR row0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    XMVECTOR row1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    XMVECTOR row2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    XMVECTOR row3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+
+    row0 = XMVectorScale(row0, S);
+    row1 = XMVectorScale(row1, S);
+    row2 = XMVectorScale(row2, S);
+    row3 = XMVectorScale(row3, S);
+    Matrix scaled;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._11), row0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._21), row1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._31), row2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._41), row3);
+    return scaled;
+}
+
+// Returns M with every element divided by S (each row is scaled by 1/S); asserts that S is not zero.
+inline Matrix operator/ (const Matrix& M, float S)
+{
+    using namespace DirectX;
+    assert(S != 0.f);
+
+    XMVECTOR row0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    XMVECTOR row1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    XMVECTOR row2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    XMVECTOR row3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+
+    const float reciprocal = 1.f / S;
+    row0 = XMVectorScale(row0, reciprocal);
+    row1 = XMVectorScale(row1, reciprocal);
+    row2 = XMVectorScale(row2, reciprocal);
+    row3 = XMVectorScale(row3, reciprocal);
+
+    Matrix scaled;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._11), row0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._21), row1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._31), row2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._41), row3);
+    return scaled;
+}
+
+// Returns the element-wise quotient of M1 by M2 (each element divided by its counterpart, not an inverse multiply).
+inline Matrix operator/ (const Matrix& M1, const Matrix& M2)
+{
+    using namespace DirectX;
+    XMVECTOR lhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._11));
+    XMVECTOR lhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._21));
+    XMVECTOR lhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._31));
+    XMVECTOR lhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M1._41));
+
+    const XMVECTOR rhs0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._11));
+    const XMVECTOR rhs1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._21));
+    const XMVECTOR rhs2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._31));
+    const XMVECTOR rhs3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M2._41));
+
+    lhs0 = XMVectorDivide(lhs0, rhs0);
+    lhs1 = XMVectorDivide(lhs1, rhs1);
+    lhs2 = XMVectorDivide(lhs2, rhs2);
+    lhs3 = XMVectorDivide(lhs3, rhs3);
+    Matrix quotient;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&quotient._11), lhs0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&quotient._21), lhs1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&quotient._31), lhs2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&quotient._41), lhs3);
+    return quotient;
+}
+
+// Scalar-on-the-left overload: returns M with every element multiplied by S.
+inline Matrix operator* (float S, const Matrix& M)
+{
+    using namespace DirectX;
+    XMVECTOR row0 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._11));
+    XMVECTOR row1 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._21));
+    XMVECTOR row2 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._31));
+    XMVECTOR row3 = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(&M._41));
+
+    row0 = XMVectorScale(row0, S);
+    row1 = XMVectorScale(row1, S);
+    row2 = XMVectorScale(row2, S);
+    row3 = XMVectorScale(row3, S);
+
+    Matrix scaled;
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._11), row0);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._21), row1);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._31), row2);
+    XMStoreFloat4(reinterpret_cast<XMFLOAT4*>(&scaled._41), row3);
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Matrix operations
+//------------------------------------------------------------------------------
+
+// Splits this matrix with XMMatrixDecompose; the three outputs are written only when it succeeds.
+inline bool Matrix::Decompose( Vector3& scale, Quaternion& rotation, Vector3& translation )
+{
+    using namespace DirectX;
+
+    XMVECTOR scaleV, rotationV, translationV;
+    const bool ok = XMMatrixDecompose(&scaleV, &rotationV, &translationV, *this);
+    if (ok)
+    {
+        XMStoreFloat3(&scale, scaleV);
+        XMStoreFloat4(&rotation, rotationV);
+        XMStoreFloat3(&translation, translationV);
+    }
+    return ok;
+}
+
+// Returns the transpose of this matrix (XMMatrixTranspose); this matrix is left untouched.
+inline Matrix Matrix::Transpose() const
+{
+    using namespace DirectX;
+    Matrix transposed;
+    XMStoreFloat4x4(&transposed, XMMatrixTranspose(XMLoadFloat4x4(this)));
+    return transposed;
+}
+
+// Writes the transpose of this matrix (XMMatrixTranspose) into result.
+inline void Matrix::Transpose( Matrix& result ) const
+{
+    using namespace DirectX;
+    XMStoreFloat4x4(&result, XMMatrixTranspose(XMLoadFloat4x4(this)));
+}
+
+// Returns XMMatrixInverse of this matrix; the determinant it also reports is discarded.
+inline Matrix Matrix::Invert() const
+{
+    using namespace DirectX;
+    XMVECTOR unusedDet;
+    Matrix inverse;
+    XMStoreFloat4x4(&inverse, XMMatrixInverse(&unusedDet, XMLoadFloat4x4(this)));
+    return inverse;
+}
+
+// Writes XMMatrixInverse of this matrix into result; the determinant it also reports is discarded.
+inline void Matrix::Invert( Matrix& result ) const
+{
+    using namespace DirectX;
+    XMVECTOR unusedDet;
+    XMStoreFloat4x4(&result, XMMatrixInverse(&unusedDet, XMLoadFloat4x4(this)));
+}
+
+// Returns the determinant: the x component of the vector produced by XMMatrixDeterminant.
+inline float Matrix::Determinant() const
+{
+    using namespace DirectX;
+    return XMVectorGetX(XMMatrixDeterminant(XMLoadFloat4x4(this)));
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+// Builds a billboard matrix positioned at object whose Z row points from the camera toward the object.
+_Use_decl_annotations_
+inline Matrix Matrix::CreateBillboard( const Vector3& object, const Vector3& cameraPosition, const Vector3& cameraUp, const Vector3* cameraForward )
+{
+    using namespace DirectX;
+    const XMVECTOR objectPos = XMLoadFloat3(&object);
+    const XMVECTOR cameraPos = XMLoadFloat3(&cameraPosition);
+    XMVECTOR zAxis = XMVectorSubtract(objectPos, cameraPos);
+
+    // When the camera is practically on the object, fall back to -cameraForward, or to -Z if none is given.
+    const XMVECTOR distSq = XMVector3LengthSq(zAxis);
+    if (!XMVector3Less(distSq, g_XMEpsilon))
+    {
+        zAxis = XMVector3Normalize(zAxis);
+    }
+    else if (cameraForward)
+    {
+        zAxis = XMVectorNegate(XMLoadFloat3(cameraForward));
+    }
+    else
+    {
+        zAxis = g_XMNegIdentityR2;
+    }
+
+    // X row = normalize(cameraUp x Z); the Y row is then rebuilt as Z x X.
+    const XMVECTOR upDir = XMLoadFloat3(&cameraUp);
+    const XMVECTOR xAxis = XMVector3Normalize(XMVector3Cross(upDir, zAxis));
+    const XMVECTOR yAxis = XMVector3Cross(zAxis, xAxis);
+
+    // The last row carries the object position with w forced to 1.
+    XMMATRIX M;
+    M.r[0] = xAxis;
+    M.r[1] = yAxis;
+    M.r[2] = zAxis;
+    M.r[3] = XMVectorSetW(objectPos, 1.f);
+
+    Matrix R;
+    XMStoreFloat4x4(&R, M);
+    return R;
+}
+
+// Billboard restricted to turning about rotateAxis, which is placed in the Y row exactly as loaded.
+_Use_decl_annotations_
+inline Matrix Matrix::CreateConstrainedBillboard( const Vector3& object, const Vector3& cameraPosition, const Vector3& rotateAxis,
+                                                  const Vector3* cameraForward, const Vector3* objectForward )
+{
+    using namespace DirectX;
+    static const XMVECTORF32 s_minAngle = { 0.99825467075f, 0.99825467075f, 0.99825467075f, 0.99825467075f }; // 1.0 - XMConvertToRadians( 0.1f );
+
+    const XMVECTOR objectPos = XMLoadFloat3(&object);
+    const XMVECTOR cameraPos = XMLoadFloat3(&cameraPosition);
+    XMVECTOR faceDir = XMVectorSubtract(objectPos, cameraPos);
+
+    // When the camera is practically on the object, use -cameraForward, or -Z if no forward is supplied.
+    if (XMVector3Less(XMVector3LengthSq(faceDir), g_XMEpsilon))
+    {
+        if (cameraForward)
+        {
+            const XMVECTOR fwd = XMLoadFloat3(cameraForward);
+            faceDir = XMVectorNegate(fwd);
+        }
+        else
+            faceDir = g_XMNegIdentityR2;
+    }
+    else
+    {
+        faceDir = XMVector3Normalize(faceDir);
+    }
+
+    // The rotation axis is used as the Y row; no normalization is applied to it in this function.
+    const XMVECTOR yAxis = XMLoadFloat3(&rotateAxis);
+    XMVECTOR xAxis, zAxis;
+
+    XMVECTOR alignment = XMVectorAbs(XMVector3Dot(yAxis, faceDir));
+    if (XMVector3Greater(alignment, s_minAngle))
+    {
+        // |axis . faceDir| exceeds s_minAngle, so a different forward reference is chosen for Z.
+        if (objectForward)
+        {
+            zAxis = XMLoadFloat3(objectForward);
+            alignment = XMVectorAbs(XMVector3Dot(yAxis, zAxis));
+            if (XMVector3Greater(alignment, s_minAngle))
+            {
+                alignment = XMVectorAbs(XMVector3Dot(yAxis, g_XMNegIdentityR2));
+                zAxis = (XMVector3Greater(alignment, s_minAngle)) ? g_XMIdentityR0 : g_XMNegIdentityR2;
+            }
+        }
+        else
+        {
+            alignment = XMVectorAbs(XMVector3Dot(yAxis, g_XMNegIdentityR2));
+            zAxis = (XMVector3Greater(alignment, s_minAngle)) ? g_XMIdentityR0 : g_XMNegIdentityR2;
+        }
+
+        xAxis = XMVector3Cross(yAxis, zAxis);
+        xAxis = XMVector3Normalize(xAxis);
+        zAxis = XMVector3Normalize(XMVector3Cross(xAxis, yAxis));
+    }
+    else
+    {
+        // Otherwise derive the X row from the axis and the facing direction.
+        xAxis = XMVector3Cross(yAxis, faceDir);
+        xAxis = XMVector3Normalize(xAxis);
+        zAxis = XMVector3Normalize(XMVector3Cross(xAxis, yAxis));
+    }
+
+    // Rows: X, Y (the rotation axis), Z, then the object position with w forced to 1.
+    XMMATRIX M;
+    M.r[0] = xAxis;
+    M.r[1] = yAxis;
+    M.r[2] = zAxis;
+    M.r[3] = XMVectorSetW(objectPos, 1.f);
+
+    Matrix R;
+    XMStoreFloat4x4(&R, M);
+    return R;
+}
+
+// Translation matrix built by XMMatrixTranslation from the x, y, z components of position.
+inline Matrix Matrix::CreateTranslation( const Vector3& position )
+{
+    Matrix translation;
+    DirectX::XMStoreFloat4x4(&translation, DirectX::XMMatrixTranslation(position.x, position.y, position.z));
+    return translation;
+}
+
+// Translation matrix built by XMMatrixTranslation from the offsets x, y, z.
+inline Matrix Matrix::CreateTranslation( float x, float y, float z )
+{
+    Matrix translation;
+    DirectX::XMStoreFloat4x4(&translation, DirectX::XMMatrixTranslation(x, y, z));
+    return translation;
+}
+
+// Scaling matrix built by XMMatrixScaling from the per-axis factors in scales.
+inline Matrix Matrix::CreateScale( const Vector3& scales )
+{
+    Matrix scaling;
+    DirectX::XMStoreFloat4x4(&scaling, DirectX::XMMatrixScaling(scales.x, scales.y, scales.z));
+    return scaling;
+}
+
+// Scaling matrix built by XMMatrixScaling from the per-axis factors xs, ys, zs.
+inline Matrix Matrix::CreateScale( float xs, float ys, float zs )
+{
+    Matrix scaling;
+    DirectX::XMStoreFloat4x4(&scaling, DirectX::XMMatrixScaling(xs, ys, zs));
+    return scaling;
+}
+
+// Uniform scaling matrix: the same factor is passed to XMMatrixScaling for all three axes.
+inline Matrix Matrix::CreateScale( float scale )
+{
+    Matrix scaling;
+    DirectX::XMStoreFloat4x4(&scaling, DirectX::XMMatrixScaling(scale, scale, scale));
+    return scaling;
+}
+
+// Rotation about the X axis by the given angle in radians (XMMatrixRotationX).
+inline Matrix Matrix::CreateRotationX( float radians )
+{
+    Matrix rotation;
+    DirectX::XMStoreFloat4x4(&rotation, DirectX::XMMatrixRotationX(radians));
+    return rotation;
+}
+
+// Rotation about the Y axis by the given angle in radians (XMMatrixRotationY).
+inline Matrix Matrix::CreateRotationY( float radians )
+{
+    Matrix rotation;
+    DirectX::XMStoreFloat4x4(&rotation, DirectX::XMMatrixRotationY(radians));
+    return rotation;
+}
+
+// Rotation about the Z axis by the given angle in radians (XMMatrixRotationZ).
+inline Matrix Matrix::CreateRotationZ( float radians )
+{
+    Matrix rotation;
+    DirectX::XMStoreFloat4x4(&rotation, DirectX::XMMatrixRotationZ(radians));
+    return rotation;
+}
+
+// Rotation by angle about the given axis, built with XMMatrixRotationAxis.
+inline Matrix Matrix::CreateFromAxisAngle( const Vector3& axis, float angle )
+{
+    using namespace DirectX;
+    Matrix rotation;
+    XMStoreFloat4x4(&rotation, XMMatrixRotationAxis(XMLoadFloat3(&axis), angle));
+    return rotation;
+}
+
+// Right-handed perspective projection from field of view and aspect ratio (XMMatrixPerspectiveFovRH).
+inline Matrix Matrix::CreatePerspectiveFieldOfView( float fov, float aspectRatio, float nearPlane, float farPlane )
+{
+    Matrix projection;
+    DirectX::XMStoreFloat4x4(&projection, DirectX::XMMatrixPerspectiveFovRH(fov, aspectRatio, nearPlane, farPlane));
+    return projection;
+}
+
+// Right-handed perspective projection from view-volume width and height (XMMatrixPerspectiveRH).
+inline Matrix Matrix::CreatePerspective( float width, float height, float nearPlane, float farPlane )
+{
+    Matrix projection;
+    DirectX::XMStoreFloat4x4(&projection, DirectX::XMMatrixPerspectiveRH(width, height, nearPlane, farPlane));
+    return projection;
+}
+
+// Right-handed off-center perspective projection from explicit frustum edges (XMMatrixPerspectiveOffCenterRH).
+inline Matrix Matrix::CreatePerspectiveOffCenter( float left, float right, float bottom, float top, float nearPlane, float farPlane )
+{
+    Matrix projection;
+    DirectX::XMStoreFloat4x4(&projection, DirectX::XMMatrixPerspectiveOffCenterRH(left, right, bottom, top, nearPlane, farPlane));
+    return projection;
+}
+
+// Right-handed orthographic projection from view-volume width and height (XMMatrixOrthographicRH).
+inline Matrix Matrix::CreateOrthographic( float width, float height, float zNearPlane, float zFarPlane )
+{
+    Matrix projection;
+    DirectX::XMStoreFloat4x4(&projection, DirectX::XMMatrixOrthographicRH(width, height, zNearPlane, zFarPlane));
+    return projection;
+}
+
+// Right-handed off-center orthographic projection from explicit volume edges (XMMatrixOrthographicOffCenterRH).
+inline Matrix Matrix::CreateOrthographicOffCenter( float left, float right, float bottom, float top, float zNearPlane, float zFarPlane )
+{
+    Matrix projection;
+    DirectX::XMStoreFloat4x4(&projection, DirectX::XMMatrixOrthographicOffCenterRH(left, right, bottom, top, zNearPlane, zFarPlane));
+    return projection;
+}
+
+// Right-handed view matrix for a camera at eye looking at target with the given up (XMMatrixLookAtRH).
+inline Matrix Matrix::CreateLookAt( const Vector3& eye, const Vector3& target, const Vector3& up )
+{
+    using namespace DirectX;
+    const XMVECTOR eyePos = XMLoadFloat3(&eye);
+    const XMVECTOR focusPos = XMLoadFloat3(&target);
+    Matrix view;
+    XMStoreFloat4x4(&view, XMMatrixLookAtRH(eyePos, focusPos, XMLoadFloat3(&up)));
+    return view;
+}
+
+// World matrix at position: Z row = normalize(-forward), X row = normalize(up x Z), Y row = Z x X.
+inline Matrix Matrix::CreateWorld( const Vector3& position, const Vector3& forward, const Vector3& up )
+{
+    using namespace DirectX;
+    const XMVECTOR backward = XMVector3Normalize(XMVectorNegate(XMLoadFloat3(&forward)));
+    const XMVECTOR upHint = XMLoadFloat3(&up);
+    const XMVECTOR right = XMVector3Normalize(XMVector3Cross(upHint, backward));
+    const XMVECTOR trueUp = XMVector3Cross(backward, right);
+    Matrix world;
+    XMStoreFloat3(reinterpret_cast<XMFLOAT3*>(&world._11), right);
+    XMStoreFloat3(reinterpret_cast<XMFLOAT3*>(&world._21), trueUp);
+    XMStoreFloat3(reinterpret_cast<XMFLOAT3*>(&world._31), backward);
+    world._14 = world._24 = world._34 = 0.f;
+    world._41 = position.x; world._42 = position.y; world._43 = position.z;
+    world._44 = 1.f;
+    return world;
+}
+
+inline Matrix Matrix::CreateFromQuaternion( const Quaternion& rotation )
+{
+    // Returns the matrix XMMatrixRotationQuaternion produces for the given quaternion.
+    using namespace DirectX;
+    Matrix rotMatrix;
+    XMStoreFloat4x4( &rotMatrix, XMMatrixRotationQuaternion( XMLoadFloat4( &rotation ) ) );
+    return rotMatrix;
+}
+
+inline Matrix Matrix::CreateFromYawPitchRoll( float yaw, float pitch, float roll )
+{
+    // Note the argument swap: XMMatrixRotationRollPitchYaw is called with pitch first, then yaw, then roll.
+    Matrix rotation;
+    DirectX::XMStoreFloat4x4( &rotation, DirectX::XMMatrixRotationRollPitchYaw( pitch, yaw, roll ) );
+    return rotation;
+}
+
+inline Matrix Matrix::CreateShadow( const Vector3& lightDir, const Plane& plane )
+{
+    // XMMatrixShadow is called with the plane first and the light second, the reverse of this function's parameters.
+    using namespace DirectX;
+    const XMVECTOR lightVec = XMLoadFloat3( &lightDir );
+    Matrix shadow;
+    XMStoreFloat4x4( &shadow, XMMatrixShadow( XMLoadFloat4( &plane ), lightVec ) );
+    return shadow;
+}
+
+inline Matrix Matrix::CreateReflection( const Plane& plane )
+{
+    // Returns the matrix XMMatrixReflect produces for the supplied plane.
+    using namespace DirectX;
+    Matrix reflection;
+    XMStoreFloat4x4( &reflection, XMMatrixReflect( XMLoadFloat4( &plane ) ) );
+    return reflection;
+}
+
+inline void Matrix::Lerp( const Matrix& M1, const Matrix& M2, float t, Matrix& result )
+{
+    using namespace DirectX;
+    // All eight rows are loaded before anything is written to result.
+    const XMVECTOR a0 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._11) );
+    const XMVECTOR a1 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._21) );
+    const XMVECTOR a2 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._31) );
+    const XMVECTOR a3 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._41) );
+    const XMVECTOR b0 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._11) );
+    const XMVECTOR b1 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._21) );
+    const XMVECTOR b2 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._31) );
+    const XMVECTOR b3 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._41) );
+    // Each row of M1 is blended with the same row of M2 by XMVectorLerp.
+    const XMVECTOR r0 = XMVectorLerp( a0, b0, t );
+    const XMVECTOR r1 = XMVectorLerp( a1, b1, t );
+    const XMVECTOR r2 = XMVectorLerp( a2, b2, t );
+    const XMVECTOR r3 = XMVectorLerp( a3, b3, t );
+
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&result._11), r0 );
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&result._21), r1 );
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&result._31), r2 );
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&result._41), r3 );
+}
+
+inline Matrix Matrix::Lerp( const Matrix& M1, const Matrix& M2, float t )
+{
+    using namespace DirectX;
+    // Value-returning form: the four rows of each input are loaded as XMVECTORs.
+    const XMVECTOR a0 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._11) );
+    const XMVECTOR a1 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._21) );
+    const XMVECTOR a2 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._31) );
+    const XMVECTOR a3 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M1._41) );
+    const XMVECTOR b0 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._11) );
+    const XMVECTOR b1 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._21) );
+    const XMVECTOR b2 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._31) );
+    const XMVECTOR b3 = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(&M2._41) );
+    // Each row of M1 is blended with the same row of M2 by XMVectorLerp.
+    const XMVECTOR r0 = XMVectorLerp( a0, b0, t );
+    const XMVECTOR r1 = XMVectorLerp( a1, b1, t );
+    const XMVECTOR r2 = XMVectorLerp( a2, b2, t );
+    const XMVECTOR r3 = XMVectorLerp( a3, b3, t );
+
+    Matrix blended;
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&blended._11), r0 );
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&blended._21), r1 );
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&blended._31), r2 );
+    XMStoreFloat4( reinterpret_cast<XMFLOAT4*>(&blended._41), r3 );
+    return blended;
+}
+
+inline void Matrix::Transform( const Matrix& M, const Quaternion& rotation, Matrix& result )
+{
+    // result = XMMatrixMultiply( M, R ), where R is XMMatrixRotationQuaternion of 'rotation'.
+    using namespace DirectX;
+
+    const XMMATRIX source = XMLoadFloat4x4( &M );
+    const XMMATRIX spin = XMMatrixRotationQuaternion( XMLoadFloat4( &rotation ) );
+
+    XMStoreFloat4x4( &result, XMMatrixMultiply( source, spin ) );
+}
+
+inline Matrix Matrix::Transform( const Matrix& M, const Quaternion& rotation )
+{
+    // Returns XMMatrixMultiply( M, R ), where R is XMMatrixRotationQuaternion of 'rotation'.
+    using namespace DirectX;
+
+    const XMMATRIX source = XMLoadFloat4x4( &M );
+    const XMMATRIX spin = XMMatrixRotationQuaternion( XMLoadFloat4( &rotation ) );
+
+    Matrix combined;
+    XMStoreFloat4x4( &combined, XMMatrixMultiply( source, spin ) );
+    return combined;
+}
+
+
+/****************************************************************************
+ *
+ * Plane
+ *
+ ****************************************************************************/
+
+inline Plane::Plane(const Vector3& point1, const Vector3& point2, const Vector3& point3 )
+{
+    // Initializes this plane with XMPlaneFromPoints applied to the three given points, in order.
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat3( &point1 );
+    const XMVECTOR b = XMLoadFloat3( &point2 );
+    XMStoreFloat4( this, XMPlaneFromPoints( a, b, XMLoadFloat3( &point3 ) ) );
+}
+
+inline Plane::Plane(const Vector3& point, const Vector3& normal)
+{
+    // Initializes this plane with XMPlaneFromPointNormal( point, normal ).
+    using namespace DirectX;
+    const XMVECTOR onPlane = XMLoadFloat3( &point );
+    XMStoreFloat4( this, XMPlaneFromPointNormal( onPlane, XMLoadFloat3( &normal ) ) );
+}
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+inline bool Plane::operator == ( const Plane& p ) const
+{
+    // The comparison is delegated to XMPlaneEqual.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    return XMPlaneEqual( lhs, XMLoadFloat4( &p ) );
+}
+
+inline bool Plane::operator != ( const Plane& p ) const
+{
+    // The comparison is delegated to XMPlaneNotEqual.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    return XMPlaneNotEqual( lhs, XMLoadFloat4( &p ) );
+}
+
+//------------------------------------------------------------------------------
+// Plane operations
+//------------------------------------------------------------------------------
+
+inline void Plane::Normalize()
+{
+    // In place: overwrites this plane with XMPlaneNormalize of its current value.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMPlaneNormalize( XMLoadFloat4( this ) ) );
+}
+
+inline void Plane::Normalize( Plane& result ) const
+{
+    // Writes XMPlaneNormalize of this plane into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMPlaneNormalize( XMLoadFloat4( this ) ) );
+}
+
+inline float Plane::Dot( const Vector4& v ) const
+{
+    // XMPlaneDot yields a vector; only its x lane is returned.
+    using namespace DirectX;
+    const XMVECTOR planeVec = XMLoadFloat4( this );
+    return XMVectorGetX( XMPlaneDot( planeVec, XMLoadFloat4( &v ) ) );
+}
+
+inline float Plane::DotCoordinate( const Vector3& position ) const
+{
+    // Returns the x lane of XMPlaneDotCoord( plane, position ).
+    using namespace DirectX;
+    const XMVECTOR planeVec = XMLoadFloat4( this );
+    return XMVectorGetX( XMPlaneDotCoord( planeVec, XMLoadFloat3( &position ) ) );
+}
+
+inline float Plane::DotNormal( const Vector3& normal ) const
+{
+    // Returns the x lane of XMPlaneDotNormal( plane, normal ).
+    using namespace DirectX;
+    const XMVECTOR planeVec = XMLoadFloat4( this );
+    return XMVectorGetX( XMPlaneDotNormal( planeVec, XMLoadFloat3( &normal ) ) );
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+inline void Plane::Transform( const Plane& plane, const Matrix& M, Plane& result )
+{
+    // Writes XMPlaneTransform( plane, M ) into result.
+    using namespace DirectX;
+    const XMMATRIX xform = XMLoadFloat4x4( &M );
+    XMStoreFloat4( &result, XMPlaneTransform( XMLoadFloat4( &plane ), xform ) );
+}
+
+inline Plane Plane::Transform( const Plane& plane, const Matrix& M )
+{
+    // Value-returning form: returns XMPlaneTransform( plane, M ).
+    using namespace DirectX;
+    const XMVECTOR source = XMLoadFloat4( &plane );
+    const XMMATRIX xform = XMLoadFloat4x4( &M );
+    Plane moved;
+    XMStoreFloat4( &moved, XMPlaneTransform( source, xform ) );
+    return moved;
+}
+
+inline void Plane::Transform( const Plane& plane, const Quaternion& rotation, Plane& result )
+{
+    using namespace DirectX;
+    const XMVECTOR original = XMLoadFloat4( &plane );
+    const XMVECTOR rotated = XMVector3Rotate( original, XMLoadFloat4( &rotation ) );
+    // g_XMSelect1110 takes x/y/z from the rotated vector and keeps w from the original: result.d = plane.d
+    const XMVECTOR merged = XMVectorSelect( original, rotated, g_XMSelect1110 );
+    XMStoreFloat4( &result, merged );
+}
+
+inline Plane Plane::Transform( const Plane& plane, const Quaternion& rotation )
+{
+    using namespace DirectX;
+    const XMVECTOR original = XMLoadFloat4( &plane );
+    const XMVECTOR rotated = XMVector3Rotate( original, XMLoadFloat4( &rotation ) );
+    // g_XMSelect1110 takes x/y/z from the rotated vector and keeps w from the original: result.d = plane.d
+    const XMVECTOR merged = XMVectorSelect( original, rotated, g_XMSelect1110 );
+
+    Plane turned;
+    XMStoreFloat4( &turned, merged );
+    return turned;
+}
+
+
+/****************************************************************************
+ *
+ * Quaternion
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+inline bool Quaternion::operator == ( const Quaternion& q ) const
+{
+    // The comparison is delegated to XMQuaternionEqual.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    return XMQuaternionEqual( lhs, XMLoadFloat4( &q ) );
+}
+
+inline bool Quaternion::operator != ( const Quaternion& q ) const
+{
+    // The comparison is delegated to XMQuaternionNotEqual.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    return XMQuaternionNotEqual( lhs, XMLoadFloat4( &q ) );
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Quaternion& Quaternion::operator+= (const Quaternion& q)
+{
+    // Adds q to this quaternion with XMVectorAdd and stores the sum back into *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorAdd( self, XMLoadFloat4( &q ) ) );
+    return *this;
+}
+
+inline Quaternion& Quaternion::operator-= (const Quaternion& q)
+{
+    // Subtracts q from this quaternion with XMVectorSubtract and stores the difference back into *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorSubtract( self, XMLoadFloat4( &q ) ) );
+    return *this;
+}
+
+inline Quaternion& Quaternion::operator*= (const Quaternion& q)
+{
+    // Stores XMQuaternionMultiply( *this, q ) back into *this (this quaternion is the first argument).
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMQuaternionMultiply( self, XMLoadFloat4( &q ) ) );
+    return *this;
+}
+
+inline Quaternion& Quaternion::operator*= (float S)
+{
+    // Scales this quaternion by S via XMVectorScale; no normalization is applied here.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMVectorScale( XMLoadFloat4( this ), S ) );
+    return *this;
+}
+
+inline Quaternion& Quaternion::operator/= (const Quaternion& q)
+{
+    // Division is implemented as XMQuaternionMultiply( *this, XMQuaternionInverse( q ) ).
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    const XMVECTOR inverse = XMQuaternionInverse( XMLoadFloat4( &q ) );
+    XMStoreFloat4( this, XMQuaternionMultiply( self, inverse ) );
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Unary operators
+//------------------------------------------------------------------------------
+
+inline Quaternion Quaternion::operator- () const
+{
+    // Returns a negated copy produced by XMVectorNegate; *this is not modified.
+    using namespace DirectX;
+
+    Quaternion negated;
+    XMStoreFloat4( &negated, XMVectorNegate( XMLoadFloat4( this ) ) );
+    return negated;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+inline Quaternion operator+ (const Quaternion& Q1, const Quaternion& Q2)
+{
+    // Returns XMVectorAdd( Q1, Q2 ) as a new quaternion.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &Q1 );
+    const XMVECTOR rhs = XMLoadFloat4( &Q2 );
+    Quaternion sum;
+    XMStoreFloat4( &sum, XMVectorAdd( lhs, rhs ) );
+    return sum;
+}
+
+inline Quaternion operator- (const Quaternion& Q1, const Quaternion& Q2)
+{
+    // Returns XMVectorSubtract( Q1, Q2 ) as a new quaternion.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &Q1 );
+    const XMVECTOR rhs = XMLoadFloat4( &Q2 );
+    Quaternion difference;
+    XMStoreFloat4( &difference, XMVectorSubtract( lhs, rhs ) );
+    return difference;
+}
+
+inline Quaternion operator* (const Quaternion& Q1, const Quaternion& Q2)
+{
+    // Returns XMQuaternionMultiply( Q1, Q2 ); the operands are passed in the order given.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &Q1 );
+    const XMVECTOR rhs = XMLoadFloat4( &Q2 );
+    Quaternion product;
+    XMStoreFloat4( &product, XMQuaternionMultiply( lhs, rhs ) );
+    return product;
+}
+
+inline Quaternion operator* (const Quaternion& Q, float S)
+{
+    // Returns Q scaled by S through XMVectorScale.
+    using namespace DirectX;
+
+    Quaternion scaled;
+    XMStoreFloat4( &scaled, XMVectorScale( XMLoadFloat4( &Q ), S ) );
+    return scaled;
+}
+
+inline Quaternion operator/ (const Quaternion& Q1, const Quaternion& Q2)
+{
+    // Computed as XMQuaternionMultiply( Q1, XMQuaternionInverse( Q2 ) ).
+    using namespace DirectX;
+    const XMVECTOR numerator = XMLoadFloat4( &Q1 );
+    const XMVECTOR inverse = XMQuaternionInverse( XMLoadFloat4( &Q2 ) );
+
+    Quaternion quotient;
+    XMStoreFloat4( &quotient, XMQuaternionMultiply( numerator, inverse ) );
+    return quotient;
+}
+
+inline Quaternion operator* (float S, const Quaternion& Q)
+{
+    // Scalar-on-the-left overload: returns Q scaled by S through XMVectorScale.
+    using namespace DirectX;
+
+    Quaternion scaled;
+    XMStoreFloat4( &scaled, XMVectorScale( XMLoadFloat4( &Q ), S ) );
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Quaternion operations
+//------------------------------------------------------------------------------
+
+inline float Quaternion::Length() const
+{
+    // XMQuaternionLength returns a vector; the scalar is read from its x lane.
+    using namespace DirectX;
+    return XMVectorGetX( XMQuaternionLength( XMLoadFloat4( this ) ) );
+}
+
+inline float Quaternion::LengthSquared() const
+{
+    // XMQuaternionLengthSq returns a vector; the scalar is read from its x lane.
+    using namespace DirectX;
+    return XMVectorGetX( XMQuaternionLengthSq( XMLoadFloat4( this ) ) );
+}
+
+inline void Quaternion::Normalize()
+{
+    // In place: overwrites this quaternion with XMQuaternionNormalize of its current value.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMQuaternionNormalize( XMLoadFloat4( this ) ) );
+}
+
+inline void Quaternion::Normalize( Quaternion& result ) const
+{
+    // Writes XMQuaternionNormalize of this quaternion into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMQuaternionNormalize( XMLoadFloat4( this ) ) );
+}
+
+inline void Quaternion::Conjugate()
+{
+    // In place: overwrites this quaternion with XMQuaternionConjugate of its current value.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMQuaternionConjugate( XMLoadFloat4( this ) ) );
+}
+
+inline void Quaternion::Conjugate( Quaternion& result ) const
+{
+    // Writes XMQuaternionConjugate of this quaternion into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMQuaternionConjugate( XMLoadFloat4( this ) ) );
+}
+
+inline void Quaternion::Inverse( Quaternion& result ) const
+{
+    // Writes XMQuaternionInverse of this quaternion into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMQuaternionInverse( XMLoadFloat4( this ) ) );
+}
+
+inline float Quaternion::Dot( const Quaternion& q ) const
+{
+    // Returns the x lane of XMQuaternionDot( *this, q ).
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    return XMVectorGetX( XMQuaternionDot( self, XMLoadFloat4( &q ) ) );
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+inline Quaternion Quaternion::CreateFromAxisAngle( const Vector3& axis, float angle )
+{
+    // Returns XMQuaternionRotationAxis( axis, angle ).
+    using namespace DirectX;
+
+    Quaternion rotation;
+    XMStoreFloat4( &rotation, XMQuaternionRotationAxis( XMLoadFloat3( &axis ), angle ) );
+    return rotation;
+}
+
+inline Quaternion Quaternion::CreateFromYawPitchRoll( float yaw, float pitch, float roll )
+{
+    // Note the argument swap: XMQuaternionRotationRollPitchYaw is called with pitch, yaw, roll.
+    Quaternion rotation;
+    DirectX::XMStoreFloat4( &rotation, DirectX::XMQuaternionRotationRollPitchYaw( pitch, yaw, roll ) );
+    return rotation;
+}
+
+inline Quaternion Quaternion::CreateFromRotationMatrix( const Matrix& M )
+{
+    // Returns XMQuaternionRotationMatrix( M ).
+    using namespace DirectX;
+
+    Quaternion rotation;
+    XMStoreFloat4( &rotation, XMQuaternionRotationMatrix( XMLoadFloat4x4( &M ) ) );
+    return rotation;
+}
+
+inline void Quaternion::Lerp( const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result )
+{
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat4( &q1 );
+    const XMVECTOR to = XMLoadFloat4( &q2 );
+
+    // The sign of the 4D dot product picks the blend: plain XMVectorLerp when it is >= 0, otherwise
+    // q2's weighted term is subtracted instead of added. The blend is normalized before it is stored.
+    XMVECTOR blended;
+    if ( XMVector4GreaterOrEqual( XMVector4Dot( from, to ), XMVectorZero() ) )
+    {
+        blended = XMVectorLerp( from, to, t );
+    }
+    else
+    {
+        const XMVECTOR weightTo = XMVectorReplicate( t );
+        const XMVECTOR weightFrom = XMVectorReplicate( 1.f - t );
+        const XMVECTOR scaledFrom = XMVectorMultiply( from, weightFrom );
+        const XMVECTOR scaledTo = XMVectorMultiply( to, weightTo );
+        blended = XMVectorSubtract( scaledFrom, scaledTo );
+    }
+
+    XMStoreFloat4( &result, XMQuaternionNormalize( blended ) );
+}
+
+inline Quaternion Quaternion::Lerp( const Quaternion& q1, const Quaternion& q2, float t )
+{
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat4( &q1 );
+    const XMVECTOR to = XMLoadFloat4( &q2 );
+
+    // The sign of the 4D dot product picks the blend: plain XMVectorLerp when it is >= 0, otherwise
+    // q2's weighted term is subtracted instead of added. The blend is normalized before it is returned.
+    XMVECTOR blended;
+    if ( XMVector4GreaterOrEqual( XMVector4Dot( from, to ), XMVectorZero() ) )
+    {
+        blended = XMVectorLerp( from, to, t );
+    }
+    else
+    {
+        const XMVECTOR weightTo = XMVectorReplicate( t );
+        const XMVECTOR weightFrom = XMVectorReplicate( 1.f - t );
+        const XMVECTOR scaledFrom = XMVectorMultiply( from, weightFrom );
+        const XMVECTOR scaledTo = XMVectorMultiply( to, weightTo );
+        blended = XMVectorSubtract( scaledFrom, scaledTo );
+    }
+
+    Quaternion interpolated;
+    XMStoreFloat4( &interpolated, XMQuaternionNormalize( blended ) );
+    return interpolated;
+}
+
+inline void Quaternion::Slerp( const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result )
+{
+    // Writes XMQuaternionSlerp( q1, q2, t ) into result.
+    using namespace DirectX;
+    const XMVECTOR start = XMLoadFloat4( &q1 );
+    XMStoreFloat4( &result, XMQuaternionSlerp( start, XMLoadFloat4( &q2 ), t ) );
+}
+
+inline Quaternion Quaternion::Slerp( const Quaternion& q1, const Quaternion& q2, float t )
+{
+    // Value-returning form: returns XMQuaternionSlerp( q1, q2, t ).
+    using namespace DirectX;
+    const XMVECTOR start = XMLoadFloat4( &q1 );
+    const XMVECTOR finish = XMLoadFloat4( &q2 );
+    Quaternion interpolated;
+    XMStoreFloat4( &interpolated, XMQuaternionSlerp( start, finish, t ) );
+    return interpolated;
+}
+
+inline void Quaternion::Concatenate( const Quaternion& q1, const Quaternion& q2, Quaternion& result )
+{
+    // Operands are handed to XMQuaternionMultiply in swapped order: q2 first, then q1.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat4( &q1 );
+    XMStoreFloat4( &result, XMQuaternionMultiply( XMLoadFloat4( &q2 ), first ) );
+}
+
+inline Quaternion Quaternion::Concatenate( const Quaternion& q1, const Quaternion& q2 )
+{
+    // Operands are handed to XMQuaternionMultiply in swapped order: q2 first, then q1.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat4( &q1 );
+    const XMVECTOR second = XMLoadFloat4( &q2 );
+    Quaternion combined;
+    XMStoreFloat4( &combined, XMQuaternionMultiply( second, first ) );
+    return combined;
+}
+
+
+/****************************************************************************
+ *
+ * Color
+ *
+ ****************************************************************************/
+
+inline Color::Color( const DirectX::PackedVector::XMCOLOR& Packed )
+{
+    // Initializes this color from a packed XMCOLOR by way of XMLoadColor.
+    DirectX::XMStoreFloat4( this, DirectX::PackedVector::XMLoadColor( &Packed ) );
+}
+
+inline Color::Color( const DirectX::PackedVector::XMUBYTEN4& Packed )
+{
+    // Initializes this color from a packed XMUBYTEN4 by way of XMLoadUByteN4.
+    DirectX::XMStoreFloat4( this, DirectX::PackedVector::XMLoadUByteN4( &Packed ) );
+}
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+inline bool Color::operator == ( const Color& c ) const
+{
+    // The comparison is delegated to XMColorEqual.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    return XMColorEqual( lhs, XMLoadFloat4( &c ) );
+}
+
+inline bool Color::operator != ( const Color& c ) const
+{
+    // The comparison is delegated to XMColorNotEqual.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( this );
+    return XMColorNotEqual( lhs, XMLoadFloat4( &c ) );
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Color& Color::operator= (const DirectX::PackedVector::XMCOLOR& Packed)
+{
+    // Overwrites this color with the XMLoadColor expansion of the packed value.
+    DirectX::XMStoreFloat4( this, DirectX::PackedVector::XMLoadColor( &Packed ) );
+    return *this;
+}
+
+inline Color& Color::operator= (const DirectX::PackedVector::XMUBYTEN4& Packed)
+{
+    // Overwrites this color with the XMLoadUByteN4 expansion of the packed value.
+    DirectX::XMStoreFloat4( this, DirectX::PackedVector::XMLoadUByteN4( &Packed ) );
+    return *this;
+}
+
+inline Color& Color::operator+= (const Color& c)
+{
+    // Adds c to this color with XMVectorAdd and stores the sum back into *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorAdd( self, XMLoadFloat4( &c ) ) );
+    return *this;
+}
+
+inline Color& Color::operator-= (const Color& c)
+{
+    // Subtracts c from this color with XMVectorSubtract and stores the difference back into *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorSubtract( self, XMLoadFloat4( &c ) ) );
+    return *this;
+}
+
+inline Color& Color::operator*= (const Color& c)
+{
+    // Multiplies this color by c with XMVectorMultiply and stores the product back into *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorMultiply( self, XMLoadFloat4( &c ) ) );
+    return *this;
+}
+
+inline Color& Color::operator*= (float S)
+{
+    // Scales this color by S via XMVectorScale and stores the result back into *this.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMVectorScale( XMLoadFloat4( this ), S ) );
+    return *this;
+}
+
+inline Color& Color::operator/= (const Color& c)
+{
+    // Divides this color by c with XMVectorDivide and stores the quotient back into *this.
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat4( this );
+    XMStoreFloat4( this, XMVectorDivide( self, XMLoadFloat4( &c ) ) );
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Unary operators
+//------------------------------------------------------------------------------
+
+inline Color Color::operator- () const
+{
+    // Returns a negated copy produced by XMVectorNegate; *this is not modified.
+    using namespace DirectX;
+    Color negated;
+    XMStoreFloat4( &negated, XMVectorNegate( XMLoadFloat4( this ) ) );
+    return negated;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+inline Color operator+ (const Color& C1, const Color& C2)
+{
+    // Returns XMVectorAdd( C1, C2 ) as a new color.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &C1 );
+    Color sum;
+    XMStoreFloat4( &sum, XMVectorAdd( lhs, XMLoadFloat4( &C2 ) ) );
+    return sum;
+}
+
+inline Color operator- (const Color& C1, const Color& C2)
+{
+    // Returns XMVectorSubtract( C1, C2 ) as a new color.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &C1 );
+    Color difference;
+    XMStoreFloat4( &difference, XMVectorSubtract( lhs, XMLoadFloat4( &C2 ) ) );
+    return difference;
+}
+
+inline Color operator* (const Color& C1, const Color& C2)
+{
+    // Returns XMVectorMultiply( C1, C2 ) as a new color.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &C1 );
+    Color product;
+    XMStoreFloat4( &product, XMVectorMultiply( lhs, XMLoadFloat4( &C2 ) ) );
+    return product;
+}
+
+inline Color operator* (const Color& C, float S)
+{
+    // Returns C scaled by S through XMVectorScale.
+    using namespace DirectX;
+    Color scaled;
+    XMStoreFloat4( &scaled, XMVectorScale( XMLoadFloat4( &C ), S ) );
+    return scaled;
+}
+
+inline Color operator/ (const Color& C1, const Color& C2)
+{
+    // Returns XMVectorDivide( C1, C2 ) as a new color.
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat4( &C1 );
+    Color quotient;
+    XMStoreFloat4( &quotient, XMVectorDivide( lhs, XMLoadFloat4( &C2 ) ) );
+    return quotient;
+}
+
+inline Color operator* (float S, const Color& C)
+{
+    // Scalar-on-the-left overload: returns C scaled by S through XMVectorScale.
+    using namespace DirectX;
+    Color scaled;
+    XMStoreFloat4( &scaled, XMVectorScale( XMLoadFloat4( &C ), S ) );
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Color operations
+//------------------------------------------------------------------------------
+
+inline DirectX::PackedVector::XMCOLOR Color::BGRA() const
+{
+    // Packs this color into an XMCOLOR with XMStoreColor and returns the packed value.
+    using namespace DirectX;
+    PackedVector::XMCOLOR packedColor;
+    PackedVector::XMStoreColor( &packedColor, XMLoadFloat4( this ) );
+    return packedColor;
+}
+
+inline DirectX::PackedVector::XMUBYTEN4 Color::RGBA() const
+{
+    // Packs this color into an XMUBYTEN4 with XMStoreUByteN4 and returns the packed value.
+    using namespace DirectX;
+    PackedVector::XMUBYTEN4 packedColor;
+    PackedVector::XMStoreUByteN4( &packedColor, XMLoadFloat4( this ) );
+    return packedColor;
+}
+
+inline Vector3 Color::ToVector3() const
+{
+    return Vector3( x, y, z ); // copies the first three components; w is not included
+}
+
+inline Vector4 Color::ToVector4() const
+{
+    return Vector4( x, y, z, w ); // copies all four components unchanged
+}
+
+inline void Color::Negate()
+{
+    // In place: overwrites this color with XMColorNegative of its current value.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMColorNegative( XMLoadFloat4( this ) ) );
+}
+
+inline void Color::Negate( Color& result ) const
+{
+    // Writes XMColorNegative of this color into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMColorNegative( XMLoadFloat4( this ) ) );
+}
+
+inline void Color::Saturate()
+{
+    // In place: overwrites this color with XMVectorSaturate of its current value.
+    using namespace DirectX;
+    XMStoreFloat4( this, XMVectorSaturate( XMLoadFloat4( this ) ) );
+}
+
+inline void Color::Saturate( Color& result ) const
+{
+    // Writes XMVectorSaturate of this color into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMVectorSaturate( XMLoadFloat4( this ) ) );
+}
+
+inline void Color::Premultiply()
+{
+    using namespace DirectX;
+    const XMVECTOR color = XMLoadFloat4( this );
+    // Multiplier takes x/y/z from the splatted w and its own w from g_XMIdentityR3, so the stored w is unchanged.
+    const XMVECTOR factor = XMVectorSelect( g_XMIdentityR3, XMVectorSplatW( color ), g_XMSelect1110 );
+    XMStoreFloat4( this, XMVectorMultiply( color, factor ) );
+}
+
+inline void Color::Premultiply( Color& result ) const
+{
+    using namespace DirectX;
+    const XMVECTOR color = XMLoadFloat4( this );
+    // Multiplier takes x/y/z from the splatted w and its own w from g_XMIdentityR3, so result.w equals this w.
+    const XMVECTOR factor = XMVectorSelect( g_XMIdentityR3, XMVectorSplatW( color ), g_XMSelect1110 );
+    XMStoreFloat4( &result, XMVectorMultiply( color, factor ) );
+}
+
+inline void Color::AdjustSaturation( float sat )
+{
+    // In place: overwrites this color with XMColorAdjustSaturation( color, sat ).
+    using namespace DirectX;
+    XMStoreFloat4( this, XMColorAdjustSaturation( XMLoadFloat4( this ), sat ) );
+}
+
+inline void Color::AdjustSaturation( float sat, Color& result ) const
+{
+    // Writes XMColorAdjustSaturation( color, sat ) into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMColorAdjustSaturation( XMLoadFloat4( this ), sat ) );
+}
+
+inline void Color::AdjustContrast( float contrast )
+{
+    // In place: overwrites this color with XMColorAdjustContrast( color, contrast ).
+    using namespace DirectX;
+    XMStoreFloat4( this, XMColorAdjustContrast( XMLoadFloat4( this ), contrast ) );
+}
+
+inline void Color::AdjustContrast( float contrast, Color& result ) const
+{
+    // Writes XMColorAdjustContrast( color, contrast ) into result; *this is not modified.
+    using namespace DirectX;
+    XMStoreFloat4( &result, XMColorAdjustContrast( XMLoadFloat4( this ), contrast ) );
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+inline void Color::Modulate( const Color& c1, const Color& c2, Color& result )
+{
+    // Writes XMColorModulate( c1, c2 ) into result.
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat4( &c1 );
+    XMStoreFloat4( &result, XMColorModulate( first, XMLoadFloat4( &c2 ) ) );
+}
+
+inline Color Color::Modulate( const Color& c1, const Color& c2 )
+{
+    // Value-returning form: returns XMColorModulate( c1, c2 ).
+    using namespace DirectX;
+    const XMVECTOR first = XMLoadFloat4( &c1 );
+    const XMVECTOR second = XMLoadFloat4( &c2 );
+    Color modulated;
+    XMStoreFloat4( &modulated, XMColorModulate( first, second ) );
+    return modulated;
+}
+
+inline void Color::Lerp( const Color& c1, const Color& c2, float t, Color& result )
+{
+    // Writes XMVectorLerp( c1, c2, t ) into result.
+    using namespace DirectX;
+    const XMVECTOR start = XMLoadFloat4( &c1 );
+    XMStoreFloat4( &result, XMVectorLerp( start, XMLoadFloat4( &c2 ), t ) );
+}
+
+inline Color Color::Lerp( const Color& c1, const Color& c2, float t )
+{
+    // Value-returning form: returns XMVectorLerp( c1, c2, t ).
+    using namespace DirectX;
+    const XMVECTOR start = XMLoadFloat4( &c1 );
+    const XMVECTOR finish = XMLoadFloat4( &c2 );
+    Color blended;
+    XMStoreFloat4( &blended, XMVectorLerp( start, finish, t ) );
+    return blended;
+}
+
+
+/****************************************************************************
+ *
+ * Ray
+ *
+ ****************************************************************************/
+
+//-----------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+inline bool Ray::operator == ( const Ray& r ) const
+{
+    // Equal only when both the positions and the directions match (XMVector3Equal on each pair).
+    using namespace DirectX;
+    const XMVECTOR myPos = XMLoadFloat3( &position );
+    const XMVECTOR myDir = XMLoadFloat3( &direction );
+    return XMVector3Equal( myPos, XMLoadFloat3( &r.position ) )
+        && XMVector3Equal( myDir, XMLoadFloat3( &r.direction ) );
+}
+
+inline bool Ray::operator != ( const Ray& r ) const
+{
+    // Negation of operator==: unequal when the positions differ OR the directions differ (was &&, which missed one-field differences).
+    using namespace DirectX;
+    const XMVECTOR myPos = XMLoadFloat3( &position );
+    const XMVECTOR myDir = XMLoadFloat3( &direction );
+    return XMVector3NotEqual( myPos, XMLoadFloat3( &r.position ) )
+        || XMVector3NotEqual( myDir, XMLoadFloat3( &r.direction ) );
+}
+
+//-----------------------------------------------------------------------------
+// Ray operators
+//------------------------------------------------------------------------------
+
+inline bool Ray::Intersects( const BoundingSphere& sphere, _Out_ float& Dist ) const
+{
+    return sphere.Intersects( position, direction, Dist ); // forwards this ray's position and direction to the sphere's own Intersects
+}
+
+inline bool Ray::Intersects( const BoundingBox& box, _Out_ float& Dist ) const
+{
+    return box.Intersects( position, direction, Dist ); // forwards this ray's position and direction to the box's own Intersects
+}
+
+inline bool Ray::Intersects( const Vector3& tri0, const Vector3& tri1, const Vector3& tri2, _Out_ float& Dist ) const
+{
+    return DirectX::TriangleTests::Intersects( position, direction, tri0, tri1, tri2, Dist ); // forwards the ray and the three vertices to TriangleTests::Intersects
+}
+
+inline bool Ray::Intersects( const Plane& plane, _Out_ float& Dist ) const
+{
+    using namespace DirectX;
+
+    const XMVECTOR planeVec = XMLoadFloat4( &plane );
+    const XMVECTOR rayDir = XMLoadFloat3( &direction );
+
+    // Denominator of the hit equation: dot(n, dir).
+    const XMVECTOR denom = XMPlaneDotNormal( planeVec, rayDir );
+
+    // |dot(n, dir)| at or below g_RayEpsilon is reported as no intersection, with Dist set to 0.
+    if ( XMVector3LessOrEqual( XMVectorAbs( denom ), g_RayEpsilon ) )
+    {
+        Dist = 0.f;
+        return false;
+    }
+
+    // t = -(dot(n,origin) + D) / dot(n,dir)
+    const XMVECTOR origin = XMLoadFloat3( &position );
+    XMVECTOR numer = XMPlaneDotNormal( planeVec, origin );
+    numer = XMVectorAdd( numer, XMVectorSplatW( planeVec ) );
+    const XMVECTOR ratio = XMVectorDivide( numer, denom );
+    const float hit = - XMVectorGetX( ratio );
+
+    // A negative t is reported as a miss, again with Dist set to 0.
+    if ( hit < 0 )
+    {
+        Dist = 0.f;
+        return false;
+    }
+
+    Dist = hit;
+    return true;
+}
+
+
+/****************************************************************************
+ *
+ * Viewport
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operators
+//------------------------------------------------------------------------------
+
+inline bool Viewport::operator == ( const Viewport& vp ) const
+{
+    // Exact float comparison of all six fields; any single mismatch makes the viewports unequal.
+    const bool sameRect = ( x == vp.x && y == vp.y && width == vp.width && height == vp.height );
+    return sameRect && minDepth == vp.minDepth && maxDepth == vp.maxDepth;
+}
+
+inline bool Viewport::operator != ( const Viewport& vp ) const
+{
+    // True as soon as any one of the six fields differs (exact float comparison).
+    const bool rectDiffers = ( x != vp.x || y != vp.y || width != vp.width || height != vp.height );
+    return rectDiffers || minDepth != vp.minDepth || maxDepth != vp.maxDepth;
+}
+
+//------------------------------------------------------------------------------
+// Assignment operators
+//------------------------------------------------------------------------------
+
+inline Viewport& Viewport::operator= (const Viewport& vp)
+{
+    // Field-by-field copy of the six members.
+    x = vp.x; y = vp.y; width = vp.width; height = vp.height;
+    minDepth = vp.minDepth; maxDepth = vp.maxDepth;
+    return *this;
+}
+
+inline Viewport& Viewport::operator= (const RECT& rct)
+{
+    // Origin comes from the rectangle's left/top, size from its extents; the depth range is reset to [0, 1].
+    x = static_cast<float>(rct.left); y = static_cast<float>(rct.top);
+    width = static_cast<float>(rct.right - rct.left); height = static_cast<float>(rct.bottom - rct.top);
+    minDepth = 0.f; maxDepth = 1.f;
+    return *this;
+}
+
+inline Viewport& Viewport::operator= (const D3D11_VIEWPORT& vp)
+{
+    // Copies the six D3D11_VIEWPORT fields into the matching members.
+    x = vp.TopLeftX; y = vp.TopLeftY; width = vp.Width; height = vp.Height;
+    minDepth = vp.MinDepth; maxDepth = vp.MaxDepth;
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Viewport operations
+//------------------------------------------------------------------------------
+
+inline float Viewport::AspectRatio() const
+{
+    // A zero width or height yields 0 instead of performing the division.
+    const bool degenerate = ( width == 0.f || height == 0.f );
+    if ( degenerate ) return 0.f;
+    return width / height;
+}
+
+inline Vector3 Viewport::Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world) const
+{
+    // Passes this viewport's x/y/width/height/minDepth/maxDepth and the three matrices to XMVector3Project.
+    using namespace DirectX;
+    const XMMATRIX projection = XMLoadFloat4x4(&proj);
+    const XMVECTOR projected = XMVector3Project(XMLoadFloat3(&p), x, y, width, height, minDepth, maxDepth, projection, view, world);
+    Vector3 result;
+    XMStoreFloat3(&result, projected);
+    return result;
+}
+
+inline void Viewport::Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result) const
+{
+    // Out-parameter form: the XMVector3Project output for p is written into result.
+    using namespace DirectX;
+    const XMMATRIX projection = XMLoadFloat4x4(&proj);
+    const XMVECTOR projected = XMVector3Project(XMLoadFloat3(&p), x, y, width, height, minDepth, maxDepth, projection, view, world);
+    XMStoreFloat3(&result, projected);
+}
+
+inline Vector3 Viewport::Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world) const
+{
+    // Passes this viewport's x/y/width/height/minDepth/maxDepth and the three matrices to XMVector3Unproject.
+    using namespace DirectX;
+    const XMMATRIX projection = XMLoadFloat4x4(&proj);
+    const XMVECTOR unprojected = XMVector3Unproject(XMLoadFloat3(&p), x, y, width, height, minDepth, maxDepth, projection, view, world);
+    Vector3 result;
+    XMStoreFloat3(&result, unprojected);
+    return result;
+}
+
+inline void Viewport::Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result) const
+{
+    // Out-parameter form: the XMVector3Unproject output for p is written into result.
+    using namespace DirectX;
+    const XMMATRIX projection = XMLoadFloat4x4(&proj);
+    const XMVECTOR unprojected = XMVector3Unproject(XMLoadFloat3(&p), x, y, width, height, minDepth, maxDepth, projection, view, world);
+    XMStoreFloat3(&result, unprojected);
+}
diff --git a/Windows/DirectXTK/SpriteBatch.h b/Windows/DirectXTK/SpriteBatch.h
new file mode 100644
index 0000000..9c974e1
--- /dev/null
+++ b/Windows/DirectXTK/SpriteBatch.h
@@ -0,0 +1,121 @@
+//--------------------------------------------------------------------------------------
+// File: SpriteBatch.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <DirectXMath.h>
+#include <DirectXColors.h>
+#include <functional>
+#include <memory>
+
+// VS 2010 doesn't support explicit calling convention for std::function
+#ifndef DIRECTX_STD_CALLCONV
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#define DIRECTX_STD_CALLCONV
+#else
+#define DIRECTX_STD_CALLCONV __cdecl
+#endif
+#endif
+
+namespace DirectX
+{
+    #if (DIRECTX_MATH_VERSION < 305) && !defined(XM_CALLCONV)
+    #define XM_CALLCONV __fastcall
+    typedef const XMVECTOR& HXMVECTOR;
+    typedef const XMMATRIX& FXMMATRIX;
+    #endif
+
+    enum SpriteSortMode                 // Sort-mode selector taken by SpriteBatch::Begin; unscoped enum, values 0..4 in declaration order.
+    {
+        SpriteSortMode_Deferred,        // 0 - the default sortMode argument of SpriteBatch::Begin
+        SpriteSortMode_Immediate,       // 1
+        SpriteSortMode_Texture,         // 2 - NOTE(review): presumably groups sprites by texture; confirm in SpriteBatch.cpp
+        SpriteSortMode_BackToFront,     // 3 - NOTE(review): presumably ordered by the layerDepth argument of Draw; confirm
+        SpriteSortMode_FrontToBack,     // 4 - NOTE(review): presumably the reverse of BackToFront; confirm
+    };
+    
+    
+    enum SpriteEffects                  // Flip flags passed as the 'effects' argument of the Draw/DrawString overloads.
+    {
+        SpriteEffects_None = 0,                 // default 'effects' value in every overload that takes one
+        SpriteEffects_FlipHorizontally = 1,     // bit 0
+        SpriteEffects_FlipVertically = 2,       // bit 1
+        SpriteEffects_FlipBoth = SpriteEffects_FlipHorizontally | SpriteEffects_FlipVertically,   // both bits set (value 3)
+    };
+
+    
+    class SpriteBatch   // Batched sprite drawing interface; all state lives in the private Impl class reached through pImpl.
+    {
+    public:
+        explicit SpriteBatch(_In_ ID3D11DeviceContext* deviceContext);   // explicit: no implicit conversion from a device-context pointer
+        SpriteBatch(SpriteBatch&& moveFrom);                             // move-only type: moves are declared, copies are disabled below
+        SpriteBatch& operator= (SpriteBatch&& moveFrom);
+        virtual ~SpriteBatch();
+
+        // Begin/End a batch of sprite drawing operations. Every Begin argument has a default: sortMode = SpriteSortMode_Deferred, the four state objects and setCustomShaders = nullptr, transformMatrix = MatrixIdentity.
+        void XM_CALLCONV Begin(SpriteSortMode sortMode = SpriteSortMode_Deferred, _In_opt_ ID3D11BlendState* blendState = nullptr, _In_opt_ ID3D11SamplerState* samplerState = nullptr, _In_opt_ ID3D11DepthStencilState* depthStencilState = nullptr, _In_opt_ ID3D11RasterizerState* rasterizerState = nullptr,
+                               _In_opt_ std::function<void DIRECTX_STD_CALLCONV()> setCustomShaders = nullptr, FXMMATRIX transformMatrix = MatrixIdentity);
+        void __cdecl End();
+
+        // Draw overloads specifying position, origin and scale as XMFLOAT2. The third overload takes a two-component scale and has no defaults for color/rotation/origin.
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, FXMVECTOR color = Colors::White);
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0);
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0);
+
+        // Draw overloads specifying position, origin and scale via the first two components of an XMVECTOR. Same three shapes as the XMFLOAT2 group; origin defaults to g_XMZero.
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, FXMVECTOR position, FXMVECTOR color = Colors::White);
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, FXMVECTOR position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color = Colors::White, float rotation = 0, FXMVECTOR origin = g_XMZero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0);
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, FXMVECTOR position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0);
+
+        // Draw overloads specifying position as a RECT. These take no scale argument.
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, RECT const& destinationRectangle, FXMVECTOR color = Colors::White);
+        void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, RECT const& destinationRectangle, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0);
+
+        // Rotation mode (a DXGI_MODE_ROTATION value) to be applied to the sprite transformation
+        void __cdecl SetRotation( DXGI_MODE_ROTATION mode );
+        DXGI_MODE_ROTATION __cdecl GetRotation() const;
+
+        // Set viewport for sprite transformation
+        void __cdecl SetViewport( const D3D11_VIEWPORT& viewPort );
+
+    private:
+        // Private implementation: Impl is only forward-declared here, so its definition is not part of this header.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        static const XMMATRIX MatrixIdentity;   // default for Begin's transformMatrix; declared only, definition not in this header
+        static const XMFLOAT2 Float2Zero;       // default 'origin' for the XMFLOAT2 and RECT Draw overloads; declared only
+
+        // Prevent copying. DIRECTX_CTOR_DELETE is '=delete;' unless _MSC_VER < 1800, where it is ';' and these stay private and undefined.
+        SpriteBatch(SpriteBatch const&) DIRECTX_CTOR_DELETE
+        SpriteBatch& operator= (SpriteBatch const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/SpriteFont.h b/Windows/DirectXTK/SpriteFont.h
new file mode 100644
index 0000000..47db082
--- /dev/null
+++ b/Windows/DirectXTK/SpriteFont.h
@@ -0,0 +1,89 @@
+//--------------------------------------------------------------------------------------
+// File: SpriteFont.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "SpriteBatch.h"
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+
+namespace DirectX
+{
+    class SpriteFont   // Font that draws text through a SpriteBatch; all state lives in the private Impl class reached through pImpl.
+    {
+    public:
+        struct Glyph;   // forward declaration so the constructor below can take Glyph const*; defined further down
+
+        SpriteFont(_In_ ID3D11Device* device, _In_z_ wchar_t const* fileName);                                                      // from a zero-terminated file name
+        SpriteFont(_In_ ID3D11Device* device, _In_reads_bytes_(dataSize) uint8_t const* dataBlob, _In_ size_t dataSize);           // from dataSize bytes in memory
+        SpriteFont(_In_ ID3D11ShaderResourceView* texture, _In_reads_(glyphCount) Glyph const* glyphs, _In_ size_t glyphCount, _In_ float lineSpacing);   // from a texture plus glyphCount glyph records
+
+        SpriteFont(SpriteFont&& moveFrom);               // move-only type: moves are declared, copies are disabled below
+        SpriteFont& operator= (SpriteFont&& moveFrom);
+        virtual ~SpriteFont();
+
+        void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, XMFLOAT2 const& position, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const;
+        void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const;
+        void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, FXMVECTOR position, FXMVECTOR color = Colors::White, float rotation = 0, FXMVECTOR origin = g_XMZero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const;
+        void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const;
+
+        XMVECTOR XM_CALLCONV MeasureString(_In_z_ wchar_t const* text) const;   // NOTE(review): presumably the text extent in the x/y components; confirm in SpriteFont.cpp
+
+        // Spacing properties (line spacing getter/setter)
+        float __cdecl GetLineSpacing() const;
+        void __cdecl SetLineSpacing(float spacing);
+
+        // Font properties (default character getter/setter)
+        wchar_t __cdecl GetDefaultCharacter() const;
+        void __cdecl SetDefaultCharacter(wchar_t character);
+
+        bool __cdecl ContainsCharacter(wchar_t character) const;
+
+        // Custom layout/rendering: direct access to glyph records and the sprite sheet texture
+        Glyph const* __cdecl FindGlyph(wchar_t character) const;
+        void GetSpriteSheet( ID3D11ShaderResourceView** texture ) const;   // texture is an output pointer (no SAL annotation on this one)
+
+        // Describes a single character glyph. Character is stored as uint32_t although the lookup functions above take wchar_t.
+        struct Glyph
+        {
+            uint32_t Character;
+            RECT Subrect;      // NOTE(review): presumably the glyph's rectangle within the sprite sheet; confirm
+            float XOffset;
+            float YOffset;
+            float XAdvance;
+        };
+
+
+    private:
+        // Private implementation: Impl is only forward-declared here, so its definition is not part of this header.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl;
+
+        static const XMFLOAT2 Float2Zero;   // default 'origin' for the XMFLOAT2 DrawString overload; declared only
+
+        // Prevent copying. DIRECTX_CTOR_DELETE is '=delete;' unless _MSC_VER < 1800, where it is ';' and these stay private and undefined.
+        SpriteFont(SpriteFont const&) DIRECTX_CTOR_DELETE
+        SpriteFont& operator= (SpriteFont const&) DIRECTX_CTOR_DELETE
+    };
+}
diff --git a/Windows/DirectXTK/VertexTypes.h b/Windows/DirectXTK/VertexTypes.h
new file mode 100644
index 0000000..0ee7629
--- /dev/null
+++ b/Windows/DirectXTK/VertexTypes.h
@@ -0,0 +1,333 @@
+//--------------------------------------------------------------------------------------
+// File: VertexTypes.h
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+// VS 2010/2012 do not support =default =delete
+#ifndef DIRECTX_CTOR_DEFAULT
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#define DIRECTX_CTOR_DEFAULT {}
+#define DIRECTX_CTOR_DELETE ;
+#else
+#define DIRECTX_CTOR_DEFAULT =default;
+#define DIRECTX_CTOR_DELETE =delete;
+#endif
+#endif
+
+#include <DirectXMath.h>
+
+
+namespace DirectX
+{
+    #if (DIRECTX_MATH_VERSION < 305) && !defined(XM_CALLCONV)
+    #define XM_CALLCONV __fastcall
+    typedef const XMVECTOR& HXMVECTOR;
+    typedef const XMMATRIX& FXMMATRIX;
+    #endif
+
+    // Vertex struct holding position (XMFLOAT3) and color (XMFLOAT4) information; constructible from stored floats or from SIMD vectors.
+    struct VertexPositionColor
+    {
+        VertexPositionColor() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionColor(XMFLOAT3 const& position, XMFLOAT4 const& color)
+          : position(position),
+            color(color)
+        { }
+
+        VertexPositionColor(FXMVECTOR position, FXMVECTOR color)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat4(&this->color, color);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT4 color;
+
+        static const int InputElementCount = 2;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct holding position (XMFLOAT3) and texture mapping (XMFLOAT2) information; constructible from stored floats or from SIMD vectors.
+    struct VertexPositionTexture
+    {
+        VertexPositionTexture() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionTexture(XMFLOAT3 const& position, XMFLOAT2 const& textureCoordinate)
+          : position(position),
+            textureCoordinate(textureCoordinate)
+        { }
+
+        VertexPositionTexture(FXMVECTOR position, FXMVECTOR textureCoordinate)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat2(&this->textureCoordinate, textureCoordinate);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT2 textureCoordinate;
+
+        static const int InputElementCount = 2;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct holding position (XMFLOAT3) and normal vector (XMFLOAT3); constructible from stored floats or from SIMD vectors.
+    struct VertexPositionNormal
+    {
+        VertexPositionNormal() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionNormal(XMFLOAT3 const& position, XMFLOAT3 const& normal)
+          : position(position),
+            normal(normal)
+        { }
+
+        VertexPositionNormal(FXMVECTOR position, FXMVECTOR normal)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat3(&this->normal, normal);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT3 normal;
+
+        static const int InputElementCount = 2;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct holding position (XMFLOAT3), color (XMFLOAT4), and texture mapping (XMFLOAT2) information.
+    struct VertexPositionColorTexture
+    {
+        VertexPositionColorTexture() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionColorTexture(XMFLOAT3 const& position, XMFLOAT4 const& color, XMFLOAT2 const& textureCoordinate)
+          : position(position),
+            color(color),
+            textureCoordinate(textureCoordinate)
+        { }
+
+        VertexPositionColorTexture(FXMVECTOR position, FXMVECTOR color, FXMVECTOR textureCoordinate)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat4(&this->color, color);
+            XMStoreFloat2(&this->textureCoordinate, textureCoordinate);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT4 color;
+        XMFLOAT2 textureCoordinate;
+
+        static const int InputElementCount = 3;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct holding position (XMFLOAT3), normal vector (XMFLOAT3), and color (XMFLOAT4) information.
+    struct VertexPositionNormalColor
+    {
+        VertexPositionNormalColor() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionNormalColor(XMFLOAT3 const& position, XMFLOAT3 const& normal, XMFLOAT4 const& color)
+          : position(position),
+            normal(normal),
+            color(color)
+        { }
+
+        VertexPositionNormalColor(FXMVECTOR position, FXMVECTOR normal, FXMVECTOR color)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat3(&this->normal, normal);
+            XMStoreFloat4(&this->color, color);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT3 normal;
+        XMFLOAT4 color;
+
+        static const int InputElementCount = 3;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct holding position (XMFLOAT3), normal vector (XMFLOAT3), and texture mapping (XMFLOAT2) information.
+    struct VertexPositionNormalTexture
+    {
+        VertexPositionNormalTexture() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionNormalTexture(XMFLOAT3 const& position, XMFLOAT3 const& normal, XMFLOAT2 const& textureCoordinate)
+          : position(position),
+            normal(normal),
+            textureCoordinate(textureCoordinate)
+        { }
+
+        VertexPositionNormalTexture(FXMVECTOR position, FXMVECTOR normal, FXMVECTOR textureCoordinate)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat3(&this->normal, normal);
+            XMStoreFloat2(&this->textureCoordinate, textureCoordinate);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT3 normal;
+        XMFLOAT2 textureCoordinate;
+
+        static const int InputElementCount = 3;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct holding position (XMFLOAT3), normal vector (XMFLOAT3), color (XMFLOAT4), and texture mapping (XMFLOAT2) information.
+    struct VertexPositionNormalColorTexture
+    {
+        VertexPositionNormalColorTexture() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        VertexPositionNormalColorTexture(XMFLOAT3 const& position, XMFLOAT3 const& normal, XMFLOAT4 const& color, XMFLOAT2 const& textureCoordinate)
+          : position(position),
+            normal(normal),
+            color(color),
+            textureCoordinate(textureCoordinate)
+        { }
+
+        VertexPositionNormalColorTexture(FXMVECTOR position, FXMVECTOR normal, FXMVECTOR color, CXMVECTOR textureCoordinate)   // fourth vector is CXMVECTOR, unlike the first three
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat3(&this->normal, normal);
+            XMStoreFloat4(&this->color, color);
+            XMStoreFloat2(&this->textureCoordinate, textureCoordinate);
+        }
+
+        XMFLOAT3 position;
+        XMFLOAT3 normal;
+        XMFLOAT4 color;
+        XMFLOAT2 textureCoordinate;
+
+        static const int InputElementCount = 4;   // length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+
+
+    // Vertex struct for Visual Studio Shader Designer (DGSL) holding position, normal,
+    // tangent, color (RGBA, kept as a single uint32_t), and texture mapping information
+    struct VertexPositionNormalTangentColorTexture
+    {
+        VertexPositionNormalTangentColorTexture() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        XMFLOAT3 position;
+        XMFLOAT3 normal;
+        XMFLOAT4 tangent;
+        uint32_t color;              // written directly from an rgba argument or through SetColor
+        XMFLOAT2 textureCoordinate;
+
+        VertexPositionNormalTangentColorTexture(XMFLOAT3 const& position, XMFLOAT3 const& normal, XMFLOAT4 const& tangent, uint32_t rgba, XMFLOAT2 const& textureCoordinate)
+          : position(position),
+            normal(normal),
+            tangent(tangent),
+            color(rgba),             // rgba is stored as given, no conversion
+            textureCoordinate(textureCoordinate)
+        {
+        }
+
+        VertexPositionNormalTangentColorTexture(FXMVECTOR position, FXMVECTOR normal, FXMVECTOR tangent, uint32_t rgba, CXMVECTOR textureCoordinate)
+          : color(rgba)
+        {
+            XMStoreFloat3(&this->position, position);   // this-> picks the member; the same-named parameter is the source vector
+            XMStoreFloat3(&this->normal, normal);
+            XMStoreFloat4(&this->tangent, tangent);
+            XMStoreFloat2(&this->textureCoordinate, textureCoordinate);
+        }
+
+        VertexPositionNormalTangentColorTexture(XMFLOAT4 const& /*unused-marker-free*/ position_placeholder_do_not_use);
+    };
+
+
+    // Vertex struct for Visual Studio Shader Designer (DGSL) holding position, normal,
+    // tangent, color (RGBA), texture mapping information, and skinning weights (adds 'indices' and 'weights' to the base struct)
+    struct VertexPositionNormalTangentColorTextureSkinning : public VertexPositionNormalTangentColorTexture
+    {
+        VertexPositionNormalTangentColorTextureSkinning() DIRECTX_CTOR_DEFAULT   // expands to '=default;' or, for _MSC_VER < 1800, to '{}'
+
+        uint32_t indices;   // written only through SetBlendIndices, which takes an XMUINT4 (definition not in this header)
+        uint32_t weights;   // written only through SetBlendWeights, which takes four floats (definition not in this header)
+
+        VertexPositionNormalTangentColorTextureSkinning(XMFLOAT3 const& position, XMFLOAT3 const& normal, XMFLOAT4 const& tangent, uint32_t rgba,
+                                                        XMFLOAT2 const& textureCoordinate, XMUINT4 const& indices, XMFLOAT4 const& weights)
+          : VertexPositionNormalTangentColorTexture(position,normal,tangent,rgba,textureCoordinate)
+        {
+            SetBlendIndices( indices );   // 'indices'/'weights' here are the parameters, which shadow the members of the same name
+            SetBlendWeights( weights );
+        }
+
+        VertexPositionNormalTangentColorTextureSkinning(FXMVECTOR position, FXMVECTOR normal, FXMVECTOR tangent, uint32_t rgba, CXMVECTOR textureCoordinate,
+                                                        XMUINT4 const& indices, CXMVECTOR weights)
+          : VertexPositionNormalTangentColorTexture(position,normal,tangent,rgba,textureCoordinate)
+        {
+            SetBlendIndices( indices );
+            SetBlendWeights( weights );
+        }
+
+        VertexPositionNormalTangentColorTextureSkinning(XMFLOAT3 const& position, XMFLOAT3 const& normal, XMFLOAT4 const& tangent, XMFLOAT4 const& color,
+                                                        XMFLOAT2 const& textureCoordinate, XMUINT4 const& indices, XMFLOAT4 const& weights)
+          : VertexPositionNormalTangentColorTexture(position,normal,tangent,color,textureCoordinate)
+        {
+            SetBlendIndices( indices );
+            SetBlendWeights( weights );
+        }
+
+        VertexPositionNormalTangentColorTextureSkinning(FXMVECTOR position, FXMVECTOR normal, FXMVECTOR tangent, CXMVECTOR color, CXMVECTOR textureCoordinate,
+                                                        XMUINT4 const& indices, CXMVECTOR weights)
+          : VertexPositionNormalTangentColorTexture(position,normal,tangent,color,textureCoordinate)
+        {
+            SetBlendIndices( indices );
+            SetBlendWeights( weights );
+        }
+
+        void __cdecl SetBlendIndices( XMUINT4 const& iindices );
+
+        void __cdecl SetBlendWeights( XMFLOAT4 const& iweights ) { SetBlendWeights( XMLoadFloat4( &iweights ) ); }   // forwards to the vector overload below
+        void XM_CALLCONV SetBlendWeights( FXMVECTOR iweights );
+
+        static const int InputElementCount = 7;   // hides the base struct's InputElementCount (5); length of InputElements below
+        static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount];   // declared only; definition not in this header
+    };
+}
diff --git a/Windows/DirectXTK/WICTextureLoader.h b/Windows/DirectXTK/WICTextureLoader.h
new file mode 100644
index 0000000..ae8b705
--- /dev/null
+++ b/Windows/DirectXTK/WICTextureLoader.h
@@ -0,0 +1,154 @@
+//--------------------------------------------------------------------------------------
+// File: WICTextureLoader.h
+//
+// Function for loading a WIC image and creating a Direct3D 11 runtime texture for it
+// (auto-generating mipmaps if possible)
+//
+// Note: Assumes application has already called CoInitializeEx
+//
+// Warning: CreateWICTexture* functions are not thread-safe if given a d3dContext instance for
+//          auto-gen mipmap support.
+//
+// Note these functions are useful for images created as simple 2D textures. For
+// more complex resources, DDSTextureLoader is an excellent light-weight runtime loader.
+// For a full-featured DDS file reader, writer, and texture processing pipeline see
+// the 'Texconv' sample and the 'DirectXTex' library.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) && (_WIN32_WINNT <= _WIN32_WINNT_WIN8)
+#error WIC is not supported on Windows Phone 8.0
+#endif
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+#include <d3d11_x.h>
+#else
+#include <d3d11_1.h>
+#endif
+
+#pragma warning(push)
+#pragma warning(disable : 4005)
+#include <stdint.h>
+#pragma warning(pop)
+
+namespace DirectX
+{
+    // Standard version: loads a WIC image from memory. NOTE(review): the effect of maxsize == 0 is not visible in this header; confirm in WICTextureLoader.cpp.
+    HRESULT __cdecl CreateWICTextureFromMemory( _In_ ID3D11Device* d3dDevice,
+                                                _In_reads_bytes_(wicDataSize) const uint8_t* wicData,   // wicDataSize readable bytes
+                                                _In_ size_t wicDataSize,
+                                                _Out_opt_ ID3D11Resource** texture,                     // optional output
+                                                _Out_opt_ ID3D11ShaderResourceView** textureView,       // optional output
+                                                _In_ size_t maxsize = 0
+                                              );
+
+    HRESULT __cdecl CreateWICTextureFromFile( _In_ ID3D11Device* d3dDevice,                         // file-based counterpart of CreateWICTextureFromMemory
+                                              _In_z_ const wchar_t* szFileName,                     // zero-terminated wide-character file name
+                                              _Out_opt_ ID3D11Resource** texture,                   // optional output
+                                              _Out_opt_ ID3D11ShaderResourceView** textureView,     // optional output
+                                              _In_ size_t maxsize = 0
+                                            );
+
+    // Standard version with optional auto-gen mipmap support. Xbox One title builds take the ...X device/context types; both branches share the parameter tail below.
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateWICTextureFromMemory( _In_ ID3D11DeviceX* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateWICTextureFromMemory( _In_ ID3D11Device* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContext* d3dContext,               // optional; the file header warns of no thread-safety when one is given
+    #endif
+                                                _In_reads_bytes_(wicDataSize) const uint8_t* wicData,   // wicDataSize readable bytes
+                                                _In_ size_t wicDataSize,
+                                                _Out_opt_ ID3D11Resource** texture,                     // optional output
+                                                _Out_opt_ ID3D11ShaderResourceView** textureView,       // optional output
+                                                _In_ size_t maxsize = 0
+                                              );
+
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateWICTextureFromFile( _In_ ID3D11DeviceX* d3dDevice,
+                                              _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateWICTextureFromFile( _In_ ID3D11Device* d3dDevice,                         // file-based overload that also accepts a device context
+                                              _In_opt_ ID3D11DeviceContext* d3dContext,             // optional; the file header warns of no thread-safety when one is given
+    #endif
+                                              _In_z_ const wchar_t* szFileName,                     // zero-terminated wide-character file name
+                                              _Out_opt_ ID3D11Resource** texture,                   // optional output
+                                              _Out_opt_ ID3D11ShaderResourceView** textureView,     // optional output
+                                              _In_ size_t maxsize = 0
+                                            );
+
+    // Extended version: adds usage, bind/CPU-access/misc flags and forceSRGB. Unlike the standard version, maxsize has no default here.
+    HRESULT __cdecl CreateWICTextureFromMemoryEx( _In_ ID3D11Device* d3dDevice,
+                                                  _In_reads_bytes_(wicDataSize) const uint8_t* wicData,   // wicDataSize readable bytes
+                                                  _In_ size_t wicDataSize,
+                                                  _In_ size_t maxsize,
+                                                  _In_ D3D11_USAGE usage,
+                                                  _In_ unsigned int bindFlags,
+                                                  _In_ unsigned int cpuAccessFlags,
+                                                  _In_ unsigned int miscFlags,
+                                                  _In_ bool forceSRGB,
+                                                  _Out_opt_ ID3D11Resource** texture,                     // optional output
+                                                  _Out_opt_ ID3D11ShaderResourceView** textureView        // optional output
+                                                );
+
+    HRESULT __cdecl CreateWICTextureFromFileEx( _In_ ID3D11Device* d3dDevice,                         // file-based counterpart of CreateWICTextureFromMemoryEx
+                                                _In_z_ const wchar_t* szFileName,                     // zero-terminated wide-character file name
+                                                _In_ size_t maxsize,                                  // no default in the Ex variants
+                                                _In_ D3D11_USAGE usage,
+                                                _In_ unsigned int bindFlags,
+                                                _In_ unsigned int cpuAccessFlags,
+                                                _In_ unsigned int miscFlags,
+                                                _In_ bool forceSRGB,
+                                                _Out_opt_ ID3D11Resource** texture,                   // optional output
+                                                _Out_opt_ ID3D11ShaderResourceView** textureView      // optional output
+                                              );
+
+    // Extended version with optional auto-gen mipmap support. Xbox One title builds take the ...X device/context types; both branches share the parameter tail below.
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateWICTextureFromMemoryEx( _In_ ID3D11DeviceX* d3dDevice,
+                                                  _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateWICTextureFromMemoryEx( _In_ ID3D11Device* d3dDevice,
+                                                  _In_opt_ ID3D11DeviceContext* d3dContext,               // optional; the file header warns of no thread-safety when one is given
+    #endif
+                                                  _In_reads_bytes_(wicDataSize) const uint8_t* wicData,   // wicDataSize readable bytes
+                                                  _In_ size_t wicDataSize,
+                                                  _In_ size_t maxsize,
+                                                  _In_ D3D11_USAGE usage,
+                                                  _In_ unsigned int bindFlags,
+                                                  _In_ unsigned int cpuAccessFlags,
+                                                  _In_ unsigned int miscFlags,
+                                                  _In_ bool forceSRGB,
+                                                  _Out_opt_ ID3D11Resource** texture,                     // optional output
+                                                  _Out_opt_ ID3D11ShaderResourceView** textureView        // optional output
+                                              );
+
+    #if defined(_XBOX_ONE) && defined(_TITLE)
+    HRESULT __cdecl CreateWICTextureFromFileEx( _In_ ID3D11DeviceX* d3dDevice,
+                                                _In_opt_ ID3D11DeviceContextX* d3dContext,
+    #else
+    HRESULT __cdecl CreateWICTextureFromFileEx( _In_ ID3D11Device* d3dDevice,                         // file-based Ex overload that also accepts a device context
+                                                _In_opt_ ID3D11DeviceContext* d3dContext,             // optional; the file header warns of no thread-safety when one is given
+    #endif
+                                                _In_z_ const wchar_t* szFileName,                     // zero-terminated wide-character file name
+                                                _In_ size_t maxsize,                                  // no default in the Ex variants
+                                                _In_ D3D11_USAGE usage,
+                                                _In_ unsigned int bindFlags,
+                                                _In_ unsigned int cpuAccessFlags,
+                                                _In_ unsigned int miscFlags,
+                                                _In_ bool forceSRGB,
+                                                _Out_opt_ ID3D11Resource** texture,                   // optional output
+                                                _Out_opt_ ID3D11ShaderResourceView** textureView      // optional output
+                                            );
+}
\ No newline at end of file
diff --git a/Windows/DirectXTK/XboxDDSTextureLoader.h b/Windows/DirectXTK/XboxDDSTextureLoader.h
new file mode 100644
index 0000000..f303b67
--- /dev/null
+++ b/Windows/DirectXTK/XboxDDSTextureLoader.h
@@ -0,0 +1,61 @@
+//--------------------------------------------------------------------------------------
+// File: XboxDDSTextureLoader.h
+//
+// Functions for loading a DDS texture using the XBOX extended header and creating a
+// Direct3D11.X runtime resource for it via the CreatePlacement APIs
+//
+// Note these functions will not load standard DDS files. Use the DDSTextureLoader
+// module in the DirectXTex package or as part of the DirectXTK library to load
+// these files which use standard Direct3D 11 resource creation APIs.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if !defined(_XBOX_ONE) || !defined(_TITLE)
+#error This module only supports Xbox One exclusive apps
+#endif
+
+#include <d3d11_x.h>
+
+#include <stdint.h>
+
+namespace Xbox
+{
+    enum DDS_ALPHA_MODE // Value type of the loaders' optional alphaMode output parameter
+    {
+        DDS_ALPHA_MODE_UNKNOWN       = 0,
+        DDS_ALPHA_MODE_STRAIGHT      = 1,
+        DDS_ALPHA_MODE_PREMULTIPLIED = 2,
+        DDS_ALPHA_MODE_OPAQUE        = 3,
+        DDS_ALPHA_MODE_CUSTOM        = 4,
+    };
+
+    HRESULT __cdecl CreateDDSTextureFromMemory( _In_ ID3D11DeviceX* d3dDevice, // Loader variant reading the DDS image from a memory buffer
+                                                _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData, // buffer of ddsDataSize readable bytes
+                                                _In_ size_t ddsDataSize,
+                                                _Outptr_opt_ ID3D11Resource** texture, // optional output: may be null
+                                                _Outptr_opt_ ID3D11ShaderResourceView** textureView, // optional output: may be null
+                                                _Outptr_ void** grfxMemory, // required output; NOTE(review): presumably the memory backing the placed resource - confirm who frees it
+                                                _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr, // optional output, omitted by default
+                                                _In_ bool forceSRGB = false
+                                               );
+
+    HRESULT __cdecl CreateDDSTextureFromFile( _In_ ID3D11DeviceX* d3dDevice, // Loader variant reading the DDS image from a file
+                                              _In_z_ const wchar_t* szFileName, // null-terminated wide-character file name
+                                              _Outptr_opt_ ID3D11Resource** texture, // optional output: may be null
+                                              _Outptr_opt_ ID3D11ShaderResourceView** textureView, // optional output: may be null
+                                              _Outptr_ void** grfxMemory, // required output; NOTE(review): presumably the memory backing the placed resource - confirm who frees it
+                                              _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr, // optional output, omitted by default
+                                              _In_ bool forceSRGB = false
+                                            );
+}
\ No newline at end of file
diff --git a/Windows/Renderer.cpp b/Windows/Renderer.cpp
new file mode 100644
index 0000000..0157a6c
--- /dev/null
+++ b/Windows/Renderer.cpp
@@ -0,0 +1,670 @@
+#include "stdafx.h"
+#include "Renderer.h"
+#include "DirectXTK/SpriteBatch.h"
+#include "DirectXTK/SpriteFont.h"
+#include "../Core/Console.h"
+#include "../Core/Debugger.h"
+#include "../Core/MessageManager.h"
+#include "../Utilities/UTF8Util.h"
+
+using namespace DirectX;
+
+Renderer::Renderer(shared_ptr<Console> console, HWND hWnd, bool registerAsMessageManager) : BaseRenderer(console, registerAsMessageManager), _hWnd(hWnd)
+{
+	//The target window is stored through the member initializer list above;
+	//256x240 is the initial frame size handed to SetScreenSize
+	SetScreenSize(256, 240);
+}
+
+Renderer::~Renderer()
+{
+	//TODO: unregistering from the console's video renderer is still disabled (commented out below)
+	/*shared_ptr<VideoRenderer> videoRenderer = _console->GetVideoRenderer();
+	if(videoRenderer) {
+		videoRenderer->UnregisterRenderingDevice(this);
+	}*/
+	CleanupDevice(); //Releases the Direct3D objects and frame buffers still held
+}
+
+void Renderer::SetFullscreenMode(bool fullscreen, void* windowHandle, uint32_t monitorWidth, uint32_t monitorHeight)
+{
+	HWND requestedWindow = (HWND)windowHandle;
+	if(fullscreen == _fullscreen && _hWnd == requestedWindow) { return; } //Same mode, same window: nothing to record
+	_hWnd = requestedWindow;
+	_monitorWidth = monitorWidth;
+	_monitorHeight = monitorHeight;
+	_newFullscreen = fullscreen; //Only the pending flag changes here; _fullscreen itself is left untouched
+}
+
+void Renderer::SetScreenSize(uint32_t width, uint32_t height)
+{
+	//TODO: currently a no-op (width and height are ignored) - the whole implementation below is commented out
+	/*ScreenSize screenSize;
+	_console->GetVideoDecoder()->GetScreenSize(screenSize, false);
+
+	if(_screenHeight != screenSize.Height || _screenWidth != screenSize.Width || _nesFrameHeight != height || _nesFrameWidth != width || _resizeFilter != _console->GetSettings()->GetVideoResizeFilter() || _newFullscreen != _fullscreen) {
+		auto frameLock = _frameLock.AcquireSafe();
+		auto textureLock = _textureLock.AcquireSafe();
+		_console->GetVideoDecoder()->GetScreenSize(screenSize, false);
+		if(_screenHeight != screenSize.Height || _screenWidth != screenSize.Width || _nesFrameHeight != height || _nesFrameWidth != width || _resizeFilter != _console->GetSettings()->GetVideoResizeFilter() || _newFullscreen != _fullscreen) {
+			_nesFrameHeight = height;
+			_nesFrameWidth = width;
+			_newFrameBufferSize = width*height;
+
+			bool needReset = _fullscreen != _newFullscreen || _resizeFilter != _console->GetSettings()->GetVideoResizeFilter();
+			bool fullscreenResizeMode = _fullscreen && _newFullscreen;
+
+			if(_pSwapChain && _fullscreen && !_newFullscreen) {
+				HRESULT hr = _pSwapChain->SetFullscreenState(FALSE, NULL);
+				if(FAILED(hr)) {
+					MessageManager::Log("SetFullscreenState(FALSE) failed - Error:" + std::to_string(hr));
+				}
+			}
+
+			_fullscreen = _newFullscreen;
+
+			_screenHeight = screenSize.Height;
+			_screenWidth = screenSize.Width;
+
+			if(_fullscreen) {
+				_realScreenHeight = _monitorHeight;
+				_realScreenWidth = _monitorWidth;
+			} else {
+				_realScreenHeight = screenSize.Height;
+				_realScreenWidth = screenSize.Width;
+			}
+
+			_leftMargin = (_realScreenWidth - _screenWidth) / 2;
+			_topMargin = (_realScreenHeight - _screenHeight) / 2;
+
+			_screenBufferSize = _realScreenHeight*_realScreenWidth;
+
+			if(!_pSwapChain || needReset) {
+				Reset();
+			} else {
+				if(fullscreenResizeMode) {
+					ResetNesBuffers();
+					CreateNesBuffers();
+				} else {
+					ResetNesBuffers();
+					ReleaseRenderTargetView();
+					_pSwapChain->ResizeBuffers(1, _realScreenWidth, _realScreenHeight, DXGI_FORMAT_B8G8R8A8_UNORM, 0);
+					CreateRenderTargetView();
+					CreateNesBuffers();
+				}
+			}
+		}
+	}*/
+}
+
+void Renderer::Reset()
+{
+	auto frameGuard = _frameLock.AcquireSafe(); //Held for the whole teardown/rebuild sequence
+	CleanupDevice();
+	if(SUCCEEDED(InitDevice())) {
+		//TODO
+		//_console->GetVideoRenderer()->RegisterRenderingDevice(this);
+		return;
+	}
+	CleanupDevice(); //Initialization failed: release whatever was created before the failure
+}
+
+void Renderer::CleanupDevice()
+{
+	//Releases everything created by InitDevice(); safe to call when nothing was created.
+	//Helper: releases a COM interface (when set) and clears the pointer
+	auto releaseAndClear = [](auto*& comObject) {
+		if(comObject != nullptr) {
+			comObject->Release();
+			comObject = nullptr;
+		}
+	};
+
+	//Frame textures/buffers and the render target view go first
+	ResetNesBuffers();
+	ReleaseRenderTargetView();
+
+	//Pipeline state objects
+	releaseAndClear(_pAlphaEnableBlendingState);
+	releaseAndClear(_pDepthDisabledStencilState);
+	releaseAndClear(_samplerState);
+
+	//Swap chain: fullscreen state is turned off before the interface is released
+	if(_pSwapChain != nullptr) {
+		_pSwapChain->SetFullscreenState(false, nullptr);
+	}
+	releaseAndClear(_pSwapChain);
+
+	//Device context, then the device itself
+	releaseAndClear(_pDeviceContext);
+	releaseAndClear(_pd3dDevice);
+}
+
+void Renderer::ResetNesBuffers()
+{
+	//Frees everything allocated by CreateNesBuffers() except the sprite batch and fonts;
+	//every pointer is cleared so that calling this twice in a row is harmless.
+
+	//Helper: releases a COM interface (when set) and clears the pointer
+	auto releaseAndClear = [](auto*& comObject) {
+		if(comObject != nullptr) {
+			comObject->Release();
+			comObject = nullptr;
+		}
+	};
+
+	//GPU-side resources: the frame texture, the overlay texture and their views
+	releaseAndClear(_pTexture);
+	releaseAndClear(_overlayTexture);
+	releaseAndClear(_pTextureSrv);
+	releaseAndClear(_pOverlaySrv);
+
+	//CPU-side frame buffers (both entries of the pair)
+	for(uint8_t*& frameBuffer : _textureBuffer) {
+		if(frameBuffer != nullptr) {
+			delete[] frameBuffer;
+			frameBuffer = nullptr;
+		}
+	}
+}
+
+void Renderer::ReleaseRenderTargetView()
+{
+	//Nothing to do when no view is currently held
+	if(_pRenderTargetView == nullptr) { return; }
+	_pRenderTargetView->Release();
+	_pRenderTargetView = nullptr;
+}
+
+HRESULT Renderer::CreateRenderTargetView()
+{
+	//Fetch the swap chain's first buffer; the view is created on top of it
+	ID3D11Texture2D* backBuffer = nullptr;
+	HRESULT result = _pSwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer);
+	if(FAILED(result)) {
+		MessageManager::Log("SwapChain::GetBuffer() failed - Error:" + std::to_string(result));
+		return result;
+	}
+
+	result = _pd3dDevice->CreateRenderTargetView(backBuffer, nullptr, &_pRenderTargetView);
+	//The local reference is dropped whether or not the view was created
+	backBuffer->Release();
+	if(SUCCEEDED(result)) {
+		//Bind the new view as the only render target (no depth-stencil view)
+		_pDeviceContext->OMSetRenderTargets(1, &_pRenderTargetView, nullptr);
+		return S_OK;
+	}
+	MessageManager::Log("D3DDevice::CreateRenderTargetView() failed - Error:" + std::to_string(result));
+	return result;
+}
+
+HRESULT Renderer::CreateNesBuffers()
+{
+	// Setup the viewport (covers the whole _realScreenWidth x _realScreenHeight output area)
+	D3D11_VIEWPORT vp;
+	vp.Width = (FLOAT)_realScreenWidth;
+	vp.Height = (FLOAT)_realScreenHeight;
+	vp.MinDepth = 0.0f;
+	vp.MaxDepth = 1.0f;
+	vp.TopLeftX = 0;
+	vp.TopLeftY = 0;
+	_pDeviceContext->RSSetViewports(1, &vp);
+	//Two zero-filled CPU frame buffers, 4 bytes per pixel
+	_textureBuffer[0] = new uint8_t[_nesFrameWidth*_nesFrameHeight * 4];
+	_textureBuffer[1] = new uint8_t[_nesFrameWidth*_nesFrameHeight * 4];
+	memset(_textureBuffer[0], 0, _nesFrameWidth*_nesFrameHeight * 4);
+	memset(_textureBuffer[1], 0, _nesFrameWidth*_nesFrameHeight * 4);
+
+	//Failures are reported as E_FAIL: S_FALSE is a success code, so a FAILED() check in the caller would miss it
+	_pTexture = CreateTexture(_nesFrameWidth, _nesFrameHeight);
+	if(!_pTexture) {
+		return E_FAIL;
+	}
+	_overlayTexture = CreateTexture(8, 8);
+	if(!_overlayTexture) {
+		return E_FAIL;
+	}
+	_pTextureSrv = GetShaderResourceView(_pTexture);
+	if(!_pTextureSrv) {
+		return E_FAIL;
+	}
+	_pOverlaySrv = GetShaderResourceView(_overlayTexture);
+	if(!_pOverlaySrv) {
+		return E_FAIL;
+	}
+
+	//Sprite batch and the two fonts used for 2D drawing
+	_spriteBatch.reset(new SpriteBatch(_pDeviceContext));
+	_largeFont.reset(new SpriteFont(_pd3dDevice, L"Resources\\Font.64.spritefont"));
+	_font.reset(new SpriteFont(_pd3dDevice, L"Resources\\Font.24.spritefont"));
+	_font->SetDefaultCharacter('?');
+
+	return S_OK;
+}
+
+//--------------------------------------------------------------------------------------
+// Create Direct3D device and swap chain, then the render target view, the depth-stencil and blend states, the frame buffers and the sampler. Returns the first failing HRESULT, S_OK otherwise
+//--------------------------------------------------------------------------------------
+HRESULT Renderer::InitDevice()
+{
+	HRESULT hr = S_OK;
+
+	UINT createDeviceFlags = 0;
+#ifdef _DEBUG
+	createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
+#endif
+
+	D3D_DRIVER_TYPE driverTypes[] = //Tried in this order until one succeeds
+	{
+		D3D_DRIVER_TYPE_HARDWARE,
+		D3D_DRIVER_TYPE_WARP,
+		D3D_DRIVER_TYPE_REFERENCE,
+	};
+	UINT numDriverTypes = ARRAYSIZE(driverTypes);
+
+	D3D_FEATURE_LEVEL featureLevels[] = //Requested feature levels, highest first
+	{
+		D3D_FEATURE_LEVEL_11_1,
+		D3D_FEATURE_LEVEL_11_0,
+		D3D_FEATURE_LEVEL_10_1,
+		D3D_FEATURE_LEVEL_10_0,
+	};
+	UINT numFeatureLevels = ARRAYSIZE(featureLevels);
+
+	DXGI_SWAP_CHAIN_DESC sd; //Single BGRA back buffer sized to the real output area, no multisampling
+	ZeroMemory(&sd, sizeof(sd));
+	sd.BufferCount = 1;
+	sd.BufferDesc.Width = _realScreenWidth;
+	sd.BufferDesc.Height = _realScreenHeight;
+	sd.BufferDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+	sd.BufferDesc.RefreshRate.Numerator = 60; //TODO _console->GetSettings()->GetExclusiveRefreshRate();
+	sd.BufferDesc.RefreshRate.Denominator = 1;
+	sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
+	sd.Flags = _fullscreen ? DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH : 0;
+	sd.OutputWindow = _hWnd;
+	sd.SampleDesc.Count = 1;
+	sd.SampleDesc.Quality = 0;
+	sd.Windowed = TRUE; //Always created windowed; fullscreen is requested further down via SetFullscreenState()
+
+	D3D_DRIVER_TYPE driverType = D3D_DRIVER_TYPE_NULL;
+	D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_1;
+	for(UINT driverTypeIndex = 0; driverTypeIndex < numDriverTypes; driverTypeIndex++) {
+		driverType = driverTypes[driverTypeIndex];
+		featureLevel = D3D_FEATURE_LEVEL_11_1;
+		hr = D3D11CreateDeviceAndSwapChain(nullptr, driverType, nullptr, createDeviceFlags, featureLevels, numFeatureLevels, D3D11_SDK_VERSION, &sd, &_pSwapChain, &_pd3dDevice, &featureLevel, &_pDeviceContext);
+
+		/*if(FAILED(hr)) {
+			MessageManager::Log("D3D11CreateDeviceAndSwapChain() failed - Error:" + std::to_string(hr));
+		}*/
+
+		if(hr == E_INVALIDARG) {
+			// DirectX 11.0 platforms will not recognize D3D_FEATURE_LEVEL_11_1 so we need to retry without it
+			featureLevel = D3D_FEATURE_LEVEL_11_0;
+			hr = D3D11CreateDeviceAndSwapChain(nullptr, driverType, nullptr, createDeviceFlags, &featureLevels[1], numFeatureLevels - 1, D3D11_SDK_VERSION, &sd, &_pSwapChain, &_pd3dDevice, &featureLevel, &_pDeviceContext);
+		}
+
+		if(SUCCEEDED(hr)) {
+			break;
+		}
+	}
+		
+	if(FAILED(hr)) { //Every driver type failed: hr holds the result of the last attempt
+		MessageManager::Log("D3D11CreateDeviceAndSwapChain() failed - Error:" + std::to_string(hr));
+		return hr;
+	}
+
+	if(_fullscreen) {
+		hr = _pSwapChain->SetFullscreenState(TRUE, NULL);
+		if(FAILED(hr)) {
+			MessageManager::Log("SetFullscreenState(true) failed - Error:" + std::to_string(hr));
+			MessageManager::Log("Switching back to windowed mode");
+			hr = _pSwapChain->SetFullscreenState(FALSE, NULL);
+			if(FAILED(hr)) {
+				MessageManager::Log("SetFullscreenState(false) failed - Error:" + std::to_string(hr));
+				return hr;
+			}
+		}
+	}
+
+	hr = CreateRenderTargetView();
+	if(FAILED(hr)) {
+		return hr;
+	}
+
+	D3D11_DEPTH_STENCIL_DESC depthDisabledStencilDesc; //Depth testing off (DepthEnable = false), stencil on
+	ZeroMemory(&depthDisabledStencilDesc, sizeof(depthDisabledStencilDesc));
+	depthDisabledStencilDesc.DepthEnable = false;
+	depthDisabledStencilDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
+	depthDisabledStencilDesc.DepthFunc = D3D11_COMPARISON_LESS;
+	depthDisabledStencilDesc.StencilEnable = true;
+	depthDisabledStencilDesc.StencilReadMask = 0xFF;
+	depthDisabledStencilDesc.StencilWriteMask = 0xFF;
+	depthDisabledStencilDesc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
+	depthDisabledStencilDesc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_INCR;
+	depthDisabledStencilDesc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
+	depthDisabledStencilDesc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
+	depthDisabledStencilDesc.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
+	depthDisabledStencilDesc.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_DECR;
+	depthDisabledStencilDesc.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
+	depthDisabledStencilDesc.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
+
+	// Create the state using the device.
+	hr = _pd3dDevice->CreateDepthStencilState(&depthDisabledStencilDesc, &_pDepthDisabledStencilState);
+	if(FAILED(hr)) {
+		MessageManager::Log("D3DDevice::CreateDepthStencilState() failed - Error:" + std::to_string(hr));
+		return hr;
+	}
+
+	// Clear the blend state description.
+	D3D11_BLEND_DESC blendStateDescription;
+	ZeroMemory(&blendStateDescription, sizeof(D3D11_BLEND_DESC));
+
+	// Create an alpha enabled blend state description (color: source * 1 + destination * (1 - source alpha)).
+	blendStateDescription.RenderTarget[0].BlendEnable = TRUE;
+	blendStateDescription.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
+	blendStateDescription.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
+	blendStateDescription.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
+	blendStateDescription.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
+	blendStateDescription.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
+	blendStateDescription.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
+	blendStateDescription.RenderTarget[0].RenderTargetWriteMask = 0x0f;
+
+	// Create the blend state using the description.
+	hr = _pd3dDevice->CreateBlendState(&blendStateDescription, &_pAlphaEnableBlendingState);
+	if(FAILED(hr)) {
+		MessageManager::Log("D3DDevice::CreateBlendState() failed - Error:" + std::to_string(hr));
+		return hr;
+	}
+
+	float blendFactor[4];
+	blendFactor[0] = 0.0f;
+	blendFactor[1] = 0.0f;
+	blendFactor[2] = 0.0f;
+	blendFactor[3] = 0.0f;
+	
+	_pDeviceContext->OMSetBlendState(_pAlphaEnableBlendingState, blendFactor, 0xffffffff);
+	_pDeviceContext->OMSetDepthStencilState(_pDepthDisabledStencilState, 1);
+
+	hr = CreateNesBuffers(); //Viewport, CPU frame buffers, textures/views, sprite batch and fonts
+	if(FAILED(hr)) {
+		return hr;
+	}
+
+	hr = CreateSamplerState();
+	if(FAILED(hr)) {
+		return hr;
+	}
+
+	return S_OK;
+}
+
+HRESULT Renderer::CreateSamplerState()
+{
+	_resizeFilter = VideoResizeFilter::NearestNeighbor; //TODO _console->GetSettings()->GetVideoResizeFilter();
+	//Linear filtering for the bilinear resize filter, point sampling otherwise; clamped on all three axes
+	D3D11_SAMPLER_DESC desc;
+	ZeroMemory(&desc, sizeof(desc));
+	if(_resizeFilter == VideoResizeFilter::Bilinear) {
+		desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
+	} else {
+		desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
+	}
+	desc.AddressU = desc.AddressV = desc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
+	//desc.BorderColor = { 1.0f, 1.0f, 1.0f, 1.0f };
+	desc.MinLOD = -FLT_MAX;
+	desc.MaxLOD = FLT_MAX;
+	desc.MipLODBias = 0.0f;
+	desc.MaxAnisotropy = 1;
+	desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
+
+	HRESULT result = _pd3dDevice->CreateSamplerState(&desc, &_samplerState);
+	if(FAILED(result)) {
+		MessageManager::Log("D3DDevice::CreateSamplerState() failed - Error:" + std::to_string(result));
+	}
+	return result; //The failure is logged above but still handed back to the caller
+}
+
+ID3D11Texture2D* Renderer::CreateTexture(uint32_t width, uint32_t height)
+{
+	//Single-mip, single-sample BGRA texture with dynamic usage: the CPU is granted write
+	//access and the texture can be bound as a shader resource. Returns nullptr on failure
+	D3D11_TEXTURE2D_DESC textureDesc;
+	ZeroMemory(&textureDesc, sizeof(D3D11_TEXTURE2D_DESC));
+	textureDesc.Width = width;
+	textureDesc.Height = height;
+	textureDesc.MipLevels = 1;
+	textureDesc.ArraySize = 1;
+	textureDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
+	textureDesc.SampleDesc.Count = 1;
+	textureDesc.SampleDesc.Quality = 0;
+	textureDesc.Usage = D3D11_USAGE_DYNAMIC;
+	textureDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+	textureDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+	textureDesc.MiscFlags = 0;
+
+	ID3D11Texture2D* newTexture;
+	HRESULT result = _pd3dDevice->CreateTexture2D(&textureDesc, nullptr, &newTexture);
+	if(SUCCEEDED(result)) {
+		return newTexture;
+	}
+	MessageManager::Log("D3DDevice::CreateTexture() failed - Error:" + std::to_string(result));
+	return nullptr;
+}
+
+ID3D11ShaderResourceView* Renderer::GetShaderResourceView(ID3D11Texture2D* texture)
+{
+	//No explicit view description is supplied (null); returns nullptr when creation fails
+	ID3D11ShaderResourceView* view = nullptr;
+	HRESULT result = _pd3dDevice->CreateShaderResourceView(texture, nullptr, &view);
+	if(SUCCEEDED(result)) {
+		return view;
+	}
+	MessageManager::Log("D3DDevice::CreateShaderResourceView() failed - Error:" + std::to_string(result));
+	return nullptr;
+}
+
+void Renderer::DrawString(string message, float x, float y, DirectX::FXMVECTOR color, float scale, SpriteFont* font)
+{
+	//Decode the UTF-8 text into a wide string and forward to the std::wstring overload
+	DrawString(std::wstring(utf8::utf8::decode(message)), x, y, color, scale, font);
+}
+
+void Renderer::DrawString(std::wstring message, float x, float y, DirectX::FXMVECTOR color, float scale, SpriteFont* font)
+{
+	//Fall back to the default font when the caller did not supply one
+	SpriteFont* activeFont = (font != nullptr) ? font : _font.get();
+	XMFLOAT2 position(x+_leftMargin, y+_topMargin);
+	XMFLOAT2 origin(0, 0);
+	//Rotation is fixed at 0; the margins shift the text along with the screen area
+	activeFont->DrawString(_spriteBatch.get(), message.c_str(), position, color, 0.0f, origin, scale);
+}
+
+void Renderer::UpdateFrame(void *frameBuffer, uint32_t width, uint32_t height)
+{
+	SetScreenSize(width, height);
+	auto lock = _textureLock.AcquireSafe();
+	//_textureBuffer[0] may be null if directx failed to initialize properly, and it only holds
+	//_nesFrameWidth x _nesFrameHeight pixels (see CreateNesBuffers): a null source or a frame of
+	//any other size is dropped, since copying it would run past the end of the allocation
+	if(_textureBuffer[0] && frameBuffer && width == _nesFrameWidth && height == _nesFrameHeight) {
+		memcpy(_textureBuffer[0], frameBuffer, (size_t)width*height*4); //4 bytes per pixel
+		_needFlip = true;
+		_frameChanged = true;
+	}
+}
+
+void Renderer::DrawScreen()
+{
+	//Swap buffers - emulator always writes to _textureBuffer[0], screen always draws _textureBuffer[1]
+	if(_needFlip) {
+		auto lock = _textureLock.AcquireSafe();
+		uint8_t* textureBuffer = _textureBuffer[0];
+		_textureBuffer[0] = _textureBuffer[1];
+		_textureBuffer[1] = textureBuffer;
+		_needFlip = false;
+		if(_frameChanged) {
+			_frameChanged = false;
+			_renderedFrameCount++;
+		}
+	}
+
+	//The texture, its view or the CPU buffer is null when CreateNesBuffers() returned early: skip the draw instead of dereferencing null
+	if(!_pTexture || !_pTextureSrv || !_textureBuffer[1]) {
+		return;
+	}
+	//Copy buffer to texture row by row: source rows are tightly packed (4 bytes per pixel), destination rows are dd.RowPitch bytes apart
+	uint32_t rowPitch = _nesFrameWidth * 4;
+	D3D11_MAPPED_SUBRESOURCE dd;
+	HRESULT hr = _pDeviceContext->Map(_pTexture, 0, D3D11_MAP_WRITE_DISCARD, 0, &dd);
+	if(FAILED(hr)) {
+		MessageManager::Log("DeviceContext::Map() failed - Error:" + std::to_string(hr));
+		return;
+	}
+	uint8_t* surfacePointer = (uint8_t*)dd.pData;
+	uint8_t* videoBuffer = _textureBuffer[1];
+	for(uint32_t i = 0, iMax = _nesFrameHeight; i < iMax; i++) {
+		memcpy(surfacePointer, videoBuffer, rowPitch);
+		videoBuffer += rowPitch;
+		surfacePointer += dd.RowPitch;
+	}
+	_pDeviceContext->Unmap(_pTexture, 0);
+	RECT destRect; //Screen-sized rectangle, offset by the left/top margins
+	destRect.left = _leftMargin;
+	destRect.top = _topMargin;
+	destRect.right = _screenWidth+_leftMargin;
+	destRect.bottom = _screenHeight+_topMargin;
+	_spriteBatch->Draw(_pTextureSrv, destRect);
+}
+
+void Renderer::DrawPauseScreen(bool disableOverlay)
+{
+	if(disableOverlay) {
+		//No overlay: only two "I" glyphs are drawn side by side in a translucent blue
+		const static XMVECTORF32 transparentBlue = { { { 0.415686309f, 0.352941185f, 0.803921640f, 0.66f } } };
+		DrawString("I", 15, 15, transparentBlue, 2.0f, _font.get());
+		DrawString("I", 32, 15, transparentBlue, 2.0f, _font.get());
+		return;
+	}
+	//Fill the 8x8 overlay texture with a single translucent gray value
+	D3D11_MAPPED_SUBRESOURCE mapped;
+	HRESULT result = _pDeviceContext->Map(_overlayTexture, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
+	if(FAILED(result)) {
+		MessageManager::Log("(DrawPauseScreen) DeviceContext::Map() failed - Error:" + std::to_string(result));
+		return;
+	}
+	uint8_t* rowStart = (uint8_t*)mapped.pData;
+	for(uint32_t row = 0; row < 8; row++, rowStart += mapped.RowPitch) {
+		uint32_t* pixels = (uint32_t*)rowStart;
+		for(int column = 0; column < 8; column++) {
+			pixels[column] = 0xAA222222;
+		}
+	}
+	_pDeviceContext->Unmap(_overlayTexture, 0);
+	//Stretch the overlay over the whole output area
+	RECT destRect;
+	destRect.left = 0;
+	destRect.top = 0;
+	destRect.right = _realScreenWidth;
+	destRect.bottom = _realScreenHeight;
+	_spriteBatch->Draw(_pOverlaySrv, destRect);
+
+	//Center the "PAUSE" label on the screen area (shifted up by 8)
+	XMVECTOR stringDimensions = _largeFont->MeasureString(L"PAUSE");
+	float x = (float)_screenWidth / 2 - stringDimensions.m128_f32[0] / 2;
+	float y = (float)_screenHeight / 2 - stringDimensions.m128_f32[1] / 2 - 8;
+	DrawString("PAUSE", x, y, Colors::AntiqueWhite, 1.0f, _largeFont.get());
+
+	//TODO
+	/*string utf8Message = _console->GetSettings()->GetPauseScreenMessage();
+	if(utf8Message.size() > 0) {
+		std::wstring message = utf8::utf8::decode(utf8Message);
+		float width = MeasureString(message);
+		DrawString(message, (float)_screenWidth - width - 20, (float)_screenHeight - 40, Colors::AntiqueWhite, 1.0f, _font.get());
+	}*/
+}
+
+void Renderer::Render()
+{
+	//TODO
+	/*bool paused = _console->IsPaused() && _console->IsRunning();
+	bool disableOverlay = _console->GetSettings()->CheckFlag(EmulationFlags::HidePauseOverlay);
+	shared_ptr<Debugger> debugger = _console->GetDebugger(false);
+	if(debugger && debugger->IsExecutionStopped()) {
+		paused = debugger->IsPauseIconShown();
+		disableOverlay = true;
+	}*/
+	bool paused = false;
+
+	//Skip redrawing unless a new frame arrived, a message is shown, the emulation is paused,
+	//or more than 10 consecutive calls were already skipped
+	bool redrawNeeded = _noUpdateCount > 10 || _frameChanged || paused || IsMessageShown();
+	if(!redrawNeeded) {
+		_noUpdateCount++;
+		return;
+	}
+	_noUpdateCount = 0;
+
+	auto frameGuard = _frameLock.AcquireSafe();
+	if(_newFullscreen != _fullscreen) {
+		SetScreenSize(_nesFrameWidth, _nesFrameHeight);
+	}
+
+	if(_pDeviceContext == nullptr) {
+		//DirectX failed to initialize, try to init
+		Reset();
+		if(_pDeviceContext == nullptr) {
+			//Can't init, prevent crash
+			return;
+		}
+	}
+
+	//Start from a black back buffer, then queue all sprites between Begin() and End()
+	_pDeviceContext->ClearRenderTargetView(_pRenderTargetView, Colors::Black);
+	_spriteBatch->Begin(SpriteSortMode_Deferred, nullptr, _samplerState);
+
+	DrawScreen();
+
+	//TODO
+	/*
+	if(paused) {
+		DrawPauseScreen(disableOverlay);
+	}
+
+	if(_console->IsRunning()) {
+		DrawCounters();
+	}*/
+
+	DrawToasts();
+	_spriteBatch->End();
+
+	//Present the back buffer; a sync interval of 1 waits for vertical sync, 0 presents immediately
+	bool waitVSync = false; //TODO _console->GetSettings()->CheckFlag(EmulationFlags::VerticalSync)
+	HRESULT presentResult = _pSwapChain->Present(waitVSync ? 1 : 0, 0);
+	if(SUCCEEDED(presentResult)) {
+		return;
+	}
+	MessageManager::Log("SwapChain::Present() failed - Error:" + std::to_string(presentResult));
+	if(presentResult == DXGI_ERROR_DEVICE_REMOVED) {
+		MessageManager::Log("D3DDevice: GetDeviceRemovedReason: " + std::to_string(_pd3dDevice->GetDeviceRemovedReason()));
+	}
+	MessageManager::Log("Trying to reset DX...");
+	Reset();
+}
+
+void Renderer::DrawString(std::wstring message, int x, int y, uint8_t r, uint8_t g, uint8_t b, uint8_t opacity)
+{
+	XMVECTORF32 textColor = { r / 255.0f, g / 255.0f, b / 255.0f, opacity / 255.0f }; //0-255 channels scaled to 0.0-1.0
+	_font->DrawString(_spriteBatch.get(), message.c_str(), XMFLOAT2((float)x+_leftMargin, (float)y+_topMargin), textColor); //Default font, margin-adjusted position
+}
+
+float Renderer::MeasureString(std::wstring text)
+{
+	//Measures the text with the default font and returns the first
+	//component of the resulting vector
+	XMVECTOR measuredSize = _font->MeasureString(text.c_str());
+	return reinterpret_cast<float*>(&measuredSize)[0];
+}
+
+bool Renderer::ContainsCharacter(wchar_t character)
+{
+	return _font->ContainsCharacter(character); //Delegates the lookup to the default font (_font)
+}
\ No newline at end of file
diff --git a/Windows/Renderer.h b/Windows/Renderer.h
new file mode 100644
index 0000000..8672689
--- /dev/null
+++ b/Windows/Renderer.h
@@ -0,0 +1,104 @@
+#pragma once
+
+#include "stdafx.h"
+#include "../Core/IRenderingDevice.h"
+#include "../Core/IMessageManager.h"
+#include "../Utilities/FolderUtilities.h"
+#include "../Utilities/SimpleLock.h"
+#include "../Utilities/Timer.h"
+#include "../Core/BaseRenderer.h"
+
+using namespace DirectX;
+
+class Console;
+
+namespace DirectX {
+	class SpriteBatch;
+	class SpriteFont;
+}
+
+class Renderer : public BaseRenderer, public IRenderingDevice //Direct3D 11 renderer: draws emulator frames and text through a DirectXTK sprite batch
+{
+private:
+	HWND                    _hWnd = nullptr; //Output window used when creating the swap chain
+
+	ID3D11Device*           _pd3dDevice = nullptr; //Device, context and swap chain are created together in InitDevice()
+	ID3D11DeviceContext*    _pDeviceContext = nullptr; //Null whenever initialization failed (checked by Render())
+	IDXGISwapChain*         _pSwapChain = nullptr;
+	ID3D11RenderTargetView* _pRenderTargetView = nullptr; //View over the swap chain's buffer 0
+	ID3D11DepthStencilState* _pDepthDisabledStencilState = nullptr;
+	ID3D11BlendState*			_pAlphaEnableBlendingState = nullptr;
+
+	ID3D11SamplerState*		_samplerState = nullptr; //Filter is chosen from _resizeFilter in CreateSamplerState()
+		
+	atomic<bool>				_needFlip = false; //Set by UpdateFrame(), cleared by DrawScreen() once the buffers are swapped
+	uint8_t*						_textureBuffer[2] = { nullptr, nullptr }; //[0] is written by UpdateFrame(), [1] is read by DrawScreen()
+	ID3D11Texture2D*			_pTexture = nullptr; //Frame texture (_nesFrameWidth x _nesFrameHeight)
+	ID3D11ShaderResourceView*	_pTextureSrv = nullptr;
+	ID3D11Texture2D*			_overlayTexture = nullptr; //8x8 texture filled by DrawPauseScreen()
+	ID3D11ShaderResourceView*	_pOverlaySrv = nullptr;
+
+	bool							_frameChanged = true; //Set by UpdateFrame(); makes the next Render() call redraw
+	SimpleLock					_frameLock; //Acquired by Reset() and Render()
+	SimpleLock					_textureLock; //Acquired by UpdateFrame() and by DrawScreen() while swapping buffers
+
+	VideoResizeFilter _resizeFilter = VideoResizeFilter::NearestNeighbor;
+
+	unique_ptr<SpriteFont>	_font; //Default font (Font.24.spritefont)
+	unique_ptr<SpriteFont>	_largeFont; //Font.64.spritefont, used for the "PAUSE" label
+		
+	unique_ptr<SpriteBatch> _spriteBatch; //All drawing happens between its Begin() and End() calls in Render()
+
+	const uint32_t _bytesPerPixel = 4;
+	uint32_t _screenBufferSize = 0;
+
+	bool _newFullscreen = false; //Requested mode, recorded by SetFullscreenMode()
+	bool _fullscreen = false; //Mode the swap chain was last set up for
+
+	uint32_t _realScreenHeight = 240; //Back buffer / viewport size
+	uint32_t _realScreenWidth = 256;
+	uint32_t _leftMargin = 0; //Offsets added to every draw position
+	uint32_t _topMargin = 0;
+	uint32_t _monitorWidth = 0; //Recorded by SetFullscreenMode()
+	uint32_t _monitorHeight = 0;
+
+	uint32_t _nesFrameHeight = 0; //Size of the emulator frame; also the size of the CPU buffers and frame texture
+	uint32_t _nesFrameWidth = 0;
+	uint32_t _newFrameBufferSize = 0;
+
+	uint32_t _noUpdateCount = 0; //Consecutive Render() calls that skipped drawing
+
+	HRESULT InitDevice(); //Creates the device, swap chain and every dependent resource
+	void CleanupDevice(); //Releases everything InitDevice() created
+
+	void SetScreenSize(uint32_t width, uint32_t height);
+
+	ID3D11Texture2D* CreateTexture(uint32_t width, uint32_t height); //Returns nullptr on failure
+	ID3D11ShaderResourceView* GetShaderResourceView(ID3D11Texture2D* texture); //Returns nullptr on failure
+	void DrawScreen();
+	void DrawPauseScreen(bool disableOverlay);
+		
+	void DrawString(string message, float x, float y, DirectX::FXMVECTOR color, float scale, SpriteFont* font = nullptr); //UTF-8 text; a null font selects _font
+	void DrawString(std::wstring message, float x, float y, DirectX::FXMVECTOR color, float scale, SpriteFont* font = nullptr); //A null font selects _font
+
+	void DrawString(std::wstring message, int x, int y, uint8_t r, uint8_t g, uint8_t b, uint8_t opacity); //Color channels are 0-255
+	float MeasureString(std::wstring text);
+	bool ContainsCharacter(wchar_t character);
+
+	HRESULT CreateRenderTargetView();
+	void ReleaseRenderTargetView();
+	HRESULT CreateNesBuffers(); //CPU frame buffers, textures/views, sprite batch and fonts
+	void ResetNesBuffers(); //Frees the textures, views and CPU frame buffers
+	HRESULT CreateSamplerState();
+
+public:
+	Renderer(shared_ptr<Console> console, HWND hWnd, bool registerAsMessageManager);
+	~Renderer();
+
+	void SetFullscreenMode(bool fullscreen, void* windowHandle, uint32_t monitorWidth, uint32_t monitorHeight); //Only records the request
+
+	void Reset(); //Tears down and re-creates the Direct3D device
+	void Render(); //Draws and presents one frame when a redraw is needed
+
+	void UpdateFrame(void *frameBuffer, uint32_t width, uint32_t height); //Copies a new frame (4 bytes per pixel) into _textureBuffer[0]
+};
\ No newline at end of file
diff --git a/Windows/Resources/MesenIcon.bmp b/Windows/Resources/MesenIcon.bmp
new file mode 100644
index 0000000..12dfc3e
Binary files /dev/null and b/Windows/Resources/MesenIcon.bmp differ
diff --git a/Windows/Resources/Roboto.12.spritefont b/Windows/Resources/Roboto.12.spritefont
new file mode 100644
index 0000000..f9020a2
Binary files /dev/null and b/Windows/Resources/Roboto.12.spritefont differ
diff --git a/Windows/Resources/Toast.dds b/Windows/Resources/Toast.dds
new file mode 100644
index 0000000..ddea493
Binary files /dev/null and b/Windows/Resources/Toast.dds differ
diff --git a/Windows/SoundManager.cpp b/Windows/SoundManager.cpp
new file mode 100644
index 0000000..3dcf832
--- /dev/null
+++ b/Windows/SoundManager.cpp
@@ -0,0 +1,312 @@
+#include "stdafx.h"
+#include "SoundManager.h"
+//#include "../Core/SoundMixer.h"
+#include "../Core/Console.h"
+#include "../Core/MessageManager.h"
+
+// Sets up DirectSound output for the given window.
+// The COM pointers start out null and the device GUID is zeroed, then a
+// 44100 Hz mono DirectSound setup is attempted immediately. When that setup
+// fails, an error is reported through MessageManager::DisplayMessage.
+SoundManager::SoundManager(shared_ptr<Console> console, HWND hwnd)
+	: _console(console), _hWnd(hwnd), _directSound(nullptr), _primaryBuffer(nullptr), _secondaryBuffer(nullptr)
+{
+	memset(&_audioDeviceID, 0, sizeof(_audioDeviceID));
+
+	const bool initialized = InitializeDirectSound(44100, false);
+	if(!initialized) {
+		MessageManager::DisplayMessage("Error", "CouldNotInitializeAudioSystem");
+	}
+	//On success, registration with the sound mixer is currently disabled:
+	//_console->GetSoundMixer()->RegisterAudioDevice(this);
+}
+
+// Frees every DirectSound object still held by this instance.
+SoundManager::~SoundManager()
+{
+	this->Release();
+}
+
+// Callback handed to DirectSoundEnumerateW; appends one entry per invocation.
+//  lpGUID:      device identifier; a null pointer is stored as an all-zero GUID.
+//  lpszDesc:    wide-character description, passed through utf8::utf8::encode.
+//  lpszDrvName: not used.
+//  lpContext:   treated as a vector<SoundDeviceInfo>* that receives the entry.
+// Always returns true.
+// NOTE(review): the callback is cast to LPDSENUMCALLBACKW at the call site - confirm
+// that this signature (bool return, LPCSTR driver name) matches what DirectSound expects.
+bool CALLBACK SoundManager::DirectSoundEnumProc(LPGUID lpGUID, LPCWSTR lpszDesc, LPCSTR lpszDrvName, LPVOID lpContext)
+{
+	SoundDeviceInfo entry;
+	entry.description = utf8::utf8::encode(lpszDesc);
+	entry.guid = (lpGUID != nullptr) ? *lpGUID : GUID{};
+
+	static_cast<vector<SoundDeviceInfo>*>(lpContext)->push_back(entry);
+	return true;
+}
+
+// Runs a DirectSound device enumeration and returns the entries gathered by
+// DirectSoundEnumProc. The result code of DirectSoundEnumerateW is not inspected.
+vector<SoundDeviceInfo> SoundManager::GetAvailableDeviceInfo()
+{
+	vector<SoundDeviceInfo> found;
+	LPDSENUMCALLBACKW callback = (LPDSENUMCALLBACKW)SoundManager::DirectSoundEnumProc;
+	DirectSoundEnumerateW(callback, &found);
+	return found;
+}
+
+// Returns the descriptions of all enumerated devices in one string, each
+// description followed by the "||" separator (so the result also ends with it).
+string SoundManager::GetAvailableDevices()
+{
+	string deviceString;
+	//Iterate by const reference: copying each SoundDeviceInfo (string + GUID) is unnecessary
+	for(const SoundDeviceInfo& device : GetAvailableDeviceInfo()) {
+		//Append in place instead of building a temporary string per device
+		deviceString += device.description;
+		deviceString += "||";
+	}
+	return deviceString;
+}
+
+// Selects the output device whose description equals deviceName.
+// When the first matching device has a GUID different from the current one, the
+// GUID is stored and _needReset is raised (PlayBuffer checks that flag and
+// re-creates the DirectSound objects). Unknown names leave the state untouched.
+void SoundManager::SetAudioDevice(string deviceName)
+{
+	//Iterate by const reference to avoid copying every enumerated entry
+	for(const SoundDeviceInfo& device : GetAvailableDeviceInfo()) {
+		if(device.description != deviceName) {
+			continue;
+		}
+
+		if(memcmp(&_audioDeviceID, &device.guid, sizeof(GUID)) != 0) {
+			memcpy(&_audioDeviceID, &device.guid, sizeof(GUID));
+			_needReset = true;
+		}
+		//Only the first device with a matching description is considered
+		break;
+	}
+}
+
+// Creates the DirectSound device, its primary buffer and the secondary buffer used for playback.
+//  sampleRate: sample rate (Hz) written to the 16-bit PCM wave format.
+//  isStereo:   true for 2 channels, false for 1.
+// Returns true when every step succeeded. On failure a message is logged, false is
+// returned and any objects created so far are left in place for Release() to free.
+bool SoundManager::InitializeDirectSound(uint32_t sampleRate, bool isStereo)
+{
+	HRESULT result;
+	//Zero-initialize the descriptors so no field is ever passed to DirectSound uninitialized
+	DSBUFFERDESC bufferDesc = {};
+	WAVEFORMATEX waveFormat = {};
+	
+	// Create the DirectSound interface for the device identified by _audioDeviceID
+	// (all zeroes until SetAudioDevice stores another GUID).
+	result = DirectSoundCreate8(&_audioDeviceID, &_directSound, NULL);
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to create direct sound device.");
+		return false;
+	}
+
+	// Set the cooperative level to priority so the format of the primary sound buffer can be modified.
+	result = _directSound->SetCooperativeLevel(_hWnd, DSSCL_PRIORITY);
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to set cooperative level.");
+		return false;
+	}
+
+	// Setup the primary buffer description.
+	bufferDesc.dwSize = sizeof(DSBUFFERDESC);
+	bufferDesc.dwFlags = DSBCAPS_PRIMARYBUFFER | DSBCAPS_CTRLVOLUME;
+	bufferDesc.dwBufferBytes = 0;
+	bufferDesc.dwReserved = 0;
+	bufferDesc.lpwfxFormat = NULL;
+	bufferDesc.guid3DAlgorithm = GUID_NULL;
+
+	// Get control of the primary sound buffer.
+	result = _directSound->CreateSoundBuffer(&bufferDesc, &_primaryBuffer, NULL);
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to create primary sound buffer.");
+		return false;
+	}
+
+	// Setup the format of the primary sound buffer.
+	_sampleRate = sampleRate;
+	_isStereo = isStereo;
+
+	waveFormat.wFormatTag = WAVE_FORMAT_PCM;
+	waveFormat.nSamplesPerSec = _sampleRate;
+	waveFormat.wBitsPerSample = 16;
+	waveFormat.nChannels = isStereo ? 2 : 1;
+	waveFormat.nBlockAlign = (waveFormat.wBitsPerSample / 8) * waveFormat.nChannels;
+	waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
+	waveFormat.cbSize = 0;
+
+	// Set the primary buffer to be the wave format specified.
+	result = _primaryBuffer->SetFormat(&waveFormat);
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to set the sound format.");
+		return false;
+	}
+
+	//TODO
+	int32_t latency = 100; //_console->GetSettings()->GetAudioLatency()
+	int32_t requestedByteLatency = (int32_t)((float)(sampleRate * latency) / 1000.0f * waveFormat.nBlockAlign);
+	// Buffer size: twice the requested latency (in bytes), rounded up to a multiple of 0x10000.
+	_bufferSize = (int32_t)std::ceil((double)requestedByteLatency * 2 / 0x10000) * 0x10000;
+
+	// Set the buffer description of the secondary sound buffer that receives the audio data.
+	bufferDesc.dwSize = sizeof(DSBUFFERDESC);
+	bufferDesc.dwFlags = DSBCAPS_CTRLPOSITIONNOTIFY | DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_GLOBALFOCUS | DSBCAPS_LOCSOFTWARE | DSBCAPS_CTRLVOLUME | DSBCAPS_CTRLFREQUENCY;
+	bufferDesc.dwBufferBytes = _bufferSize;
+	bufferDesc.dwReserved = 0;
+	bufferDesc.lpwfxFormat = &waveFormat;
+	bufferDesc.guid3DAlgorithm = GUID_NULL;
+
+	// Create a temporary sound buffer with the specific buffer settings.
+	IDirectSoundBuffer* tempBuffer = nullptr;
+	result = _directSound->CreateSoundBuffer(&bufferDesc, &tempBuffer, NULL);
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to create temporary sound buffer.");
+		return false;
+	}
+
+	// Test the buffer format against the direct sound 8 interface and create the secondary buffer.
+	result = tempBuffer->QueryInterface(IID_IDirectSoundBuffer8, (LPVOID*)&_secondaryBuffer);
+
+	// The temporary buffer is only needed for the QueryInterface call. Release it right away,
+	// before any early return, so its reference is not leaked on the failure paths below.
+	tempBuffer->Release();
+
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to obtain secondary sound buffer.");
+		return false;
+	}
+
+	// Set volume of the buffer to 100%.
+	result = _secondaryBuffer->SetVolume(DSBVOLUME_MAX);
+	if(FAILED(result)) {
+		MessageManager::Log("[Audio] Failed to set volume of the secondary sound buffer.");
+		return false;
+	}
+
+	_playing = false;
+
+	return true;
+}
+
+// Resets the playback state flags and write offset, then releases the secondary
+// buffer, the primary buffer and the DirectSound device (in that order), nulling
+// each pointer. Pointers that are already null are skipped.
+void SoundManager::Release()
+{
+	_playing = false;
+	_needReset = false;
+	_lastWriteOffset = 0;
+
+	auto releaseAndClear = [](auto*& comObject) {
+		if(comObject) {
+			comObject->Release();
+			comObject = nullptr;
+		}
+	};
+
+	releaseAndClear(_secondaryBuffer);
+	releaseAndClear(_primaryBuffer);
+	releaseAndClear(_directSound);
+}
+
+// Fills the whole secondary buffer with zeroes, then rewinds both the buffer's
+// current position and _lastWriteOffset to 0.
+// Callers must ensure _secondaryBuffer is non-null (Stop() checks it before calling).
+void SoundManager::ClearSecondaryBuffer()
+{
+	void* bufferPtr = nullptr;
+	DWORD bufferSize = 0;
+
+	//Only touch the memory when the lock actually succeeded: on failure the output
+	//pointer/size are not valid and writing through them would corrupt memory.
+	HRESULT result = _secondaryBuffer->Lock(0, 0, &bufferPtr, &bufferSize, nullptr, nullptr, DSBLOCK_ENTIREBUFFER);
+	if(SUCCEEDED(result) && bufferPtr != nullptr) {
+		memset(bufferPtr, 0, bufferSize);
+		_secondaryBuffer->Unlock(bufferPtr, bufferSize, nullptr, 0);
+	}
+
+	_secondaryBuffer->SetCurrentPosition(0);
+	_lastWriteOffset = 0;
+}
+
+// Writes `size` bytes from `data` into the secondary buffer, starting at _lastWriteOffset,
+// and advances _lastWriteOffset (modulo _bufferSize).
+// The lock may return two regions when the write wraps around the end of the buffer;
+// the second region receives the remainder of the data.
+// When the buffer cannot be locked, nothing is written and _lastWriteOffset is unchanged.
+void SoundManager::CopyToSecondaryBuffer(uint8_t *data, uint32_t size)
+{
+	void* bufferPtrA = nullptr;
+	void* bufferPtrB = nullptr;
+	DWORD bufferASize = 0;
+	DWORD bufferBSize = 0;
+
+	HRESULT result = _secondaryBuffer->Lock(_lastWriteOffset, size, &bufferPtrA, &bufferASize, &bufferPtrB, &bufferBSize, 0);
+	if(result == DSERR_BUFFERLOST && SUCCEEDED(_secondaryBuffer->Restore())) {
+		//The buffer memory was lost: restore it and retry the lock once
+		result = _secondaryBuffer->Lock(_lastWriteOffset, size, &bufferPtrA, &bufferASize, &bufferPtrB, &bufferBSize, 0);
+	}
+
+	if(FAILED(result) || bufferPtrA == nullptr) {
+		//Without a valid lock the output pointers must not be used
+		return;
+	}
+
+	_lastWriteOffset = (_lastWriteOffset + size) % _bufferSize;
+
+	memcpy(bufferPtrA, data, bufferASize);
+	if(bufferPtrB && bufferBSize > 0) {
+		memcpy(bufferPtrB, data + bufferASize, bufferBSize);
+	}
+
+	_secondaryBuffer->Unlock(bufferPtrA, bufferASize, bufferPtrB, bufferBSize);
+}
+
+// Marks playback as stopped and, when a secondary buffer exists, halts it.
+// The buffer contents and write offset are left as they are.
+void SoundManager::Pause()
+{
+	_playing = false;
+
+	if(_secondaryBuffer == nullptr) {
+		return;
+	}
+	_secondaryBuffer->Stop();
+}
+
+// Marks playback as stopped; when a secondary buffer exists it is halted and
+// cleared (which also rewinds the write offset). ResetStats() runs afterwards.
+void SoundManager::Stop()
+{
+	_playing = false;
+
+	if(_secondaryBuffer != nullptr) {
+		_secondaryBuffer->Stop();
+		ClearSecondaryBuffer();
+	}
+
+	ResetStats();
+}
+
+// Starts looping playback of the secondary buffer and sets _playing.
+// Does nothing when no secondary buffer exists. The result of the DirectSound
+// Play call is not inspected.
+void SoundManager::Play()
+{
+	if(_secondaryBuffer == nullptr) {
+		return;
+	}
+
+	_secondaryBuffer->Play(0, 0, DSBPLAY_LOOPING);
+	_playing = true;
+}
+
+// Compares _lastWriteOffset against the safe write cursor reported by DirectSound.
+// A gap in the range [-10000, -1] counts as a buffer underrun: the underrun counter
+// is incremented and _lastWriteOffset is moved up to the safe write cursor.
+// Gaps below -10000 cause no state change (presumably the write offset has wrapped
+// around the end of the buffer - TODO confirm).
+void SoundManager::ValidateWriteCursor(DWORD safeWriteCursor)
+{
+	const int32_t writeGap = _lastWriteOffset - safeWriteCursor;
+	const bool isUnderrun = writeGap < 0 && writeGap >= -10000;
+
+	if(isUnderrun) {
+		_bufferUnderrunEventCount++;
+		_lastWriteOffset = safeWriteCursor;
+	}
+}
+
+// End-of-frame bookkeeping: validates the write cursor, applies the playback
+// frequency, updates the latency statistics and restarts audio when the measured
+// latency has drifted too far from the target.
+// Does nothing when no secondary buffer exists or its position cannot be read.
+void SoundManager::ProcessEndOfFrame()
+{
+	if(!_secondaryBuffer) {
+		//Initialization may have failed; Pause/Stop/Play guard against this as well
+		return;
+	}
+
+	DWORD currentPlayCursor = 0;
+	DWORD safeWriteCursor = 0;
+	if(FAILED(_secondaryBuffer->GetCurrentPosition(&currentPlayCursor, &safeWriteCursor))) {
+		//Cursor values are not valid on failure; skip this frame's bookkeeping
+		return;
+	}
+	ValidateWriteCursor(safeWriteCursor);
+
+	//TODO
+	uint32_t emulationSpeed = 100; // _console->GetSettings()->GetEmulationSpeed();
+	uint32_t targetRate = _sampleRate;
+	if(emulationSpeed > 0 && emulationSpeed < 100) {
+		//Slow down playback when playing at less than 100%
+		targetRate = (uint32_t)(targetRate * ((double)emulationSpeed / 100.0));
+	}
+	_secondaryBuffer->SetFrequency((DWORD)(targetRate));
+
+	ProcessLatency(currentPlayCursor, _lastWriteOffset);
+
+	//TODO
+	uint32_t latency = 100; //_console->GetSettings()->GetAudioLatency();
+	if(_averageLatency > 0 && emulationSpeed <= 100 && emulationSpeed > 0 && std::abs(_averageLatency - latency) > 50) {
+		//Latency is way off (over 50ms gap), stop audio & start again
+		Stop();
+	}
+}
+
+// Queues audio for playback.
+//  soundBuffer: 16-bit samples to copy into the secondary buffer.
+//  sampleCount: number of sample frames (multiplied by 2 bytes per channel to get the byte size).
+//  sampleRate / isStereo: format of the data; a change in either (or a pending device
+//                         reset, or a latency change) re-creates the DirectSound objects.
+// Playback is started once at least half of the target latency has been written.
+// Does nothing when no secondary buffer is available (e.g. initialization failed).
+void SoundManager::PlayBuffer(int16_t *soundBuffer, uint32_t sampleCount, uint32_t sampleRate, bool isStereo)
+{
+	uint32_t bytesPerSample = 2 * (isStereo ? 2 : 1);
+	//TODO
+	uint32_t latency = 100; //_console->GetSettings()->GetAudioLatency();
+	if(_sampleRate != sampleRate || _isStereo != isStereo || _needReset || latency != _previousLatency) {
+		_previousLatency = latency;
+		Release();
+		InitializeDirectSound(sampleRate, isStereo);
+		if(_secondaryBuffer) {
+			_secondaryBuffer->SetFrequency(sampleRate);
+		}
+	}
+
+	if(!_secondaryBuffer) {
+		//Initialization failed (already logged by InitializeDirectSound): nothing to write to
+		return;
+	}
+
+	DWORD currentPlayCursor = 0;
+	DWORD safeWriteCursor = 0;
+	if(SUCCEEDED(_secondaryBuffer->GetCurrentPosition(&currentPlayCursor, &safeWriteCursor))) {
+		//Only validate against cursor values that DirectSound actually filled in
+		ValidateWriteCursor(safeWriteCursor);
+	}
+
+	uint32_t soundBufferSize = sampleCount * bytesPerSample;
+	CopyToSecondaryBuffer((uint8_t*)soundBuffer, soundBufferSize);
+	
+	if(!_playing) {
+		DWORD byteLatency = (int32_t)((float)(sampleRate * latency) / 1000.0f * bytesPerSample);
+		if(_lastWriteOffset >= byteLatency / 2) {
+			Play();
+		}
+	}
+}
\ No newline at end of file
diff --git a/Windows/SoundManager.h b/Windows/SoundManager.h
new file mode 100644
index 0000000..9a71c34
--- /dev/null
+++ b/Windows/SoundManager.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include "stdafx.h"
+#include "../Core/BaseSoundManager.h"
+
+class Console;
+
+// One audio output device as reported by the DirectSound enumeration.
+struct SoundDeviceInfo
+{
+	//Device description, as produced by utf8::utf8::encode in the enumeration callback
+	string description;
+	//Device identifier; all zeroes when the enumeration supplied no GUID
+	GUID guid = {};
+};
+
+// DirectSound-based audio output, built on top of BaseSoundManager.
+// Owns a DirectSound device plus a primary and a secondary sound buffer, all of
+// which are freed by Release() (also called from the destructor).
+class SoundManager : public BaseSoundManager
+{
+public:
+	SoundManager(shared_ptr<Console> console, HWND hWnd);
+	~SoundManager();
+
+	//The destructor releases the raw COM pointers below, so a copy would release them twice
+	SoundManager(const SoundManager&) = delete;
+	SoundManager& operator=(const SoundManager&) = delete;
+
+	//Frees all DirectSound objects and resets the playback state
+	void Release();
+	//Per-frame bookkeeping (cursor validation, frequency, latency statistics)
+	void ProcessEndOfFrame();
+	//Queues 16-bit samples for playback. NOTE: the definition names the second
+	//parameter sampleCount and treats it as a sample-frame count, not a byte size.
+	void PlayBuffer(int16_t *soundBuffer, uint32_t bufferSize, uint32_t sampleRate, bool isStereo);
+	void Play();	
+	void Pause();
+	void Stop();
+
+	//Returns all device descriptions, each followed by "||"
+	string GetAvailableDevices();
+	//Selects the device whose description equals deviceName
+	void SetAudioDevice(string deviceName);
+
+private:
+	vector<SoundDeviceInfo> GetAvailableDeviceInfo();
+	static bool CALLBACK DirectSoundEnumProc(LPGUID lpGUID, LPCWSTR lpszDesc, LPCSTR lpszDrvName, LPVOID lpContext);
+	bool InitializeDirectSound(uint32_t sampleRate, bool isStereo);
+	void ClearSecondaryBuffer();
+	void CopyToSecondaryBuffer(uint8_t *data, uint32_t size);
+	void ValidateWriteCursor(DWORD safeWriteCursor);
+
+private:
+	shared_ptr<Console> _console;
+	HWND _hWnd;
+	//GUID of the selected output device (all zeroes until SetAudioDevice stores one)
+	GUID _audioDeviceID = {};
+	//Set when the device changes; PlayBuffer re-creates the DirectSound objects when it is true
+	bool _needReset = false;
+	
+	//Byte offset in the secondary buffer where the next write starts
+	DWORD _lastWriteOffset = 0;
+	uint32_t _previousLatency = 0;
+	bool _playing = false;
+
+	//Raw COM pointers, null whenever the corresponding object does not exist
+	IDirectSound8* _directSound = nullptr;
+	IDirectSoundBuffer* _primaryBuffer = nullptr;
+	IDirectSoundBuffer8* _secondaryBuffer = nullptr;
+};
diff --git a/Windows/Windows.vcxproj b/Windows/Windows.vcxproj
new file mode 100644
index 0000000..dc86dc5
--- /dev/null
+++ b/Windows/Windows.vcxproj
@@ -0,0 +1,421 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|Win32">
+      <Configuration>Libretro</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Libretro|x64">
+      <Configuration>Libretro</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|Win32">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Optimize|x64">
+      <Configuration>PGO Optimize</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|Win32">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="PGO Profile|x64">
+      <Configuration>PGO Profile</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{7761E790-B42C-4179-8550-8365FF9EB23E}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>Windows</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\$(Configuration)\</OutDir>
+    <IntDir>obj\$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <OutDir>$(SolutionDir)\bin\$(PlatformTarget)\PGO Profile\</OutDir>
+    <IntDir>obj\$(Platform)\PGO Profile\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <CallingConvention>Cdecl</CallingConvention>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <CallingConvention>Cdecl</CallingConvention>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <MinimalRebuild>false</MinimalRebuild>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="DirectInputManager.h" />
+    <ClInclude Include="DirectXTK\Audio.h" />
+    <ClInclude Include="DirectXTK\CommonStates.h" />
+    <ClInclude Include="DirectXTK\DDSTextureLoader.h" />
+    <ClInclude Include="DirectXTK\DirectXHelpers.h" />
+    <ClInclude Include="DirectXTK\Effects.h" />
+    <ClInclude Include="DirectXTK\GeometricPrimitive.h" />
+    <ClInclude Include="DirectXTK\Model.h" />
+    <ClInclude Include="DirectXTK\PrimitiveBatch.h" />
+    <ClInclude Include="DirectXTK\ScreenGrab.h" />
+    <ClInclude Include="DirectXTK\SimpleMath.h" />
+    <ClInclude Include="DirectXTK\SpriteBatch.h" />
+    <ClInclude Include="DirectXTK\SpriteFont.h" />
+    <ClInclude Include="DirectXTK\VertexTypes.h" />
+    <ClInclude Include="DirectXTK\WICTextureLoader.h" />
+    <ClInclude Include="DirectXTK\XboxDDSTextureLoader.h" />
+    <ClInclude Include="XInputManager.h" />
+    <ClInclude Include="WindowsKeyManager.h" />
+    <ClInclude Include="Renderer.h" />
+    <ClInclude Include="SoundManager.h" />
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="DirectInputManager.cpp" />
+    <ClCompile Include="XInputManager.cpp" />
+    <ClCompile Include="WindowsKeyManager.cpp">
+      <DeploymentContent>false</DeploymentContent>
+    </ClCompile>
+    <ClCompile Include="Renderer.cpp" />
+    <ClCompile Include="SoundManager.cpp" />
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Libretro|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Profile|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='PGO Optimize|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="DirectXTK\SimpleMath.inl" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/Windows/Windows.vcxproj.filters b/Windows/Windows.vcxproj.filters
new file mode 100644
index 0000000..c74e067
--- /dev/null
+++ b/Windows/Windows.vcxproj.filters
@@ -0,0 +1,109 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Header Files\DirectXTK">
+      <UniqueIdentifier>{e65ed40e-43de-490e-8c87-ff96e845a513}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="targetver.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Renderer.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="SoundManager.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\Audio.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\CommonStates.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\DDSTextureLoader.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\DirectXHelpers.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\Effects.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\GeometricPrimitive.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\Model.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\PrimitiveBatch.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\ScreenGrab.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\SimpleMath.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\SpriteBatch.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\SpriteFont.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\VertexTypes.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\WICTextureLoader.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectXTK\XboxDDSTextureLoader.h">
+      <Filter>Header Files\DirectXTK</Filter>
+    </ClInclude>
+    <ClInclude Include="WindowsKeyManager.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="XInputManager.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="DirectInputManager.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="stdafx.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Renderer.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="SoundManager.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="WindowsKeyManager.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="XInputManager.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="DirectInputManager.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="DirectXTK\SimpleMath.inl">
+      <Filter>Header Files\DirectXTK</Filter>
+    </None>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/Windows/WindowsKeyManager.cpp b/Windows/WindowsKeyManager.cpp
new file mode 100644
index 0000000..cae50b8
--- /dev/null
+++ b/Windows/WindowsKeyManager.cpp
@@ -0,0 +1,406 @@
+#include "stdafx.h"
+#include "WindowsKeyManager.h"
+
+//Keyboard key table. Each entry is { VK_* constant name, virtual-key code, display name,
+//display name of the "extended" variant of the key (empty when there is none) }.
+//WindowsKeyManager's constructor registers these entries in its name/code lookup maps.
+static vector<KeyDefinition> _keyDefinitions = {
+	//{ "VK_LBUTTON", 0x01, "Left mouse button", "" },
+	//{ "VK_RBUTTON", 0x02, "Right mouse button", "" },
+	{ "VK_CANCEL", 0x03, "Control-break processing", "" },
+	//{ "VK_MBUTTON", 0x04, "Middle mouse button (three-button mouse)", "" },
+	//{ "VK_XBUTTON1", 0x05, "X1 mouse button", "" },
+	//{ "VK_XBUTTON2", 0x06, "X2 mouse button", "" },
+	{ "-", 0x07, "Undefined", "" },
+	{ "VK_BACK", 0x08, "Backspace", "" },
+	{ "VK_TAB", 0x09, "Tab", "" },
+	//{ "-", 0x0A - 0B, "Reserved", "" },
+	{ "VK_CLEAR", 0x0C, "Numpad 5", "" },
+	{ "VK_RETURN", 0x0D, "Enter", "Numpad Enter" },
+	//{ "-", 0x0E - 0F, "Undefined", "" },
+	{ "VK_SHIFT", 0x10, "Shift", "" },
+	{ "VK_CONTROL", 0x11, "Ctrl", "" },
+	{ "VK_MENU", 0x12, "Alt", "" },
+	{ "VK_PAUSE", 0x13, "Pause", "" },
+	{ "VK_CAPITAL", 0x14, "Caps Lock", "" },
+	{ "VK_KANA", 0x15, "IME Kana mode", "" },
+	{ "VK_HANGUEL", 0x15, "IME Hanguel mode", "" },
+	{ "VK_HANGUL", 0x15, "IME Hangul mode", "" },
+	//{ "-", 0x16, "Undefined", "" },
+	{ "VK_JUNJA", 0x17, "IME Junja mode", "" },
+	{ "VK_FINAL", 0x18, "IME final mode", "" },
+	{ "VK_HANJA", 0x19, "IME Hanja mode", "" },
+	{ "VK_KANJI", 0x19, "IME Kanji mode", "" },
+	//{ "-", 0x1A, "Undefined", "" },
+	{ "VK_ESCAPE", 0x1B, "Esc", "" },
+	{ "VK_CONVERT", 0x1C, "IME convert", "" },
+	{ "VK_NONCONVERT", 0x1D, "IME nonconvert", "" },
+	{ "VK_ACCEPT", 0x1E, "IME accept", "" },
+	{ "VK_MODECHANGE", 0x1F, "IME mode change request", "" },
+	{ "VK_SPACE", 0x20, "Spacebar", "" },
+	{ "VK_PRIOR", 0x21, "Numpad 9", "Page Up" },
+	{ "VK_NEXT", 0x22, "Numpad 3", "Page Down" },
+	{ "VK_END", 0x23, "Numpad 1", "End" },
+	{ "VK_HOME", 0x24, "Numpad 7", "Home" },
+	{ "VK_LEFT", 0x25, "Numpad 4", "Left Arrow" },
+	{ "VK_UP", 0x26, "Numpad 8", "Up Arrow" },
+	{ "VK_RIGHT", 0x27, "Numpad 6", "Right Arrow" },
+	{ "VK_DOWN", 0x28, "Numpad 2", "Down Arrow" },
+	{ "VK_SELECT", 0x29, "Select", "" },
+	{ "VK_PRINT", 0x2A, "Print", "" },
+	{ "VK_EXECUTE", 0x2B, "Execute", "" },
+	{ "VK_SNAPSHOT", 0x2C, "Print Screen", "" },
+	{ "VK_INSERT", 0x2D, "Numpad 0", "Insert" },
+	{ "VK_DELETE", 0x2E, "Numpad .", "Delete" },
+	{ "VK_HELP", 0x2F, "Help", "" },
+	{ "0", 0x30, "0", "" },
+	{ "1", 0x31, "1", "" },
+	{ "2", 0x32, "2", "" },
+	{ "3", 0x33, "3", "" },
+	{ "4", 0x34, "4", "" },
+	{ "5", 0x35, "5", "" },
+	{ "6", 0x36, "6", "" },
+	{ "7", 0x37, "7", "" },
+	{ "8", 0x38, "8", "" },
+	{ "9", 0x39, "9", "" },
+	//{ "undefined", 0x3A - 40, "undefined", "" },
+	{ "A", 0x41, "A", "" },
+	{ "B", 0x42, "B", "" },
+	{ "C", 0x43, "C", "" },
+	{ "D", 0x44, "D", "" },
+	{ "E", 0x45, "E", "" },
+	{ "F", 0x46, "F", "" },
+	{ "G", 0x47, "G", "" },
+	{ "H", 0x48, "H", "" },
+	{ "I", 0x49, "I", "" },
+	{ "J", 0x4A, "J", "" },
+	{ "K", 0x4B, "K", "" },
+	{ "L", 0x4C, "L", "" },
+	{ "M", 0x4D, "M", "" },
+	{ "N", 0x4E, "N", "" },
+	{ "O", 0x4F, "O", "" },
+	{ "P", 0x50, "P", "" },
+	{ "Q", 0x51, "Q", "" },
+	{ "R", 0x52, "R", "" },
+	{ "S", 0x53, "S", "" },
+	{ "T", 0x54, "T", "" },
+	{ "U", 0x55, "U", "" },
+	{ "V", 0x56, "V", "" },
+	{ "W", 0x57, "W", "" },
+	{ "X", 0x58, "X", "" },
+	{ "Y", 0x59, "Y", "" },
+	{ "Z", 0x5A, "Z", "" },
+	{ "VK_LWIN", 0x5B, "Left Windows", "" },
+	{ "VK_RWIN", 0x5C, "Right Windows", "" },
+	{ "VK_APPS", 0x5D, "Applications Key", "" },
+	//{ "-", 0x5E, "Reserved", "" },
+	{ "VK_SLEEP", 0x5F, "Computer Sleep", "" },
+	{ "VK_NUMPAD0", 0x60, "Keypad 0", "" },
+	{ "VK_NUMPAD1", 0x61, "Keypad 1", "" },
+	{ "VK_NUMPAD2", 0x62, "Keypad 2", "" },
+	{ "VK_NUMPAD3", 0x63, "Keypad 3", "" },
+	{ "VK_NUMPAD4", 0x64, "Keypad 4", "" },
+	{ "VK_NUMPAD5", 0x65, "Keypad 5", "" },
+	{ "VK_NUMPAD6", 0x66, "Keypad 6", "" },
+	{ "VK_NUMPAD7", 0x67, "Keypad 7", "" },
+	{ "VK_NUMPAD8", 0x68, "Keypad 8", "" },
+	{ "VK_NUMPAD9", 0x69, "Keypad 9", "" },
+	{ "VK_MULTIPLY", 0x6A, "Numpad *", "" },
+	{ "VK_ADD", 0x6B, "Numpad +", "" },
+	{ "VK_SEPARATOR", 0x6C, "Separator", "" },
+	{ "VK_SUBTRACT", 0x6D, "Numpad -", "" },
+	{ "VK_DECIMAL", 0x6E, "Decimal", "" },
+	{ "VK_DIVIDE", 0x6F, "Numpad /", "" },
+	{ "VK_F1", 0x70, "F1", "" },
+	{ "VK_F2", 0x71, "F2", "" },
+	{ "VK_F3", 0x72, "F3", "" },
+	{ "VK_F4", 0x73, "F4", "" },
+	{ "VK_F5", 0x74, "F5", "" },
+	{ "VK_F6", 0x75, "F6", "" },
+	{ "VK_F7", 0x76, "F7", "" },
+	{ "VK_F8", 0x77, "F8", "" },
+	{ "VK_F9", 0x78, "F9", "" },
+	{ "VK_F10", 0x79, "F10", "" },
+	{ "VK_F11", 0x7A, "F11", "" },
+	{ "VK_F12", 0x7B, "F12", "" },
+	{ "VK_F13", 0x7C, "F13", "" },
+	{ "VK_F14", 0x7D, "F14", "" },
+	{ "VK_F15", 0x7E, "F15", "" },
+	{ "VK_F16", 0x7F, "F16", "" },
+	{ "VK_F17", 0x80, "F17", "" },
+	{ "VK_F18", 0x81, "F18", "" },
+	{ "VK_F19", 0x82, "F19", "" },
+	{ "VK_F20", 0x83, "F20", "" },
+	{ "VK_F21", 0x84, "F21", "" },
+	{ "VK_F22", 0x85, "F22", "" },
+	{ "VK_F23", 0x86, "F23", "" },
+	{ "VK_F24", 0x87, "F24", "" },
+	//{ "-", 0x88 - 8F, "Unassigned", "" },
+	{ "VK_NUMLOCK", 0x90, "Pause", "Num Lock" },
+	{ "VK_SCROLL", 0x91, "Scroll Lock", "" },
+	//{"-", 0x92-96,"OEM specific"},
+	//{ "-", 0x97 - 9F, "Unassigned", "" },
+	{ "VK_LSHIFT", 0xA0, "Left Shift", "" },
+	{ "VK_RSHIFT", 0xA1, "Right Shift", "" },
+	{ "VK_LCONTROL", 0xA2, "Left Control", "" },
+	{ "VK_RCONTROL", 0xA3, "Right Control", "" },
+	{ "VK_LMENU", 0xA4, "Left Menu", "" },
+	{ "VK_RMENU", 0xA5, "Right Menu", "" },
+	{ "VK_BROWSER_BACK", 0xA6, "Browser Back", "" },
+	{ "VK_BROWSER_FORWARD", 0xA7, "Browser Forward", "" },
+	{ "VK_BROWSER_REFRESH", 0xA8, "Browser Refresh", "" },
+	{ "VK_BROWSER_STOP", 0xA9, "Browser Stop", "" },
+	{ "VK_BROWSER_SEARCH", 0xAA, "Browser Search", "" },
+	{ "VK_BROWSER_FAVORITES", 0xAB, "Browser Favorites", "" },
+	{ "VK_BROWSER_HOME", 0xAC, "Browser Start and Home", "" },
+	{ "VK_VOLUME_MUTE", 0xAD, "Volume Mute", "" },
+	{ "VK_VOLUME_DOWN", 0xAE, "Volume Down", "" },
+	{ "VK_VOLUME_UP", 0xAF, "Volume Up", "" },
+	{ "VK_MEDIA_NEXT_TRACK", 0xB0, "Next Track", "" },
+	{ "VK_MEDIA_PREV_TRACK", 0xB1, "Previous Track", "" },
+	{ "VK_MEDIA_STOP", 0xB2, "Stop Media", "" },
+	{ "VK_MEDIA_PLAY_PAUSE", 0xB3, "Play/Pause Media", "" },
+	{ "VK_LAUNCH_MAIL", 0xB4, "Start Mail", "" },
+	{ "VK_LAUNCH_MEDIA_SELECT", 0xB5, "Select Media", "" },
+	{ "VK_LAUNCH_APP1", 0xB6, "Start Application 1", "" },
+	{ "VK_LAUNCH_APP2", 0xB7, "Start Application 2", "" },
+	//{ "-", 0xB8 - B9, "Reserved", "" },
+	{ "VK_OEM_1", 0xBA, ";", "" },
+	{ "VK_OEM_PLUS", 0xBB, "=", "" },
+	{ "VK_OEM_COMMA", 0xBC, ",", "" },
+	{ "VK_OEM_MINUS", 0xBD, "-", "" },
+	{ "VK_OEM_PERIOD", 0xBE, ".", "" },
+	{ "VK_OEM_2", 0xBF, "/", "Numpad /" },
+	{ "VK_OEM_3", 0xC0, "`", "" },
+	//{ "-", 0xC1 - D7, "Reserved", "" },
+	//{ "-", 0xD8 - DA, "Unassigned", "" },
+	{ "VK_OEM_4", 0xDB, "[", "" },
+	{ "VK_OEM_5", 0xDC, "\\", "" },
+	{ "VK_OEM_6", 0xDD, "]", "" },
+	{ "VK_OEM_7", 0xDE, "'", "" },
+	{ "VK_OEM_8", 0xDF, "Used for miscellaneous characters; it can vary by keyboard.", "" },
+	//{ "-", 0xE0, "Reserved", "" },
+	//{ "-", 0xE1, "OEM specific", "" },
+	{ "VK_OEM_102", 0xE2, "Pipe", "" },
+	//{ "-", 0xE3 - E4, "OEM specific", "" },
+	{ "VK_PROCESSKEY", 0xE5, "IME PROCESS", "" },
+	//{ "-", 0xE6, "OEM specific", "" },
+	{ "VK_PACKET", 0xE7, "Used to pass Unicode characters as if they were keystrokes. The VK_PACKET key is the low word of a 32-bit Virtual Key value used for non-keyboard input methods. For more information, see Remark in KEYBDINPUT, SendInput, WM_KEYDOWN, and WM_KEYUP", "" },
+	//{ "-", 0xE8, "Unassigned", "" },
+	//{ "-", 0xE9 - F5, "OEM specific", "" },
+	{ "VK_ATTN", 0xF6, "Attn", "" },
+	{ "VK_CRSEL", 0xF7, "CrSel", "" },
+	{ "VK_EXSEL", 0xF8, "ExSel", "" },
+	{ "VK_EREOF", 0xF9, "Erase EOF", "Menu" },
+	{ "VK_PLAY", 0xFA, "Play", "" },
+	{ "VK_ZOOM", 0xFB, "Zoom", "" },
+	{ "VK_NONAME", 0xFC, "Reserved", "" },
+	{ "VK_PA1", 0xFD, "PA1", "" },
+	{ "VK_OEM_CLEAR", 0xFE, "Clear", "" }
+};
+
+//Builds the key name/code lookup maps (keyboard, XInput, DirectInput), then starts the device polling thread
+WindowsKeyManager::WindowsKeyManager(shared_ptr<Console> console, HWND hWnd)
+{
+	_console = console;
+	_hWnd = hWnd;
+	ResetKeyState();
+	//Gamepad entries are registered directly: appending them to the file-static _keyDefinitions made it grow on every construction
+	auto registerKey = [this](uint32_t code, const string &description, const string &extDescription) {
+		_keyNames[code] = description;
+		_keyExtendedNames[code] = extDescription.empty() ? "Ext " + description : extDescription;
+		uint32_t keyCode = code <= 0xFFFF ? MapVirtualKeyEx(code & 0xFF, MAPVK_VK_TO_VSC, nullptr) : code;
+		_keyCodes[description] = keyCode;
+		if(!extDescription.empty()) {
+			_keyCodes[extDescription] = 0x100 | keyCode;
+		}
+	};
+	for(const KeyDefinition &keyDef : _keyDefinitions) {
+		registerKey(keyDef.keyCode, keyDef.description, keyDef.extDescription);
+	}
+
+	//Init XInput buttons
+	vector<string> buttonNames = { "Up", "Down", "Left", "Right", "Start", "Back", "L3", "R3", "L1", "R1", "?", "?", "A", "B", "X", "Y", "L2", "R2", "RT Up", "RT Down", "RT Left", "RT Right", "LT Up", "LT Down", "LT Left", "LT Right" };
+	for(int i = 0; i < 4; i++) {
+		for(int j = 0; j < (int)buttonNames.size(); j++) {
+			registerKey((uint32_t)(0xFFFF + i * 0x100 + j + 1), "Pad" + std::to_string(i + 1) + " " + buttonNames[j], "");
+		}
+	}
+
+	//Init DirectInput buttons
+	vector<string> diButtonNames = { "Y+", "Y-", "X-", "X+", "Y2+", "Y2-", "X2-", "X2+", "Z+", "Z-", "Z2+", "Z2-", "DPad Up", "DPad Down", "DPad Right", "DPad Left" };
+	for(int i = 0; i < 16; i++) {
+		for(int j = 0; j < (int)diButtonNames.size(); j++) {
+			registerKey((uint32_t)(0x11000 + i * 0x100 + j), "Joy" + std::to_string(i + 1) + " " + diButtonNames[j], "");
+		}
+		for(int j = 0; j < 128; j++) {
+			registerKey((uint32_t)(0x11000 + i * 0x100 + j + 0x10), "Joy" + std::to_string(i + 1) + " But" + std::to_string(j+1), "");
+		}
+	}
+	StartUpdateDeviceThread();
+}
+
+WindowsKeyManager::~WindowsKeyManager() //Stops the device update thread and waits for it to exit
+{
+	_stopUpdateDeviceThread = true; //Checked by the thread's while loop at the start of every iteration
+	_stopSignal.Signal(); //NOTE(review): presumably wakes the thread out of _stopSignal.Wait(5000) - confirm in AutoResetEvent
+	_updateDeviceThread.join();
+}
+
+//Starts the worker thread that creates the XInput/DirectInput managers and periodically rescans for devices
+void WindowsKeyManager::StartUpdateDeviceThread()
+{
+	//Capture "this" explicitly: implicit capture of "this" through [=] is deprecated since C++20
+	_updateDeviceThread = std::thread([this]() {
+		_xInput = std::make_unique<XInputManager>(_console);
+		_directInput = std::make_unique<DirectInputManager>(_console, _hWnd);
+		while(!_stopUpdateDeviceThread) {
+			//Check for newly plugged in controllers every 5 secs (this takes ~60-70ms when no new controllers are found)
+			if(_xInput->NeedToUpdate()) {
+				_xInput->UpdateDeviceList();
+			}
+			_directInput->UpdateDeviceList();
+			_stopSignal.Wait(5000);
+		}
+	});
+}
+
+//Refreshes the cached state of both gamepad backends.
+//Does nothing while the managers have not been created by the device update thread yet.
+void WindowsKeyManager::RefreshState()
+{
+	if(_xInput && _directInput) {
+		_xInput->RefreshState();
+		_directInput->RefreshState();
+	}
+}
+
+//Returns whether a key is held down: codes below 0x200 are keyboard scan codes, 0x10000-0x10FFF
+//are XInput buttons and codes from 0x11000 up are DirectInput buttons. Anything else is never pressed.
+bool WindowsKeyManager::IsKeyPressed(uint32_t key)
+{
+	if(_disableAllKeys) {
+		return false;
+	}
+
+	if(key < 0x200) {
+		return _keyState[key] != 0;
+	}
+
+	if(key < 0x10000 || !_xInput || !_directInput) {
+		//Unmapped range, or the gamepad managers have not been created yet
+		return false;
+	}
+
+	if(key >= 0x11000) {
+		//DirectInput key: 0x100 codes per joystick
+		uint32_t offset = key - 0x11000;
+		return _directInput->IsPressed((uint8_t)(offset / 0x100), (uint8_t)(offset % 0x100));
+	}
+
+	//XInput key: 0x100 codes per port, button numbers start at 1 (hence the 0xFFFF base)
+	uint32_t offset = key - 0xFFFF;
+	return _xInput->IsPressed((uint8_t)(offset / 0x100), (uint8_t)(offset % 0x100));
+}
+
+//Maps a MouseButton value to its slot in _mouseState (0 = left, 1 = right, 2 = middle); other values are never pressed
+bool WindowsKeyManager::IsMouseButtonPressed(MouseButton button)
+{
+	if(button == MouseButton::LeftButton) {
+		return _mouseState[0];
+	} else if(button == MouseButton::RightButton) {
+		return _mouseState[1];
+	}
+	return button == MouseButton::MiddleButton ? _mouseState[2] : false;
+}
+
+//Lists every input that is currently pressed: XInput buttons, then DirectInput buttons, then
+//keyboard scan codes. Returns an empty list while the gamepad managers have not been created yet.
+vector<uint32_t> WindowsKeyManager::GetPressedKeys()
+{
+	vector<uint32_t> pressed;
+	if(_xInput && _directInput) {
+		_xInput->RefreshState();
+		for(int port = 0; port < XUSER_MAX_COUNT; port++) {
+			for(int button = 1; button <= 26; button++) {
+				if(_xInput->IsPressed(port, button)) {
+					pressed.push_back(0xFFFF + port * 0x100 + button);
+				}
+			}
+		}
+
+		_directInput->RefreshState();
+		for(int joystick = _directInput->GetJoystickCount() - 1; joystick >= 0; joystick--) {
+			for(int button = 0; button < 0x29; button++) {
+				if(_directInput->IsPressed(joystick, button)) {
+					pressed.push_back(0x11000 + joystick * 0x100 + button);
+				}
+			}
+		}
+
+		for(int scanCode = 0; scanCode < 0x200; scanCode++) {
+			if(_keyState[scanCode]) {
+				pressed.push_back(scanCode);
+			}
+		}
+	}
+	return pressed;
+}
+
+//Returns the display name of a scan code or gamepad code, or "" when it is unknown
+string WindowsKeyManager::GetKeyName(uint32_t scanCode)
+{
+	bool isKeyboardCode = scanCode <= 0xFFFF;
+	uint32_t keyCode = isKeyboardCode ? MapVirtualKeyEx(scanCode & 0xFF, MAPVK_VSC_TO_VK, nullptr) : scanCode;
+	//Scan codes with bit 0x100 set are looked up in the extended key names instead
+	auto &names = (isKeyboardCode && (scanCode & 0x100)) ? _keyExtendedNames : _keyNames;
+	auto entry = names.find(keyCode);
+	return entry != names.end() ? entry->second : "";
+}
+
+//Returns the code registered for a key display name.
+//Unknown names yield 0.
+uint32_t WindowsKeyManager::GetKeyCode(string keyName)
+{
+	auto entry = _keyCodes.find(keyName);
+
+	return entry == _keyCodes.end() ? 0 : entry->second;
+}
+
+//Forces both gamepad backends to rescan their device lists.
+//Does nothing while the managers have not been created yet.
+void WindowsKeyManager::UpdateDevices()
+{
+	if(_xInput && _directInput) {
+		//TODO
+		//_console->Pause();
+		_xInput->UpdateDeviceList();
+		_directInput->UpdateDeviceList();
+		//_console->Resume();
+	}
+}
+
+//Records a key or mouse button state change; scan codes above 0x1FF are mouse buttons (low 2 bits = button index)
+void WindowsKeyManager::SetKeyState(uint16_t scanCode, bool state)
+{
+	if(scanCode <= 0x1FF) {
+		uint32_t keyCode = MapVirtualKeyEx(scanCode & 0xFF, MAPVK_VSC_TO_VK, nullptr);
+		if(keyCode >= 0x10 && keyCode <= 0x12) { //Ignore "ext" flag for alt, ctrl & shift
+			scanCode = MapVirtualKeyEx(keyCode, MAPVK_VK_TO_VSC, nullptr);
+		}
+		_keyState[scanCode & 0x1FF] = state;
+	} else if((scanCode & 0x03) < sizeof(_mouseState) / sizeof(_mouseState[0])) { //Index 3 would write past the end of _mouseState
+		_mouseState[scanCode & 0x03] = state;
+	}
+}
+
+void WindowsKeyManager::SetDisabled(bool disabled) //While disabled, IsKeyPressed() returns false for every key
+{
+	_disableAllKeys = disabled;
+}
+
+void WindowsKeyManager::ResetKeyState() //Marks every keyboard key and mouse button as released
+{
+	memset(_mouseState, 0, sizeof(_mouseState));
+	memset(_keyState, 0, sizeof(_keyState));
+}
\ No newline at end of file
diff --git a/Windows/WindowsKeyManager.h b/Windows/WindowsKeyManager.h
new file mode 100644
index 0000000..01594f3
--- /dev/null
+++ b/Windows/WindowsKeyManager.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include "stdafx.h"
+#include <unordered_map>
+#include "../Core/IKeyManager.h"
+#include "../Utilities/Timer.h"
+#include "../Utilities/AutoResetEvent.h"
+#include "XInputManager.h"
+#include "DirectInputManager.h"
+
+struct KeyDefinition { //One row of the key table declared in WindowsKeyManager.cpp
+	string name; //Identifier of the key, e.g. "VK_BACK"
+	uint32_t keyCode; //Key code (the Win32 virtual-key code for the keyboard entries)
+	string description; //Display name; GetKeyCode() looks keys up by this text
+	string extDescription; //Display name of the extended variant of the key, empty when there is none
+};
+
+class Console;
+
+class WindowsKeyManager : public IKeyManager //Keyboard, mouse and gamepad (XInput + DirectInput) input state for Windows
+{
+	private:
+		HWND _hWnd; //Passed to DirectInputManager when the device update thread creates it
+		shared_ptr<Console> _console;
+
+		bool _keyState[0x200]; //Indexed by scan code (0x000-0x1FF), written by SetKeyState()
+		bool _mouseState[0x03]; //0 = left, 1 = right, 2 = middle (see IsMouseButtonPressed)
+		unique_ptr<DirectInputManager> _directInput; //Null until the device update thread has created it
+		unique_ptr<XInputManager> _xInput; //Null until the device update thread has created it
+		std::unordered_map<uint32_t, string> _keyNames; //Key code -> display name
+		std::unordered_map<uint32_t, string> _keyExtendedNames; //Key code -> display name of the extended variant of the key
+		std::unordered_map<string, uint32_t> _keyCodes; //Display name -> scan code (keyboard) or gamepad code
+
+		AutoResetEvent _stopSignal; //Signaled by the destructor; the device update thread waits on it between rescans
+		
+		std::thread _updateDeviceThread; //Started by the constructor, joined by the destructor
+		atomic<bool> _stopUpdateDeviceThread = false; //Set by the destructor to end the thread's loop
+		bool _disableAllKeys = false; //When true, IsKeyPressed() always returns false
+
+		void StartUpdateDeviceThread();
+
+	public:
+		WindowsKeyManager(shared_ptr<Console> console, HWND hWnd);
+		~WindowsKeyManager();
+
+		void RefreshState(); //Refreshes the gamepad state (no-op until the managers exist)
+		bool IsKeyPressed(uint32_t key); //key: scan code (< 0x200) or gamepad code (>= 0x10000)
+		bool IsMouseButtonPressed(MouseButton button);
+		vector<uint32_t> GetPressedKeys(); //Codes of all inputs currently pressed
+		string GetKeyName(uint32_t key); //Display name for a code, "" when unknown
+		uint32_t GetKeyCode(string keyName); //Code for a display name, 0 when unknown
+
+		void SetKeyState(uint16_t scanCode, bool state); //Scan codes above 0x1FF update the mouse button state instead
+		void ResetKeyState(); //Clears the keyboard and mouse state
+		void SetDisabled(bool disabled);
+
+		void UpdateDevices(); //Asks both gamepad managers to update their device lists
+};
diff --git a/Windows/XInputManager.cpp b/Windows/XInputManager.cpp
new file mode 100644
index 0000000..185f0ff
--- /dev/null
+++ b/Windows/XInputManager.cpp
@@ -0,0 +1,77 @@
+#include "stdafx.h"
+#include "XInputManager.h"
+#include "../Core/Console.h"
+
+//Creates one zero-initialized state slot per XInput port; every port starts flagged as connected
+XInputManager::XInputManager(shared_ptr<Console> console) : _console(console)
+{
+	for(int i = 0; i < XUSER_MAX_COUNT; i++) {
+		_gamePadStates.push_back(std::make_shared<XINPUT_STATE>());
+		_gamePadConnected.push_back(true);
+	}
+}
+
+//Polls every port still flagged as connected; a failed poll zeroes the cached state and unflags the port
+void XInputManager::RefreshState()
+{
+	for(DWORD port = 0; port < XUSER_MAX_COUNT; port++) {
+		XINPUT_STATE current;
+		if(!_gamePadConnected[port]) {
+			continue; //XInputGetState is incredibly slow when no controller is plugged in
+		} else if(XInputGetState(port, &current) == ERROR_SUCCESS) {
+			*_gamePadStates[port] = current;
+		} else {
+			ZeroMemory(_gamePadStates[port].get(), sizeof(XINPUT_STATE));
+			_gamePadConnected[port] = false;
+		}
+	}
+}
+
+//Returns true when a port currently flagged as disconnected answers XInputGetState successfully
+bool XInputManager::NeedToUpdate()
+{
+	XINPUT_STATE probe;
+	for(int port = 0; port < XUSER_MAX_COUNT; port++) {
+		//Short-circuit evaluation: only ports flagged as disconnected are probed
+		if(!_gamePadConnected[port] && XInputGetState(port, &probe) == ERROR_SUCCESS) {
+			return true;
+		}
+	}
+	return false;
+}
+
+//Flags every port as connected again, so controllers plugged in after the emu is started get polled by RefreshState()
+void XInputManager::UpdateDeviceList()
+{
+	for(uint8_t &connected : _gamePadConnected) {
+		connected = true;
+	}
+}
+
+//Returns whether a button is pressed on a port: 1-16 are the wButtons bits, 17-18 the triggers, 19-26 the thumbstick directions
+bool XInputManager::IsPressed(uint8_t gamepadPort, uint8_t button)
+{
+	//Reject ports outside of the state list and button 0 (1 << -1 is undefined) instead of reading out of bounds
+	if(gamepadPort >= _gamePadConnected.size() || button == 0 || !_gamePadConnected[gamepadPort]) {
+		return false;
+	}
+	XINPUT_GAMEPAD &gamepad = _gamePadStates[gamepadPort]->Gamepad;
+	if(button <= 16) {
+		return (gamepad.wButtons & (WORD)(1 << (button - 1))) != 0;
+	}
+	//TODO
+	double ratio = 1; //_console->GetSettings()->GetControllerDeadzoneRatio() * 2;
+	switch(button) {
+		case 17: return gamepad.bLeftTrigger > (XINPUT_GAMEPAD_TRIGGER_THRESHOLD * ratio);
+		case 18: return gamepad.bRightTrigger > (XINPUT_GAMEPAD_TRIGGER_THRESHOLD * ratio);
+		case 19: return gamepad.sThumbRY > (XINPUT_GAMEPAD_RIGHT_THUMB_DEADZONE * ratio);
+		case 20: return gamepad.sThumbRY < -(XINPUT_GAMEPAD_RIGHT_THUMB_DEADZONE * ratio);
+		case 21: return gamepad.sThumbRX < -(XINPUT_GAMEPAD_RIGHT_THUMB_DEADZONE * ratio);
+		case 22: return gamepad.sThumbRX > (XINPUT_GAMEPAD_RIGHT_THUMB_DEADZONE * ratio);
+		case 23: return gamepad.sThumbLY > (XINPUT_GAMEPAD_LEFT_THUMB_DEADZONE * ratio);
+		case 24: return gamepad.sThumbLY < -(XINPUT_GAMEPAD_LEFT_THUMB_DEADZONE * ratio);
+		case 25: return gamepad.sThumbLX < -(XINPUT_GAMEPAD_LEFT_THUMB_DEADZONE * ratio);
+		case 26: return gamepad.sThumbLX > (XINPUT_GAMEPAD_LEFT_THUMB_DEADZONE * ratio);
+	}
+	return false;
+}
diff --git a/Windows/XInputManager.h b/Windows/XInputManager.h
new file mode 100644
index 0000000..019d343
--- /dev/null
+++ b/Windows/XInputManager.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "stdafx.h"
+#include <Xinput.h>
+
+class Console;
+
+class XInputManager //Caches the state of the XUSER_MAX_COUNT XInput controller ports
+{
+	private:
+		shared_ptr<Console> _console; //Only referenced by commented-out code in XInputManager.cpp for now
+		vector<shared_ptr<XINPUT_STATE>> _gamePadStates; //Last state read for each port
+		vector<uint8_t> _gamePadConnected; //Non-zero while the port is polled by RefreshState()
+
+	public:
+		XInputManager(shared_ptr<Console> console);
+
+		bool NeedToUpdate(); //True when a port flagged as disconnected responds again
+		void UpdateDeviceList(); //Flags every port as connected again
+		void RefreshState(); //Re-reads the state of every port flagged as connected
+		bool IsPressed(uint8_t gamepadPort, uint8_t button); //button: 1-16 = wButtons bits, 17-18 = triggers, 19-26 = thumbstick directions
+};
diff --git a/Windows/stdafx.cpp b/Windows/stdafx.cpp
new file mode 100644
index 0000000..45cd884
--- /dev/null
+++ b/Windows/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// Windows.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/Windows/stdafx.h b/Windows/stdafx.h
new file mode 100644
index 0000000..4db3bd9
--- /dev/null
+++ b/Windows/stdafx.h
@@ -0,0 +1,47 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include <SDKDDKVer.h>
+
+#define WIN32_LEAN_AND_MEAN             // Exclude rarely-used stuff from Windows headers
+// Windows Header Files:
+#include <windows.h>
+#include <Commdlg.h>
+
+#pragma comment(lib, "dsound.lib")
+#pragma comment(lib, "dxguid.lib")
+#pragma comment(lib, "winmm.lib")
+
+
+// C RunTime Header Files
+#include <stdlib.h>
+#include <malloc.h>
+#include <memory.h>
+#include <tchar.h>
+
+#include <mmsystem.h>
+#include <stdio.h>
+
+#include <d3d11_1.h>
+#include <d3dcompiler.h>
+#include <directxmath.h>
+#include <directxcolors.h>
+#include <dsound.h>
+#include <io.h>
+#include <Fcntl.h>
+
+#include <list>
+#include <vector>
+
+#include <string>
+#include <memory>
+
+using std::list;
+using std::vector;
+using std::shared_ptr;
+using std::string;
+using namespace std::literals::string_literals;
\ No newline at end of file
diff --git a/Windows/targetver.h b/Windows/targetver.h
new file mode 100644
index 0000000..87c0086
--- /dev/null
+++ b/Windows/targetver.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>