Merge branch 'rr1-maint'

2012-04-25 17:20:25 +03:00 · 2012-04-25 17:20:25 +03:00 · 1e5258c8d4
commit 1e5258c8d4
parent 52459eccec eba73d3208
10 changed files with 377 additions and 222 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-1-Δ7
+1-Δ7ε1
--- a/include/library/workthread.hpp
+++ b/include/library/workthread.hpp
@ -85,6 +85,14 @@ public:
 * Note: Don't call from outside workthread code.
 */
 	int operator()(int dummy);
 /**
 * Get wait counters.
 *
 * Returns: Two-element pair.
 *	- The first element is the number of microseconds wait_busy() has waited.
 *	- The second element is the number of microseconds wait_workflag() has waited.
 */
 	std::pair<uint64_t, uint64_t> get_wait_count();
 protected:
 /**
 * Thread entrypoint.
@ -106,6 +114,8 @@ private:
 	volatile bool busy;
 	volatile bool exception_caught;
 	volatile bool exception_oom;
 	volatile uint64_t waitamt_busy;
 	volatile uint64_t waitamt_work;
 	std::string exception_text;
 };
--- a/include/video/avi/codec.hpp
+++ b/include/video/avi/codec.hpp
@ -94,6 +94,13 @@ struct avi_video_codec
 * Returns: The packet.
 */
 	virtual avi_packet getpacket() = 0;
 /**
 * Send performance counters.
 *
 * Parameter b: Amount of busywaiting by emulator.
 * Parameter w: Amount of workwaiting by dumper.
 */
 	virtual void send_performance_counters(uint64_t b, uint64_t w);
 };
 /**
--- a/manual.lyx
+++ b/manual.lyx
@ -6176,5 +6176,21 @@ Wxwidgets: Redesign hotkeys dialog to avoid tree control (tree control doesn't
 Start paused option.
 \end_layout
 \begin_layout Subsection
 rr1-delta7epsilon1
 \end_layout
 \begin_layout Itemize
 AVI: ZMBV support
 \end_layout
 \begin_layout Itemize
 lsnes-dumpavi: Start Lua before starting dumper
 \end_layout
 \begin_layout Itemize
 AVI: Fix secondary audio in mode 4.
 \end_layout
 \end_body
 \end_document
--- a/manual.txt
+++ b/manual.txt
@ -3048,3 +3048,11 @@ set-axis joystick0axis19 disabled
 • Start paused option.
 15.58 rr1-delta7epsilon1
 • AVI: ZMBV support
 • lsnes-dumpavi: Start Lua before starting dumper
 • AVI: Fix secondary audio in mode 4.
--- a/src/library/workthread.cpp
+++ b/src/library/workthread.cpp
@ -1,5 +1,16 @@
 #include "library/workthread.hpp"
 #include <stdexcept>
 #include <sys/time.h>
 namespace
 {
 	//Current wall-clock time from gettimeofday(), expressed as microseconds.
 	uint64_t ticks()
 	{
 		struct timeval now;
 		gettimeofday(&now, NULL);
 		//Widen the seconds before scaling so the multiplication is done in 64 bits.
 		const uint64_t whole_seconds = static_cast<uint64_t>(now.tv_sec);
 		return whole_seconds * 1000000 + now.tv_usec;
 	}
 }
 struct worker_thread_reflector
 {
@ -15,6 +26,8 @@ worker_thread::worker_thread()
 	reflector = NULL;
 	workflag = 0;
 	busy = false;
 	waitamt_busy = 0;
 	waitamt_work = 0;
 	exception_caught = false;
 	exception_oom = false;
 	joined = false;
@ -58,8 +71,12 @@ void worker_thread::clear_busy()
 void worker_thread::wait_busy()
 {
 	umutex_class h(mutex);
-	while(busy)
+	if(busy) {
-		condition.wait(h);
+		uint64_t tmp = ticks();
 		while(busy)
 			condition.wait(h);
 		waitamt_busy += (ticks() - tmp);
 	}
 }
 void worker_thread::rethrow()
@ -90,11 +107,21 @@ uint32_t worker_thread::clear_workflag(uint32_t flag)
 uint32_t worker_thread::wait_workflag()
 {
 	umutex_class h(mutex);
-	while(!workflag)
+	if(!workflag) {
-		condition.wait(h);
+		uint64_t tmp = ticks();
 		while(!workflag)
 			condition.wait(h);
 		waitamt_work += (ticks() - tmp);
 	}
 	return workflag;
 }
 //Snapshot both wait counters while holding the mutex.
 //
 //Returns: (time accumulated by wait_busy(), time accumulated by wait_workflag()), both in ticks() units.
 std::pair<uint64_t, uint64_t> worker_thread::get_wait_count()
 {
 	umutex_class lock(mutex);
 	const uint64_t busy_waited = waitamt_busy;
 	const uint64_t work_waited = waitamt_work;
 	return std::pair<uint64_t, uint64_t>(busy_waited, work_waited);
 }
 int worker_thread::operator()(int dummy)
 {
 	try {
--- a/src/util/lsnes-dumpavi.cpp
+++ b/src/util/lsnes-dumpavi.cpp
@ -292,8 +292,8 @@ int main(int argc, char** argv)
 		if(!our_rom->region)
 			throw std::runtime_error("Core does not support game type '" + movie.gametype + "'");
 		our_rom->load();
 		dumper_startup(dumper, mode, prefix, length);
 		startup_lua_scripts(cmdline);
 		dumper_startup(dumper, mode, prefix, length);
 		main_loop(r, movie, true);
 	} catch(std::bad_alloc& e) {
 		OOM_panic();
--- a/src/video/avi.cpp
+++ b/src/video/avi.cpp
@ -25,6 +25,9 @@
 namespace
 {
 	class avi_avsnoop;
 	avi_avsnoop* vid_dumper;
 	uint32_t rates[] = {8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000,
 		128000, 176400, 192000};
@ -111,6 +114,24 @@ namespace
 		uint32_t max_frames;
 	};
 	//Worker thread that resamples 16-bit audio with libsamplerate and hands the result to the AVI worker.
 	struct resample_worker : public worker_thread
 	{
 		//Record the ratio and channel count, size the buffers, create the resampler and call fire().
 		//Throws std::runtime_error if libsamplerate fails to initialize or is not compiled in.
 		resample_worker(double _ratio, uint32_t _nch);
 		//Release the libsamplerate state (when compiled in).
 		~resample_worker();
 		//Thread body: waits for work flags and resamples whatever is in the input buffer.
 		void entry();
 		//Copy 'frames' frames (nch samples each) from 'block' into the input buffer and wake the thread.
 		void sendblock(short* block, size_t frames);
 		//Tell the thread that no more input follows.
 		void sendend();
 	private:
 		//Input samples as received (16-bit, nch samples per frame).
 		std::vector<short> buffers;
 		//Input samples converted to float for libsamplerate.
 		std::vector<float> buffers2;
 		//Resampled output as float.
 		std::vector<float> buffers3;
 		//Resampled output converted back to 16-bit.
 		std::vector<short> buffers4;
 		//Number of frames currently held in 'buffers'.
 		size_t bufused;
 		//Conversion ratio passed to libsamplerate as src_ratio.
 		double ratio;
 		//Number of channels.
 		uint32_t nch;
 		//Opaque libsamplerate state (SRC_STATE*), kept as void* so the type is not needed here.
 		void* resampler;
 	};
 	struct avi_worker : public worker_thread
 	{
 		avi_worker(const struct avi_info& info);
@ -127,16 +148,17 @@ namespace
 		uint32_t segframes;
 		uint32_t max_segframes;
 		bool closed;
 		avi_video_codec* ivcodec;
 	};
 #define WORKFLAG_QUEUE_FRAME 1
 #define WORKFLAG_FLUSH 2
 #define WORKFLAG_END 4
 	avi_worker::avi_worker(const struct avi_info& info)
 		: aviout(info.prefix, *info.vcodec, *info.acodec, info.sample_rate, info.audio_chans)
 	{
 		ivcodec = info.vcodec;
 		segframes = 0;
 		max_segframes = info.max_frames;
 		fire();
@ -183,6 +205,8 @@ namespace
 				f.force_break = (segframes == max_segframes && max_segframes > 0);
 				if(f.force_break)
 					segframes = 0;
 				auto wc = get_wait_count();
 				ivcodec->send_performance_counters(wc.first, wc.second);
 				memcpy(&f.data[0], frame, 4 * frame_width * frame_height);
 				frame = NULL;
 				clear_workflag(WORKFLAG_QUEUE_FRAME);
@ -208,6 +232,64 @@ namespace
 		}
 	}
 	//Set up buffers and the libsamplerate state, then start the worker via fire().
 	//
 	//Parameter _ratio: Conversion ratio handed to libsamplerate.
 	//Parameter _nch: Number of audio channels.
 	//Throws std::runtime_error: libsamplerate failed to initialize or support is not compiled in.
 	resample_worker::resample_worker(double _ratio, uint32_t _nch)
 		: bufused(0), ratio(_ratio), nch(_nch)
 	{
 		//Input side holds RESAMPLE_BUFFER frames; output side is scaled by the ratio plus 128 frames of
 		//slack.
 		const size_t input_samples = RESAMPLE_BUFFER * nch;
 		const size_t output_samples = (RESAMPLE_BUFFER * nch * ratio) + 128 * nch;
 		buffers.resize(input_samples);
 		buffers2.resize(input_samples);
 		buffers3.resize(output_samples);
 		buffers4.resize(output_samples);
#ifdef WITH_SECRET_RABBIT_CODE
 		int errc = 0;
 		resampler = src_new(SRC_SINC_BEST_QUALITY, nch, &errc);
 		if(errc) {
 			const std::string reason = src_strerror(errc);
 			throw std::runtime_error(std::string("Error initing libsamplerate: ") + reason);
 		}
#else
 		throw std::runtime_error("HQ sample rate conversion not available");
#endif
 		fire();
 	}
 	//Destroy the libsamplerate state created by the constructor (no-op without libsamplerate support).
 	resample_worker::~resample_worker()
 	{
#ifdef WITH_SECRET_RABBIT_CODE
 		SRC_STATE* state = static_cast<SRC_STATE*>(resampler);
 		src_delete(state);
#endif
 	}
 	//Signal end of input to the resampler thread.
 	void resample_worker::sendend()
 	{
 		//NOTE(review): rethrow() presumably propagates an exception caught on the worker thread - confirm.
 		rethrow();
 		//entry() treats WORKFLAG_END as end of input and returns once it has handled it.
 		set_workflag(WORKFLAG_END);
 		request_quit();
 	}
 	//Feed interleaved 16-bit samples to the resampler thread.
 	//
 	//Parameter block: The samples, nch samples per frame.
 	//Parameter frames: Number of frames in block.
 	//
 	//Each round waits until the worker is idle, appends as many frames as the input buffer can take, then
 	//marks the worker busy and wakes it. Rounds repeat until every frame has been handed over.
 	void resample_worker::sendblock(short* block, size_t frames)
 	{
 		do {
 			rethrow();
 			wait_busy();
 			if(bufused + frames < RESAMPLE_BUFFER) {
 				//Everything that is left fits without filling the buffer.
 				const size_t taken = frames;
 				memcpy(&buffers[bufused * nch], block, 2 * nch * taken);
 				block += (taken * nch);
 				bufused += taken;
 				frames = 0;
 			} else if(bufused < RESAMPLE_BUFFER) {
 				//Top the buffer up; the remainder goes in on a later round.
 				const size_t room = RESAMPLE_BUFFER - bufused;
 				memcpy(&buffers[bufused * nch], block, 2 * nch * room);
 				bufused = RESAMPLE_BUFFER;
 				frames -= room;
 				block += (room * nch);
 			}
 			set_busy();
 			set_workflag(WORKFLAG_QUEUE_FRAME);
 		} while(frames > 0);
 	}
 	void waitfn();
 	class avi_avsnoop : public information_dispatch
@ -226,32 +308,19 @@ namespace
 				soundrate.second, 2);
 			dcounter = 0;
 			have_dumped_frame = false;
-			resampler = NULL;
+			resampler_w = NULL;
 			if(soundrate_setting == 4) {
 				double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first;
 				sbuffer.resize(RESAMPLE_BUFFER * chans);
 				sbuffer2.resize(RESAMPLE_BUFFER * chans);
 				fbuffer.resize((RESAMPLE_BUFFER * ratio + 128) * chans + 128);
 				fbuffer2.resize((RESAMPLE_BUFFER * ratio + 128) * chans + 128);
 				sbuffer_fill = 0;
-#ifdef WITH_SECRET_RABBIT_CODE
+				sbuffer.resize(RESAMPLE_BUFFER * chans);
-				int errc = 0;
+				resampler_w = new resample_worker(ratio, chans);
 				resampler = src_new(SRC_SINC_BEST_QUALITY, info.audio_chans, &errc);
 				if(errc)
 					throw std::runtime_error(std::string("Error initing libsamplerate: ") +
 						src_strerror(errc));
 #else
 				throw std::runtime_error("HQ sample rate conversion not available");
 #endif
 			}
 		}
 		~avi_avsnoop() throw()
 		{
-#ifdef WITH_SECRET_RABBIT_CODE
+			if(resampler_w)
-			if(resampler)
+				delete resampler_w;
 				src_delete((SRC_STATE*)resampler);
 #endif
 			delete worker;
 			delete soxdumper;
 		}
@ -271,12 +340,16 @@ namespace
 		void on_sample(short l, short r)
 		{
-			if(resampler) {
+			if(resampler_w) {
 				if(!have_dumped_frame)
 					return;
 				sbuffer[sbuffer_fill++] = l;
 				sbuffer[sbuffer_fill++] = r;
-				forward_samples(false);
+				if(sbuffer_fill == sbuffer.size()) {
 					resampler_w->sendblock(&sbuffer[0], sbuffer_fill / chans);
 					sbuffer_fill = 0;
 				}
 				soxdumper->sample(l, r);
 				return;
 			}
 			short x[2];
@ -296,7 +369,8 @@ namespace
 		void on_dump_end()
 		{
 			if(worker) {
-				forward_samples(true);
+				if(resampler_w)
 					resampler_w->sendend();
 				worker->request_quit();
 			}
 			if(soxdumper)
@ -312,52 +386,19 @@ namespace
 			return true;
 		}
 		avi_worker* worker;
 		resample_worker* resampler_w;
 	private:
 		void forward_samples(bool eos)
 		{
 			if(!eos && sbuffer_fill < sbuffer.size())
 				return;
 #ifdef WITH_SECRET_RABBIT_CODE
 			double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first;
 			SRC_DATA block;
 			src_short_to_float_array(&sbuffer[0], &sbuffer2[0], sbuffer_fill);
 			block.data_in = &sbuffer2[0];
 			block.data_out = &fbuffer2[0];
 			block.input_frames = sbuffer_fill / chans;
 			block.input_frames_used = 0;
 			block.output_frames = fbuffer2.size() / chans;
 			block.output_frames_gen = 0;
 			block.end_of_input = eos ? 1 : 0;
 			block.src_ratio = ratio;
 			int errc = src_process((SRC_STATE*)resampler, &block);
 			if(errc)
 				throw std::runtime_error(std::string("Error using libsamplerate: ") +
 					src_strerror(errc));
 			src_float_to_short_array(&fbuffer2[0], &fbuffer[0], block.output_frames_gen * chans);
 			worker->queue_audio(&fbuffer[0], block.output_frames_gen * chans);
 			if(block.input_frames_used * chans < sbuffer_fill)
 				memmove(&sbuffer[0], &sbuffer[block.output_frames_gen * chans], sbuffer_fill -
 					block.input_frames_used * chans);
 			sbuffer_fill -= block.input_frames_used * chans;
 #endif
 		}
 		sox_dumper* soxdumper;
 		screen<false> dscr;
 		unsigned dcounter;
 		bool have_dumped_frame;
 		std::pair<uint32_t, uint32_t> soundrate;
 		uint32_t audio_record_rate;
 		void* resampler;
 		std::vector<short> sbuffer;
 		std::vector<float> sbuffer2;
 		std::vector<float> fbuffer2;
 		std::vector<short> fbuffer;
 		size_t sbuffer_fill;
 		uint32_t chans;
 	};
 	avi_avsnoop* vid_dumper;
 	void waitfn()
 	{
 		vid_dumper->worker->wait_busy();
@ -449,4 +490,45 @@ namespace
 	adv_avi_dumper::~adv_avi_dumper() throw()
 	{
 	}
 	//Resampler thread body.
 	//
 	//Waits for work flags. On WORKFLAG_QUEUE_FRAME or WORKFLAG_END it converts the bufused frames held in
 	//buffers to float, runs them through libsamplerate, converts the output back to 16-bit and queues it on
 	//the AVI worker. Input frames the resampler did not consume are moved to the front of buffers for the
 	//next round. With WORKFLAG_END set, libsamplerate is called repeatedly with end_of_input until it
 	//produces no more output, and the function then returns.
 	//
 	//Throws std::runtime_error: libsamplerate reported an error.
 	void resample_worker::entry()
 	{
 		while(1) {
 			wait_workflag();
 			//NOTE(review): clear_workflag() is assumed to return the flags as they were before clearing
 			//- confirm.
 			uint32_t work = clear_workflag(~WORKFLAG_QUIT_REQUEST);
 			if(work & (WORKFLAG_QUEUE_FRAME | WORKFLAG_END)) {
 #ifdef WITH_SECRET_RABBIT_CODE
 again:
 				SRC_DATA block;
 				src_short_to_float_array(&buffers[0], &buffers2[0], bufused * nch);
 				block.data_in = &buffers2[0];
 				block.data_out = &buffers3[0];
 				block.input_frames = bufused;
 				block.input_frames_used = 0;
 				block.output_frames = buffers3.size() / nch;
 				block.output_frames_gen = 0;
 				block.end_of_input = (work & WORKFLAG_END) ? 1 : 0;
 				block.src_ratio = ratio;
 				int errc = src_process((SRC_STATE*)resampler, &block);
 				if(errc)
 					throw std::runtime_error(std::string("Error using libsamplerate: ") +
 					src_strerror(errc));
 				src_float_to_short_array(&buffers3[0], &buffers4[0], block.output_frames_gen * nch);
 				vid_dumper->worker->queue_audio(&buffers4[0], block.output_frames_gen * nch);
 				//Keep the input the resampler left unconsumed. It starts right after the consumed
 				//frames (input_frames_used, not output_frames_gen), and memmove() wants a byte count,
 				//not a sample count.
 				if(block.input_frames_used < bufused)
 					memmove(&buffers[0], &buffers[block.input_frames_used * nch], (bufused -
 						block.input_frames_used) * nch * sizeof(buffers[0]));
 				bufused -= block.input_frames_used;
 				if(block.output_frames_gen > 0 && work & WORKFLAG_END)
 					goto again;	//Try again to get all the samples.
 #endif
 				clear_workflag(WORKFLAG_END | WORKFLAG_FLUSH | WORKFLAG_QUEUE_FRAME);
 				clear_busy();
 				if(work & WORKFLAG_END)
 					return;
 			}
 			if(work == WORKFLAG_QUIT_REQUEST)
 				break;
 		}
 	}
 }
--- a/src/video/avi/codec.cpp
+++ b/src/video/avi/codec.cpp
@ -22,6 +22,10 @@ avi_video_codec::format::format(uint32_t _width, uint32_t _height, uint32_t _com
 	clr_important = 0;
 }
 //Default implementation: both counters are ignored. Codecs that want the numbers override this.
 //
 //Parameter b: Amount of busywaiting by emulator.
 //Parameter w: Amount of workwaiting by dumper.
 void avi_video_codec::send_performance_counters(uint64_t b, uint64_t w)
 {
 }
 avi_audio_codec::format::format(uint16_t tag)
 {
 	max_bytes_per_sec = 200000;
--- a/src/video/avi/codec/video/zmbv.cpp
+++ b/src/video/avi/codec/video/zmbv.cpp
@ -6,6 +6,9 @@
 #include <cerrno>
 #include <stdexcept>
 //The largest possible vector.
 #define MAXIMUM_VECTOR 64
 namespace
 {
 	numeric_setting clvl("avi-zmbv-compression", 0, 9, 7);
@ -13,13 +16,18 @@ namespace
 	numeric_setting bwv("avi-zmbv-blockw", 8, 64, 16);
 	numeric_setting bhv("avi-zmbv-blockh", 8, 64, 16);
 	//Motion vector.
 	struct motion
 	{
 		//X motion (positive is to the left), -64...63.
 		int dx;
 		//Y motion (positive is up), -64...63.
 		int dy;
 		//How bad the vector is. 0 means the vector is perfect (no residual).
 		uint32_t p;
 	};
 	//The main ZMBV encoder state.
 	struct avi_codec_zmbv : public avi_video_codec
 	{
 		avi_codec_zmbv(uint32_t _level, uint32_t maxpframes, uint32_t _bw, uint32_t _bh);
@ -29,181 +37,158 @@ namespace
 		bool ready();
 		avi_packet getpacket();
 	private:
 		//The current pending packet, if any.
 		avi_packet out;
 		//False if there is a pending packet, true if ready to take a frame.
 		bool ready_flag;
 		//The size of supplied frames.
 		unsigned iwidth;
 		unsigned iheight;
 		//The size of written frames.
 		unsigned ewidth;
 		unsigned eheight;
 		//P-frames written since last I-frame.
 		unsigned pframes;
 		//Maximum number of P-frames to write in sequence.
 		unsigned max_pframes;
 		//Compression level to use.
 		unsigned level;
-
+		//Size of one block.
 		//Size of block.
 		uint32_t bw;
 		uint32_t bh;
-		//Entropy estimator table.
+		//Motion vector buffer, one motion vector for each block, in left-to-right, top-to-bottom order.
 		std::vector<uint32_t> entropy_tab;
 		//Temporary scratch memory (one block).
 		std::vector<uint32_t> tmp;
 		//Motion vector buffer.
 		std::vector<motion> mv;
-		//Previous&Current frame.
+		//Pixel buffer (2 full frames and one block).
-		std::vector<uint32_t> current;
+		std::vector<uint32_t> pixbuf;
-		std::vector<uint32_t> prev;
+		//Current frame pointer.
-		//Compression packet buffer and size.
+		uint32_t* current_frame;
-		std::vector<char> diff;
+		//Previous frame pointer.
-		size_t diffsize;
+		uint32_t* prev_frame;
 		//Scratch block pointer.
 		uint32_t* scratch;
 		//Output buffer. Sufficient space to hold both compressed and uncompressed data.
 		std::vector<char> outbuffer;
 		//Output scratch memory.
 		char* oscratch;
 		//The actual output buffer. Pointer, size and used.
 		char* outbuf;
 		size_t outbuf_size;
 		size_t outbuf_used;
 		//Zlib state.
 		z_stream zstream;
-		//Output packet buffer and size.
+		//Compute penalty for motion vector (dx, dy) on block with upper-left corner at (bx, by).
-		std::vector<char> output;
+		uint32_t mv_penalty(uint32_t bx, uint32_t by, int dx, int dy);
-		size_t output_size;
+		//Do motion detection for block with upper-left corner at (bx, by). M is filled with the resulting
-
+		//motion vector and t is initial guess for the motion vector.
-		//Motion vector penalty.
+		void mv_detect(uint32_t bx, uint32_t by, motion& m, motion t);
-		uint32_t mv_penalty(uint32_t* data, int32_t bx, int32_t by, int dx, int dy);
+		//Serialize movement vectors and current frame data to output buffer. If keyframe is true, keyframe is
-		//Do motion detection.
+		//written, otherwise non-keyframe.
-		void mv_detect(uint32_t* data, int32_t bx, int32_t by, motion& m, motion t);
+		void serialize_frame(bool keyframe);
 		//Serialize to difference buffer.
 		void serialize_frame(bool keyframe, uint32_t* data);
 		//Take compression packet buffer and write output packet buffer.
 		void compress_packet(bool keyframe);
 	};
-	void rbound(int32_t x, int32_t w, uint32_t b, int32_t& start, int32_t& offset, int32_t& size)
+	//Compute XOR of blocks.
 	void xor_blocks(uint32_t* target, uint32_t* src1, uint32_t src1x, uint32_t src1y,
 		uint32_t src1w, uint32_t src1h, uint32_t* src2, uint32_t src2x, uint32_t src2y,
 		uint32_t src2w, uint32_t src2h, uint32_t bw, uint32_t bh)
 	{
-		start = x;
+		uint32_t* s1ptr = src1 + src1y * src1w + src1x;
-		offset = 0;
+		uint32_t* s2ptr = src2 + src2y * src2w + src2x;
-		size = b;
+		for(uint32_t y = 0; y < bh; y++) {
-		if(start < 0) {
+			for(uint32_t x = 0; x < bw; x++)
-			offset = -start;
+				target[x] = s1ptr[x] ^ s2ptr[x];
-			start = 0;
+			target += bw;
-			size = b - offset;
+			s1ptr += src1w;
 			s2ptr += src2w;
 		}
 		if(start + size > w)
 			size = w - start;
 		if(size < 0)
 			size = 0;
 		start = x + offset;
 	}
-	void xor_blocks(uint32_t* target, uint32_t* src1, int32_t src1x, int32_t src1y,
+	//Estimate entropy.
-		int32_t src1w, int32_t src1h, uint32_t* src2, int32_t src2x, int32_t src2y,
+	uint32_t entropy(uint32_t* data, uint32_t bw, uint32_t bh)
 		int32_t src2w, int32_t src2h, uint32_t bw, uint32_t bh)
 	{
 		int32_t h_s1start;
 		int32_t h_s1off;
 		int32_t h_s1size;
 		int32_t h_s2start;
 		int32_t h_s2off;
 		int32_t h_s2size;
 		int32_t v_s1start;
 		int32_t v_s1off;
 		int32_t v_s1size;
 		int32_t v_s2start;
 		int32_t v_s2off;
 		int32_t v_s2size;
 		rbound(src1x, src1w, bw, h_s1start, h_s1off, h_s1size);
 		rbound(src2x, src2w, bw, h_s2start, h_s2off, h_s2size);
 		rbound(src1y, src1h, bh, v_s1start, v_s1off, v_s1size);
 		rbound(src2y, src2h, bh, v_s2start, v_s2off, v_s2size);
 		if(h_s1size < bw || v_s1size < bh)
 			memset(target, 0, 4 * bw * bh);
 		uint32_t* t1ptr = target + v_s1off * bh + h_s1off;
 		uint32_t* t2ptr = target + v_s2off * bh + h_s2off;
 		uint32_t* s1ptr = src1 + v_s1start * src1w + h_s1start;
 		uint32_t* s2ptr = src2 + v_s2start * src2w + h_s2start;
 		for(int32_t y = 0; y < v_s1size; y++)
 			memcpy(t1ptr + bw * y, s1ptr + src1w * y, 4 * h_s1size);
 		for(int32_t y = 0; y < v_s2size; y++)
 			for(int32_t x = 0; x < h_s2size; x++)
 				t2ptr[y * bw + x] ^= s2ptr[y * src2w + x];
 	}
 	void entropy_init(std::vector<uint32_t>& mem, uint32_t bw, uint32_t bh)
 	{
 		size_t bytes = 4 * bw * bh;
 		mem.resize(bytes + 1);
 		mem[0] = 0;
 		mem[bytes] = 0;
 		double M0 = log(bytes);
 		double M1 = 700000000.0 / bytes;
 		for(size_t i = 1; i < bytes; i++)
 			mem[i] = M1 * (M0 - log(i));
 	}
 	uint32_t entropy(std::vector<uint32_t>& mem, uint32_t* data)
 	{
 		//Because XORs are essentially random, calculate the number of non-zeroes to ascertain badness.
 		uint8_t* _data = reinterpret_cast<uint8_t*>(data);
 		uint32_t e = 0;
-		size_t imax = mem.size() - 1;
+		size_t imax = 4 * bw * bh;
 		for(size_t i = 0; i < imax; i++)
 			if(_data[i])
 				e++;
 		return e;
 	}
-	uint32_t avi_codec_zmbv::mv_penalty(uint32_t* data, int32_t bx, int32_t by, int dx, int dy)
+	uint32_t avi_codec_zmbv::mv_penalty(uint32_t bx, uint32_t by, int dx, int dy)
 	{
-		xor_blocks(&tmp[0], data, bx, by, ewidth, eheight, &prev[0], bx + dx, by + dy, ewidth, eheight, bw,
+		//Penalty is entropy estimate of resulting block.
-			bh);
+		xor_blocks(scratch, current_frame, bx, by, ewidth + 2 * MAXIMUM_VECTOR, eheight, prev_frame, bx + dx,
-		return entropy(entropy_tab, &tmp[0]);
+			by + dy, ewidth + 2 * MAXIMUM_VECTOR, eheight, bw, bh);
 		return entropy(scratch, bw, bh);
 	}
-	void avi_codec_zmbv::serialize_frame(bool keyframe, uint32_t* data)
+	void avi_codec_zmbv::serialize_frame(bool keyframe)
 	{
-		if(keyframe) {
+		uint32_t nhb, nvb, nb;
-			memcpy(&diff[0], data, 4 * ewidth * eheight);
+		//In_stride/in_offset is in units of words, out_stride is in units of bytes.
-			diffsize = 4 * ewidth * eheight;
+		size_t in_stride = (ewidth + 2 * MAXIMUM_VECTOR);
-			return;
+		size_t in_offset = MAXIMUM_VECTOR * (in_stride + 1);
 		}
 		uint32_t nhb = (ewidth + bw - 1) / bw;
 		uint32_t nvb = (eheight + bh - 1) / bh;
 		uint32_t nb = nhb * nvb;
 		size_t osize = 0;
-		for(size_t i = 0; i < nb; i++) {
+		if(keyframe) {
-			diff[osize++] = (mv[i].dx << 1) | (mv[i].p ? 1 : 0);
+			//Just copy the frame data and compress that.
-			diff[osize++] = (mv[i].dy << 1);
+			for(size_t y = 0; y < eheight; y++)
 				memcpy(oscratch + 4 * ewidth * y, current_frame + in_stride * y + in_offset,
 					4 * ewidth);
 			osize = 4 * ewidth * eheight;
 			goto compress;
 		}
 		//Number of blocks.
 		nhb = (ewidth + bw - 1) / bw;
 		nvb = (eheight + bh - 1) / bh;
 		nb = nhb * nvb;
 		osize = 0;
 		//Serialize the motion vectors.
 		for(size_t i = 0; i < nb; i++) {
 			oscratch[osize++] = (mv[i].dx << 1) | (mv[i].p ? 1 : 0);
 			oscratch[osize++] = (mv[i].dy << 1);
 		}
 		//Pad to multiple of 4 bytes.
 		while(osize % 4)
-			diff[osize++] = 0;
+			oscratch[osize++] = 0;
 		//Serialize the residuals.
 		for(size_t i = 0; i < nb; i++) {
 			if(mv[i].p == 0)
 				continue;
-			int32_t bx = (i % nhb) * bw;
+			uint32_t bx = (i % nhb) * bw + MAXIMUM_VECTOR;
-			int32_t by = (i / nhb) * bh;
+			uint32_t by = (i / nhb) * bh + MAXIMUM_VECTOR;
-			xor_blocks(reinterpret_cast<uint32_t*>(&diff[osize]), data, bx, by, ewidth, eheight, &prev[0],
+			xor_blocks(reinterpret_cast<uint32_t*>(oscratch + osize), current_frame, bx, by, ewidth + 2 *
-				bx + mv[i].dx, by + mv[i].dy, ewidth, eheight, bw, bh);
+				MAXIMUM_VECTOR, eheight, prev_frame, bx + mv[i].dx, by + mv[i].dy, ewidth + 2 *
 				MAXIMUM_VECTOR, eheight, bw, bh);
 			osize += 4 * bw * bh;
 		}
-		diffsize = osize;
+compress:
-	}
+		//Compress the output data.
 		zstream.next_in = reinterpret_cast<uint8_t*>(oscratch);
 		zstream.avail_in = osize;
-	void avi_codec_zmbv::compress_packet(bool keyframe)
+		osize = 0;
-	{
+		outbuf[osize++] = keyframe ? 1 : 0;	//Indicate keyframe/not.
 		size_t osize = 0;
 		output[osize++] = keyframe ? 1 : 0;	//Indicate keyframe/not.
 		if(keyframe) {
-			output[osize++] = 0;		//Version 0.1
+			//Write the keyframe header.
-			output[osize++] = 1;
+			outbuf[osize++] = 0;		//Version 0.1
-			output[osize++] = 1;		//Zlib compression.
+			outbuf[osize++] = 1;
-			output[osize++] = 8;		//32 bit.
+			outbuf[osize++] = 1;		//Zlib compression.
-			output[osize++] = bw;		//Block size.
+			outbuf[osize++] = 8;		//32 bit.
-			output[osize++] = bh;
+			outbuf[osize++] = bw;		//Block size.
 			outbuf[osize++] = bh;
 			deflateReset(&zstream);		//Reset the zlib context.
 		}
-		zstream.next_in = reinterpret_cast<uint8_t*>(&diff[0]);
+		zstream.next_out = reinterpret_cast<uint8_t*>(&outbuf[osize]);
-		zstream.avail_in = diffsize;
+		zstream.avail_out = outbuf_size - osize;
 		zstream.next_out = reinterpret_cast<uint8_t*>(&output[osize]);
 		zstream.avail_out = output.size() - osize;
 		if(deflate(&zstream, Z_SYNC_FLUSH) != Z_OK)
 			throw std::runtime_error("Zlib error while compressing data");
 		if(zstream.avail_in || !zstream.avail_out)
 			throw std::runtime_error("Buffer overrun while compressing data");
-		output_size = output.size() - zstream.avail_out;
+		outbuf_used = outbuf_size - zstream.avail_out;
 	}
 	//If candidate is better than best, update best. Returns true if ideal has been reached, else false.
 	bool update_best(motion& best, motion& candidate)
 	{
 		if(candidate.p < best.p)
@ -211,28 +196,31 @@ namespace
 		return (best.p == 0);
 	}
-	void avi_codec_zmbv::mv_detect(uint32_t* data, int32_t bx, int32_t by, motion& m, motion t)
+	void avi_codec_zmbv::mv_detect(uint32_t bx, uint32_t by, motion& m, motion t)
 	{
 		//Try the suggested vector.
 		motion c;
-		m.p = mv_penalty(data, bx, by, m.dx = t.dx, m.dy = t.dy);
+		m.p = mv_penalty(bx, by, m.dx = t.dx, m.dy = t.dy);
 		if(!m.p)
 			return;
-		c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = 0);
+		//Try the zero vector.
 		c.p = mv_penalty(bx, by, c.dx = 0, c.dy = 0);
 		if(update_best(m, c))
 			return;
 		//Try cardinal vectors up to 9 units.
 		for(int s = 1; s < 10; s++) {
 			if(s == 0)
 				continue;
-			c.p = mv_penalty(data, bx, by, c.dx = -s, c.dy = 0);
+			c.p = mv_penalty(bx, by, c.dx = -s, c.dy = 0);
 			if(update_best(m, c))
 				return;
-			c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = -s);
+			c.p = mv_penalty(bx, by, c.dx = 0, c.dy = -s);
 			if(update_best(m, c))
 				return;
-			c.p = mv_penalty(data, bx, by, c.dx = s, c.dy = 0);
+			c.p = mv_penalty(bx, by, c.dx = s, c.dy = 0);
 			if(update_best(m, c))
 				return;
-			c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = s);
+			c.p = mv_penalty(bx, by, c.dx = 0, c.dy = s);
 			if(update_best(m, c))
 				return;
 		}
@ -271,19 +259,23 @@ namespace
 		ready_flag = true;
 		avi_video_codec::format fmt(ewidth, eheight, 0x56424D5A, 24);
-		entropy_init(entropy_tab, bw, bh);
+		pixbuf.resize(2 * (ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR) + bw * bh);
-		prev.resize(4 * ewidth * eheight);
+		current_frame = &pixbuf[0];
-		current.resize(4 * ewidth * eheight);
+		prev_frame = &pixbuf[(ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR)];
-		tmp.resize(4 * bw * bh);
+		scratch = &pixbuf[2 * (ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR)];
 		mv.resize(((ewidth + bw - 1) / bw) * ((eheight + bh - 1) / bh));
-		diff.resize(4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight);
+		size_t maxdiff = 4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight;
-		output.resize(deflateBound(&zstream, diff.size()) + 128);
+		outbuf_size = deflateBound(&zstream, maxdiff) + 128;
 		outbuffer.resize(maxdiff + outbuf_size);
 		oscratch = &outbuffer[outbuf_size];
 		outbuf = &outbuffer[0];
 		memset(&pixbuf[0], 0, 4 * pixbuf.size());
 		return fmt;
 	}
 	void avi_codec_zmbv::frame(uint32_t* data)
 	{
-		bool buffer_loaded = false;
+		//Keyframe/not determination.
 		bool keyframe = false;
 		if(pframes >= max_pframes) {
 			keyframe = true;
@ -293,25 +285,34 @@ namespace
 		//If bigendian, swap.
 		short magic = 258;
 		size_t frameoffset = MAXIMUM_VECTOR * (ewidth + 2 * MAXIMUM_VECTOR + 1);
 		size_t framestride = ewidth + 2 * MAXIMUM_VECTOR;
 		if(reinterpret_cast<uint8_t*>(&magic)[0] == 1)
-			for(size_t i = 0; i < ewidth * eheight; i++) {
+			for(size_t y = 0; y < iheight; y++) {
-				uint8_t* _current = reinterpret_cast<uint8_t*>(&current[0]);
+				uint8_t* _current = reinterpret_cast<uint8_t*>(current_frame + frameoffset +
-				uint8_t* _data = reinterpret_cast<uint8_t*>(&data[0]);
+					framestride * y);
-				_current[4 * i + 0] = _data[4 * i + 3];
+				uint8_t* _data = reinterpret_cast<uint8_t*>(&data[iwidth * y]);
-				_current[4 * i + 1] = _data[4 * i + 2];
+				for(size_t i = 0; i < iwidth; i++) {
-				_current[4 * i + 2] = _data[4 * i + 1];
+					_current[4 * i + 0] = _data[4 * i + 3];
-				_current[4 * i + 3] = _data[4 * i + 0];
+					_current[4 * i + 1] = _data[4 * i + 2];
 					_current[4 * i + 2] = _data[4 * i + 1];
 					_current[4 * i + 3] = _data[4 * i + 0];
 				}
 			}
 		else
-			for(size_t i = 0; i < ewidth * eheight; i++) {
+			for(size_t y = 0; y < iheight; y++) {
-				uint8_t* _current = reinterpret_cast<uint8_t*>(&current[0]);
+				uint8_t* _current = reinterpret_cast<uint8_t*>(current_frame + frameoffset +
-				uint8_t* _data = reinterpret_cast<uint8_t*>(&data[0]);
+					framestride * y);
-				_current[4 * i + 2] = _data[4 * i + 0];
+				uint8_t* _data = reinterpret_cast<uint8_t*>(&data[iwidth * y]);
-				_current[4 * i + 1] = _data[4 * i + 1];
+				for(size_t i = 0; i < iwidth; i++) {
-				_current[4 * i + 0] = _data[4 * i + 2];
+					_current[4 * i + 2] = _data[4 * i + 0];
-				_current[4 * i + 3] = _data[4 * i + 3];
+					_current[4 * i + 1] = _data[4 * i + 1];
 					_current[4 * i + 0] = _data[4 * i + 2];
 					_current[4 * i + 3] = _data[4 * i + 3];
 				}
 			}
 		//Estimate motion vectors for all blocks if non-keyframe.
 		uint32_t nhb = (ewidth + bw - 1) / bw;
 		if(!keyframe) {
 			motion t;
@ -319,16 +320,16 @@ namespace
 			t.dy = 0;
 			t.p = 0;
 			for(size_t i = 0; i < mv.size(); i++) {
-				mv_detect(&current[0], (i % nhb) * bw, (i / nhb) * bh, mv[i], t);
+				mv_detect((i % nhb) * bw + MAXIMUM_VECTOR, (i / nhb) * bh + MAXIMUM_VECTOR, mv[i], t);
 				t = mv[i];
 			}
 		}
-		serialize_frame(keyframe, &current[0]);
+		//Serialize and output.
-		compress_packet(keyframe);
+		serialize_frame(keyframe);
-		memcpy(&prev[0], &current[0], 4 * ewidth * eheight);
+		std::swap(current_frame, prev_frame);
-		out.payload.resize(output_size);
+		out.payload.resize(outbuf_used);
-		memcpy(&out.payload[0], &output[0], output_size);
+		memcpy(&out.payload[0], outbuf, outbuf_used);
 		out.typecode = 0x6264;		//Not exactly correct according to specs...
 		out.hidden = false;
 		out.indexflags = keyframe ? 0x10 : 0;
@ -346,7 +347,7 @@ namespace
 		return out;
 	}
-
+	//ZMBV encoder factory object.
 	avi_video_codec_type rgb("zmbv", "Zip Motion Blocks Video codec",
 		[]() -> avi_video_codec* { return new avi_codec_zmbv(clvl, kint, bwv, bhv);});
 }