From 1a8cfbc9900056411a77dcba3e42d15b1e4ddab5 Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Tue, 24 Apr 2012 00:38:53 +0300 Subject: [PATCH 1/7] AVI ZMBV: Lots of refactoring --- src/video/avi/codec/video/zmbv.cpp | 246 ++++++++++++++++------------- 1 file changed, 137 insertions(+), 109 deletions(-) diff --git a/src/video/avi/codec/video/zmbv.cpp b/src/video/avi/codec/video/zmbv.cpp index 3c520473..db136809 100644 --- a/src/video/avi/codec/video/zmbv.cpp +++ b/src/video/avi/codec/video/zmbv.cpp @@ -13,13 +13,18 @@ namespace numeric_setting bwv("avi-zmbv-blockw", 8, 64, 16); numeric_setting bhv("avi-zmbv-blockh", 8, 64, 16); + //Motion vector. struct motion { + //X motion (positive is to left), -64...63. int dx; + //Y motion (positive it to up), -64...63. int dy; + //How bad the vector is. 0 means the vector is perfect (no residual). uint32_t p; }; + //The main ZMBV decoder state. struct avi_codec_zmbv : public avi_video_codec { avi_codec_zmbv(uint32_t _level, uint32_t maxpframes, uint32_t _bw, uint32_t _bh); @@ -29,46 +34,58 @@ namespace bool ready(); avi_packet getpacket(); private: + //The current pending packet, if any. avi_packet out; + //False if there is a pending packet, true if ready to take a frame. bool ready_flag; + //The size of supplied frames. unsigned iwidth; unsigned iheight; + //The size of written frames. unsigned ewidth; unsigned eheight; + //P-frames written since last I-frame. unsigned pframes; + //Maximum number of P-frames to write in sequence. unsigned max_pframes; + //Compression level to use. unsigned level; - - //Size of block. + //Size of one block. uint32_t bw; uint32_t bh; - //Entropy estimator table. - std::vector entropy_tab; - //Temporary scratch memory (one block). - std::vector tmp; - //Motion vector buffer. + //Motion vector buffer, one motion vector for each block, in left-to-right, top-to-bottom order. std::vector mv; - //Previous&Current frame. - std::vector current; - std::vector prev; - //Compression packet buffer and size. - std::vector diff; - size_t diffsize; + //Pixel buffer (2 full frames and one block). + std::vector pixbuf; + //Current frame pointer. + uint32_t* current_frame; + //Previous frame pointer. + uint32_t* prev_frame; + //Scratch block pointer. + uint32_t* scratch; + //Output buffer. Sufficient space to hold both compressed and uncompressed data. + std::vector outbuffer; + //Output scratch memory. + char* oscratch; + //The actual output buffer. Pointer, size and ued. + char* outbuf; + size_t outbuf_size; + size_t outbuf_used; + //Zlib state. z_stream zstream; - //Output packet buffer and size. - std::vector output; - size_t output_size; - //Motion vector penalty. - uint32_t mv_penalty(uint32_t* data, int32_t bx, int32_t by, int dx, int dy); - //Do motion detection. - void mv_detect(uint32_t* data, int32_t bx, int32_t by, motion& m, motion t); - //Serialize to difference buffer. - void serialize_frame(bool keyframe, uint32_t* data); - //Take compression packet buffer and write output packet buffer. - void compress_packet(bool keyframe); + //Compute penalty for motion vector (dx, dy) on block with upper-left corner at (bx, by). + uint32_t mv_penalty(int32_t bx, int32_t by, int dx, int dy); + //Do motion detection for block with upper-left corner at (bx, by). M is filled with the resulting + //motion vector and t is initial guess for the motion vector. + void mv_detect(int32_t bx, int32_t by, motion& m, motion t); + //Serialize movement vectors and furrent frame data to output buffer. If keyframe is true, keyframe is + //written, otherwise non-keyframe. + void serialize_frame(bool keyframe); }; + //Intersect the range [x, x+b) with [0, w). start is where the range starts, size is size of range, + //and offset is number of numbers clipped from low bound. void rbound(int32_t x, int32_t w, uint32_t b, int32_t& start, int32_t& offset, int32_t& size) { start = x; @@ -86,6 +103,7 @@ namespace start = x + offset; } + //Compute XOR of blocks. void xor_blocks(uint32_t* target, uint32_t* src1, int32_t src1x, int32_t src1y, int32_t src1w, int32_t src1h, uint32_t* src2, int32_t src2x, int32_t src2y, int32_t src2w, int32_t src2h, uint32_t bw, uint32_t bh) @@ -121,89 +139,87 @@ namespace t2ptr[y * bw + x] ^= s2ptr[y * src2w + x]; } - void entropy_init(std::vector& mem, uint32_t bw, uint32_t bh) - { - size_t bytes = 4 * bw * bh; - mem.resize(bytes + 1); - mem[0] = 0; - mem[bytes] = 0; - double M0 = log(bytes); - double M1 = 700000000.0 / bytes; - for(size_t i = 1; i < bytes; i++) - mem[i] = M1 * (M0 - log(i)); - } - - uint32_t entropy(std::vector& mem, uint32_t* data) + //Estimate entropy. + uint32_t entropy(uint32_t* data, uint32_t bw, uint32_t bh) { + //Because XORs are essentially random, calculate the number of non-zeroes to ascertain badness. uint8_t* _data = reinterpret_cast(data); uint32_t e = 0; - size_t imax = mem.size() - 1; + size_t imax = 4 * bw * bh; for(size_t i = 0; i < imax; i++) if(_data[i]) e++; return e; } - uint32_t avi_codec_zmbv::mv_penalty(uint32_t* data, int32_t bx, int32_t by, int dx, int dy) + uint32_t avi_codec_zmbv::mv_penalty(int32_t bx, int32_t by, int dx, int dy) { - xor_blocks(&tmp[0], data, bx, by, ewidth, eheight, &prev[0], bx + dx, by + dy, ewidth, eheight, bw, - bh); - return entropy(entropy_tab, &tmp[0]); + //Penalty is entropy estimate of resulting block. + xor_blocks(scratch, current_frame, bx, by, ewidth, eheight, prev_frame, bx + dx, by + dy, ewidth, + eheight, bw, bh); + return entropy(scratch, bw, bh); } - void avi_codec_zmbv::serialize_frame(bool keyframe, uint32_t* data) + void avi_codec_zmbv::serialize_frame(bool keyframe) { - if(keyframe) { - memcpy(&diff[0], data, 4 * ewidth * eheight); - diffsize = 4 * ewidth * eheight; - return; - } - uint32_t nhb = (ewidth + bw - 1) / bw; - uint32_t nvb = (eheight + bh - 1) / bh; - uint32_t nb = nhb * nvb; + uint32_t nhb, nvb, nb; size_t osize = 0; - for(size_t i = 0; i < nb; i++) { - diff[osize++] = (mv[i].dx << 1) | (mv[i].p ? 1 : 0); - diff[osize++] = (mv[i].dy << 1); + if(keyframe) { + //Just copy the frame data and compress that. + memcpy(oscratch, current_frame, 4 * ewidth * eheight); + osize = 4 * ewidth * eheight; + goto compress; } + //Number of blocks. + nhb = (ewidth + bw - 1) / bw; + nvb = (eheight + bh - 1) / bh; + nb = nhb * nvb; + osize = 0; + //Serialize the motion vectors. + for(size_t i = 0; i < nb; i++) { + oscratch[osize++] = (mv[i].dx << 1) | (mv[i].p ? 1 : 0); + oscratch[osize++] = (mv[i].dy << 1); + } + //Pad to multiple of 4 bytes. while(osize % 4) - diff[osize++] = 0; + oscratch[osize++] = 0; + //Serialize the residuals. for(size_t i = 0; i < nb; i++) { if(mv[i].p == 0) continue; int32_t bx = (i % nhb) * bw; int32_t by = (i / nhb) * bh; - xor_blocks(reinterpret_cast(&diff[osize]), data, bx, by, ewidth, eheight, &prev[0], - bx + mv[i].dx, by + mv[i].dy, ewidth, eheight, bw, bh); + xor_blocks(reinterpret_cast(oscratch + osize), current_frame, bx, by, ewidth, + eheight, prev_frame, bx + mv[i].dx, by + mv[i].dy, ewidth, eheight, bw, bh); osize += 4 * bw * bh; } - diffsize = osize; - } +compress: + //Compress the output data. + zstream.next_in = reinterpret_cast(oscratch); + zstream.avail_in = osize; - void avi_codec_zmbv::compress_packet(bool keyframe) - { - size_t osize = 0; - output[osize++] = keyframe ? 1 : 0; //Indicate keyframe/not. + osize = 0; + outbuf[osize++] = keyframe ? 1 : 0; //Indicate keyframe/not. if(keyframe) { - output[osize++] = 0; //Version 0.1 - output[osize++] = 1; - output[osize++] = 1; //Zlib compression. - output[osize++] = 8; //32 bit. - output[osize++] = bw; //Block size. - output[osize++] = bh; + //Write the keyframe header. + outbuf[osize++] = 0; //Version 0.1 + outbuf[osize++] = 1; + outbuf[osize++] = 1; //Zlib compression. + outbuf[osize++] = 8; //32 bit. + outbuf[osize++] = bw; //Block size. + outbuf[osize++] = bh; deflateReset(&zstream); //Reset the zlib context. } - zstream.next_in = reinterpret_cast(&diff[0]); - zstream.avail_in = diffsize; - zstream.next_out = reinterpret_cast(&output[osize]); - zstream.avail_out = output.size() - osize; + zstream.next_out = reinterpret_cast(&outbuf[osize]); + zstream.avail_out = outbuf_size - osize; if(deflate(&zstream, Z_SYNC_FLUSH) != Z_OK) throw std::runtime_error("Zlib error while compressing data"); if(zstream.avail_in || !zstream.avail_out) throw std::runtime_error("Buffer overrun while compressing data"); - output_size = output.size() - zstream.avail_out; + outbuf_used = outbuf_size - zstream.avail_out; } + //If candidate is better than best, update best. Returns true if ideal has been reached, else false. bool update_best(motion& best, motion& candidate) { if(candidate.p < best.p) @@ -211,28 +227,31 @@ namespace return (best.p == 0); } - void avi_codec_zmbv::mv_detect(uint32_t* data, int32_t bx, int32_t by, motion& m, motion t) + void avi_codec_zmbv::mv_detect(int32_t bx, int32_t by, motion& m, motion t) { + //Try the suggested vector. motion c; - m.p = mv_penalty(data, bx, by, m.dx = t.dx, m.dy = t.dy); + m.p = mv_penalty(bx, by, m.dx = t.dx, m.dy = t.dy); if(!m.p) return; - c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = 0); + //Try the zero vector. + c.p = mv_penalty(bx, by, c.dx = 0, c.dy = 0); if(update_best(m, c)) return; + //Try cardinal vectors up to 9 units. for(int s = 1; s < 10; s++) { if(s == 0) continue; - c.p = mv_penalty(data, bx, by, c.dx = -s, c.dy = 0); + c.p = mv_penalty(bx, by, c.dx = -s, c.dy = 0); if(update_best(m, c)) return; - c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = -s); + c.p = mv_penalty(bx, by, c.dx = 0, c.dy = -s); if(update_best(m, c)) return; - c.p = mv_penalty(data, bx, by, c.dx = s, c.dy = 0); + c.p = mv_penalty(bx, by, c.dx = s, c.dy = 0); if(update_best(m, c)) return; - c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = s); + c.p = mv_penalty(bx, by, c.dx = 0, c.dy = s); if(update_best(m, c)) return; } @@ -271,19 +290,23 @@ namespace ready_flag = true; avi_video_codec::format fmt(ewidth, eheight, 0x56424D5A, 24); - entropy_init(entropy_tab, bw, bh); - prev.resize(4 * ewidth * eheight); - current.resize(4 * ewidth * eheight); - tmp.resize(4 * bw * bh); + pixbuf.resize(2 * ewidth * eheight + bw * bh); + current_frame = &pixbuf[0]; + prev_frame = &pixbuf[ewidth * eheight]; + scratch = &pixbuf[2 * ewidth * eheight]; mv.resize(((ewidth + bw - 1) / bw) * ((eheight + bh - 1) / bh)); - diff.resize(4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight); - output.resize(deflateBound(&zstream, diff.size()) + 128); + size_t maxdiff = 4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight; + outbuf_size = deflateBound(&zstream, maxdiff) + 128; + outbuffer.resize(maxdiff + outbuf_size); + oscratch = &outbuffer[outbuf_size]; + outbuf = &outbuffer[0]; + memset(&pixbuf[0], 0, 4 * pixbuf.size()); return fmt; } void avi_codec_zmbv::frame(uint32_t* data) { - bool buffer_loaded = false; + //Keyframe/not determination. bool keyframe = false; if(pframes >= max_pframes) { keyframe = true; @@ -294,24 +317,29 @@ namespace //If bigendian, swap. short magic = 258; if(reinterpret_cast(&magic)[0] == 1) - for(size_t i = 0; i < ewidth * eheight; i++) { - uint8_t* _current = reinterpret_cast(¤t[0]); - uint8_t* _data = reinterpret_cast(&data[0]); - _current[4 * i + 0] = _data[4 * i + 3]; - _current[4 * i + 1] = _data[4 * i + 2]; - _current[4 * i + 2] = _data[4 * i + 1]; - _current[4 * i + 3] = _data[4 * i + 0]; + for(size_t y = 0; y < iheight; y++) { + uint8_t* _current = reinterpret_cast(current_frame + ewidth * y); + uint8_t* _data = reinterpret_cast(&data[iwidth * y]); + for(size_t i = 0; i < iwidth; i++) { + _current[4 * i + 0] = _data[4 * i + 3]; + _current[4 * i + 1] = _data[4 * i + 2]; + _current[4 * i + 2] = _data[4 * i + 1]; + _current[4 * i + 3] = _data[4 * i + 0]; + } } else - for(size_t i = 0; i < ewidth * eheight; i++) { - uint8_t* _current = reinterpret_cast(¤t[0]); - uint8_t* _data = reinterpret_cast(&data[0]); - _current[4 * i + 2] = _data[4 * i + 0]; - _current[4 * i + 1] = _data[4 * i + 1]; - _current[4 * i + 0] = _data[4 * i + 2]; - _current[4 * i + 3] = _data[4 * i + 3]; + for(size_t y = 0; y < iheight; y++) { + uint8_t* _current = reinterpret_cast(current_frame + ewidth * y); + uint8_t* _data = reinterpret_cast(&data[iwidth * y]); + for(size_t i = 0; i < iwidth; i++) { + _current[4 * i + 2] = _data[4 * i + 0]; + _current[4 * i + 1] = _data[4 * i + 1]; + _current[4 * i + 0] = _data[4 * i + 2]; + _current[4 * i + 3] = _data[4 * i + 3]; + } } + //Estimate motion vectors for all blocks if non-keyframe. uint32_t nhb = (ewidth + bw - 1) / bw; if(!keyframe) { motion t; @@ -319,16 +347,16 @@ namespace t.dy = 0; t.p = 0; for(size_t i = 0; i < mv.size(); i++) { - mv_detect(¤t[0], (i % nhb) * bw, (i / nhb) * bh, mv[i], t); + mv_detect((i % nhb) * bw, (i / nhb) * bh, mv[i], t); t = mv[i]; } } - serialize_frame(keyframe, ¤t[0]); - compress_packet(keyframe); - memcpy(&prev[0], ¤t[0], 4 * ewidth * eheight); - out.payload.resize(output_size); - memcpy(&out.payload[0], &output[0], output_size); + //Serialize and output. + serialize_frame(keyframe); + std::swap(current_frame, prev_frame); + out.payload.resize(outbuf_used); + memcpy(&out.payload[0], outbuf, outbuf_used); out.typecode = 0x6264; //Not exactly correct according to specs... out.hidden = false; out.indexflags = keyframe ? 0x10 : 0; @@ -346,7 +374,7 @@ namespace return out; } - + //ZMBV encoder factory object. avi_video_codec_type rgb("zmbv", "Zip Motion Blocks Video codec", []() -> avi_video_codec* { return new avi_codec_zmbv(clvl, kint, bwv, bhv);}); } From bad7915dfb5275c0aed3ff835cf4f53e6c7378e2 Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Tue, 24 Apr 2012 09:52:00 +0300 Subject: [PATCH 2/7] AVI dumper: Performance counters These are needed for dynamic compression level adjustment (not implemented yet for any codec). --- include/library/workthread.hpp | 10 ++++++++++ include/video/avi/codec.hpp | 7 +++++++ src/library/workthread.cpp | 35 ++++++++++++++++++++++++++++++---- src/video/avi.cpp | 4 ++++ src/video/avi/codec.cpp | 4 ++++ 5 files changed, 56 insertions(+), 4 deletions(-) diff --git a/include/library/workthread.hpp b/include/library/workthread.hpp index bcce5b47..736bf3e0 100644 --- a/include/library/workthread.hpp +++ b/include/library/workthread.hpp @@ -85,6 +85,14 @@ public: * Note: Don't call from outside workthread code. */ int operator()(int dummy); +/** + * Get wait counters. + * + * Retrns: Two-element tuple. + * - The first element is the amount of microseconds wait_busy() has waited. + * - The second element is the amount of microseconds wait_workflag() has waited. + */ + std::pair get_wait_count(); protected: /** * Thread entrypoint. @@ -106,6 +114,8 @@ private: volatile bool busy; volatile bool exception_caught; volatile bool exception_oom; + volatile uint64_t waitamt_busy; + volatile uint64_t waitamt_work; std::string exception_text; }; diff --git a/include/video/avi/codec.hpp b/include/video/avi/codec.hpp index 8abc3c5e..c8046e08 100644 --- a/include/video/avi/codec.hpp +++ b/include/video/avi/codec.hpp @@ -94,6 +94,13 @@ struct avi_video_codec * Returns: The packet. */ virtual avi_packet getpacket() = 0; +/** + * Send performance counters. + * + * Parameter b: Amount of busywaiting by emulator. + * Parameter w: Amount of workwaiting by dumper. + */ + virtual void send_performance_counters(uint64_t b, uint64_t w); }; /** diff --git a/src/library/workthread.cpp b/src/library/workthread.cpp index 6afab0a2..ab3aeb91 100644 --- a/src/library/workthread.cpp +++ b/src/library/workthread.cpp @@ -1,5 +1,16 @@ #include "library/workthread.hpp" #include +#include + +namespace +{ + uint64_t ticks() + { + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + } +} struct worker_thread_reflector { @@ -15,6 +26,8 @@ worker_thread::worker_thread() reflector = NULL; workflag = 0; busy = false; + waitamt_busy = 0; + waitamt_work = 0; exception_caught = false; exception_oom = false; joined = false; @@ -58,8 +71,12 @@ void worker_thread::clear_busy() void worker_thread::wait_busy() { umutex_class h(mutex); - while(busy) - condition.wait(h); + if(busy) { + uint64_t tmp = ticks(); + while(busy) + condition.wait(h); + waitamt_busy += (ticks() - tmp); + } } void worker_thread::rethrow() @@ -90,11 +107,21 @@ uint32_t worker_thread::clear_workflag(uint32_t flag) uint32_t worker_thread::wait_workflag() { umutex_class h(mutex); - while(!workflag) - condition.wait(h); + if(!workflag) { + uint64_t tmp = ticks(); + while(!workflag) + condition.wait(h); + waitamt_work += (ticks() - tmp); + } return workflag; } +std::pair worker_thread::get_wait_count() +{ + umutex_class h(mutex); + return std::make_pair(waitamt_busy, waitamt_work); +} + int worker_thread::operator()(int dummy) { try { diff --git a/src/video/avi.cpp b/src/video/avi.cpp index 297447f3..632bbe5c 100644 --- a/src/video/avi.cpp +++ b/src/video/avi.cpp @@ -127,6 +127,7 @@ namespace uint32_t segframes; uint32_t max_segframes; bool closed; + avi_video_codec* ivcodec; }; #define WORKFLAG_QUEUE_FRAME 1 @@ -137,6 +138,7 @@ namespace avi_worker::avi_worker(const struct avi_info& info) : aviout(info.prefix, *info.vcodec, *info.acodec, info.sample_rate, info.audio_chans) { + ivcodec = info.vcodec; segframes = 0; max_segframes = info.max_frames; fire(); @@ -183,6 +185,8 @@ namespace f.force_break = (segframes == max_segframes && max_segframes > 0); if(f.force_break) segframes = 0; + auto wc = get_wait_count(); + ivcodec->send_performance_counters(wc.first, wc.second); memcpy(&f.data[0], frame, 4 * frame_width * frame_height); frame = NULL; clear_workflag(WORKFLAG_QUEUE_FRAME); diff --git a/src/video/avi/codec.cpp b/src/video/avi/codec.cpp index 00660597..e8bb1527 100644 --- a/src/video/avi/codec.cpp +++ b/src/video/avi/codec.cpp @@ -22,6 +22,10 @@ avi_video_codec::format::format(uint32_t _width, uint32_t _height, uint32_t _com clr_important = 0; } +void avi_video_codec::send_performance_counters(uint64_t b, uint64_t w) +{ +} + avi_audio_codec::format::format(uint16_t tag) { max_bytes_per_sec = 200000; From f4f523c272bb4d862959af32d96c9ab38626c3cf Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Tue, 24 Apr 2012 14:37:13 +0300 Subject: [PATCH 3/7] lsnes-dumpavi: Change order of dumper startup and lua startup This is so that lua script can change settings. --- src/util/lsnes-dumpavi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/lsnes-dumpavi.cpp b/src/util/lsnes-dumpavi.cpp index c0e2dd19..c37fbfc2 100644 --- a/src/util/lsnes-dumpavi.cpp +++ b/src/util/lsnes-dumpavi.cpp @@ -306,8 +306,8 @@ int main(int argc, char** argv) our_rom = &r; our_rom->region = gtype::toromregion(movie.gametype); our_rom->load(); - dumper_startup(dumper, mode, prefix, length); startup_lua_scripts(cmdline); + dumper_startup(dumper, mode, prefix, length); main_loop(r, movie, true); } catch(std::bad_alloc& e) { OOM_panic(); From 8e7e6cd1682929f67f6ac7056bc44b27c045e1ba Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Tue, 24 Apr 2012 14:44:54 +0300 Subject: [PATCH 4/7] AVI dumper: Fix secondary audio in mode 4 --- src/video/avi.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video/avi.cpp b/src/video/avi.cpp index 632bbe5c..8667128a 100644 --- a/src/video/avi.cpp +++ b/src/video/avi.cpp @@ -281,6 +281,7 @@ namespace sbuffer[sbuffer_fill++] = l; sbuffer[sbuffer_fill++] = r; forward_samples(false); + soxdumper->sample(l, r); return; } short x[2]; From c49f305892b3ff902a28a4794f6dd1c61e55dadf Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Tue, 24 Apr 2012 15:05:29 +0300 Subject: [PATCH 5/7] AVI dumper: Do high-quality audio resampling in dedicated thread Gives faster dumping in mode 4 for quadcore. --- src/video/avi.cpp | 189 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 133 insertions(+), 56 deletions(-) diff --git a/src/video/avi.cpp b/src/video/avi.cpp index 8667128a..2e30e9be 100644 --- a/src/video/avi.cpp +++ b/src/video/avi.cpp @@ -25,6 +25,9 @@ namespace { + class avi_avsnoop; + avi_avsnoop* vid_dumper; + uint32_t rates[] = {8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000, 128000, 176400, 192000}; @@ -111,6 +114,24 @@ namespace uint32_t max_frames; }; + struct resample_worker : public worker_thread + { + resample_worker(double _ratio, uint32_t _nch); + ~resample_worker(); + void entry(); + void sendblock(short* block, size_t frames); + void sendend(); + private: + std::vector buffers; + std::vector buffers2; + std::vector buffers3; + std::vector buffers4; + size_t bufused; + double ratio; + uint32_t nch; + void* resampler; + }; + struct avi_worker : public worker_thread { avi_worker(const struct avi_info& info); @@ -134,7 +155,6 @@ namespace #define WORKFLAG_FLUSH 2 #define WORKFLAG_END 4 - avi_worker::avi_worker(const struct avi_info& info) : aviout(info.prefix, *info.vcodec, *info.acodec, info.sample_rate, info.audio_chans) { @@ -212,6 +232,64 @@ namespace } } + resample_worker::resample_worker(double _ratio, uint32_t _nch) + { + ratio = _ratio; + nch = _nch; + buffers.resize(RESAMPLE_BUFFER * nch); + buffers2.resize(RESAMPLE_BUFFER * nch); + buffers3.resize((RESAMPLE_BUFFER * nch * ratio) + 128 * nch); + buffers4.resize((RESAMPLE_BUFFER * nch * ratio) + 128 * nch); + bufused = 0; +#ifdef WITH_SECRET_RABBIT_CODE + int errc = 0; + resampler = src_new(SRC_SINC_BEST_QUALITY, nch, &errc); + if(errc) + throw std::runtime_error(std::string("Error initing libsamplerate: ") + + src_strerror(errc)); +#else + throw std::runtime_error("HQ sample rate conversion not available"); +#endif + fire(); + } + + resample_worker::~resample_worker() + { +#ifdef WITH_SECRET_RABBIT_CODE + src_delete((SRC_STATE*)resampler); +#endif + } + + void resample_worker::sendend() + { + rethrow(); + set_workflag(WORKFLAG_END); + request_quit(); + } + + void resample_worker::sendblock(short* block, size_t frames) + { +again: + rethrow(); + wait_busy(); + if(bufused + frames < RESAMPLE_BUFFER) { + memcpy(&buffers[bufused * nch], block, 2 * nch * frames); + bufused += frames; + block += (frames * nch); + frames = 0; + } else if(bufused < RESAMPLE_BUFFER) { + size_t processable = RESAMPLE_BUFFER - bufused; + memcpy(&buffers[bufused * nch], block, 2 * nch * processable); + block += (processable * nch); + frames -= processable; + bufused = RESAMPLE_BUFFER; + } + set_busy(); + set_workflag(WORKFLAG_QUEUE_FRAME); + if(frames > 0) + goto again; + } + void waitfn(); class avi_avsnoop : public information_dispatch @@ -230,32 +308,19 @@ namespace soundrate.second, 2); dcounter = 0; have_dumped_frame = false; - resampler = NULL; + resampler_w = NULL; if(soundrate_setting == 4) { double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first; - sbuffer.resize(RESAMPLE_BUFFER * chans); - sbuffer2.resize(RESAMPLE_BUFFER * chans); - fbuffer.resize((RESAMPLE_BUFFER * ratio + 128) * chans + 128); - fbuffer2.resize((RESAMPLE_BUFFER * ratio + 128) * chans + 128); sbuffer_fill = 0; -#ifdef WITH_SECRET_RABBIT_CODE - int errc = 0; - resampler = src_new(SRC_SINC_BEST_QUALITY, info.audio_chans, &errc); - if(errc) - throw std::runtime_error(std::string("Error initing libsamplerate: ") + - src_strerror(errc)); -#else - throw std::runtime_error("HQ sample rate conversion not available"); -#endif + sbuffer.resize(RESAMPLE_BUFFER * chans); + resampler_w = new resample_worker(ratio, chans); } } ~avi_avsnoop() throw() { -#ifdef WITH_SECRET_RABBIT_CODE - if(resampler) - src_delete((SRC_STATE*)resampler); -#endif + if(resampler_w) + delete resampler_w; delete worker; delete soxdumper; } @@ -275,12 +340,15 @@ namespace void on_sample(short l, short r) { - if(resampler) { + if(resampler_w) { if(!have_dumped_frame) return; sbuffer[sbuffer_fill++] = l; sbuffer[sbuffer_fill++] = r; - forward_samples(false); + if(sbuffer_fill == sbuffer.size()) { + resampler_w->sendblock(&sbuffer[0], sbuffer_fill / chans); + sbuffer_fill = 0; + } soxdumper->sample(l, r); return; } @@ -301,7 +369,8 @@ namespace void on_dump_end() { if(worker) { - forward_samples(true); + if(resampler_w) + resampler_w->sendend(); worker->request_quit(); } if(soxdumper) @@ -317,52 +386,19 @@ namespace return true; } avi_worker* worker; + resample_worker* resampler_w; private: - void forward_samples(bool eos) - { - if(!eos && sbuffer_fill < sbuffer.size()) - return; -#ifdef WITH_SECRET_RABBIT_CODE - double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first; - SRC_DATA block; - src_short_to_float_array(&sbuffer[0], &sbuffer2[0], sbuffer_fill); - block.data_in = &sbuffer2[0]; - block.data_out = &fbuffer2[0]; - block.input_frames = sbuffer_fill / chans; - block.input_frames_used = 0; - block.output_frames = fbuffer2.size() / chans; - block.output_frames_gen = 0; - block.end_of_input = eos ? 1 : 0; - block.src_ratio = ratio; - int errc = src_process((SRC_STATE*)resampler, &block); - if(errc) - throw std::runtime_error(std::string("Error using libsamplerate: ") + - src_strerror(errc)); - src_float_to_short_array(&fbuffer2[0], &fbuffer[0], block.output_frames_gen * chans); - worker->queue_audio(&fbuffer[0], block.output_frames_gen * chans); - if(block.input_frames_used * chans < sbuffer_fill) - memmove(&sbuffer[0], &sbuffer[block.output_frames_gen * chans], sbuffer_fill - - block.input_frames_used * chans); - sbuffer_fill -= block.input_frames_used * chans; -#endif - } sox_dumper* soxdumper; screen dscr; unsigned dcounter; bool have_dumped_frame; std::pair soundrate; uint32_t audio_record_rate; - void* resampler; std::vector sbuffer; - std::vector sbuffer2; - std::vector fbuffer2; - std::vector fbuffer; size_t sbuffer_fill; uint32_t chans; }; - avi_avsnoop* vid_dumper; - void waitfn() { vid_dumper->worker->wait_busy(); @@ -454,4 +490,45 @@ namespace adv_avi_dumper::~adv_avi_dumper() throw() { } + + void resample_worker::entry() + { + while(1) { + wait_workflag(); + uint32_t work = clear_workflag(~WORKFLAG_QUIT_REQUEST); + if(work & (WORKFLAG_QUEUE_FRAME | WORKFLAG_END)) { +#ifdef WITH_SECRET_RABBIT_CODE +again: + SRC_DATA block; + src_short_to_float_array(&buffers[0], &buffers2[0], bufused * nch); + block.data_in = &buffers2[0]; + block.data_out = &buffers3[0]; + block.input_frames = bufused; + block.input_frames_used = 0; + block.output_frames = buffers3.size() / nch; + block.output_frames_gen = 0; + block.end_of_input = (work & WORKFLAG_END) ? 1 : 0; + block.src_ratio = ratio; + int errc = src_process((SRC_STATE*)resampler, &block); + if(errc) + throw std::runtime_error(std::string("Error using libsamplerate: ") + + src_strerror(errc)); + src_float_to_short_array(&buffers3[0], &buffers4[0], block.output_frames_gen * nch); + vid_dumper->worker->queue_audio(&buffers4[0], block.output_frames_gen * nch); + if(block.input_frames_used < bufused) + memmove(&buffers[0], &buffers[block.output_frames_gen * nch], (bufused - + block.input_frames_used) * nch); + bufused -= block.input_frames_used; + if(block.output_frames_gen > 0 && work & WORKFLAG_END) + goto again; //Try again to get all the samples. +#endif + clear_workflag(WORKFLAG_END | WORKFLAG_FLUSH | WORKFLAG_QUEUE_FRAME); + clear_busy(); + if(work & WORKFLAG_END) + return; + } + if(work == WORKFLAG_QUIT_REQUEST) + break; + } + } } From bfe5e592224ec082fc9d56d79031e89c2b9adc8d Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Wed, 25 Apr 2012 14:07:47 +0300 Subject: [PATCH 6/7] AVI ZMBV: Pad the buffers instead of doing OOB access Padding the buffers is faster than OOB access and won't eat that much memory. --- src/video/avi/codec/video/zmbv.cpp | 111 +++++++++++------------------ 1 file changed, 42 insertions(+), 69 deletions(-) diff --git a/src/video/avi/codec/video/zmbv.cpp b/src/video/avi/codec/video/zmbv.cpp index db136809..a39dcf77 100644 --- a/src/video/avi/codec/video/zmbv.cpp +++ b/src/video/avi/codec/video/zmbv.cpp @@ -6,6 +6,9 @@ #include #include +//The largest possible vector. +#define MAXIMUM_VECTOR 64 + namespace { numeric_setting clvl("avi-zmbv-compression", 0, 9, 7); @@ -73,70 +76,30 @@ namespace size_t outbuf_used; //Zlib state. z_stream zstream; - //Compute penalty for motion vector (dx, dy) on block with upper-left corner at (bx, by). - uint32_t mv_penalty(int32_t bx, int32_t by, int dx, int dy); + uint32_t mv_penalty(uint32_t bx, uint32_t by, int dx, int dy); //Do motion detection for block with upper-left corner at (bx, by). M is filled with the resulting //motion vector and t is initial guess for the motion vector. - void mv_detect(int32_t bx, int32_t by, motion& m, motion t); + void mv_detect(uint32_t bx, uint32_t by, motion& m, motion t); //Serialize movement vectors and furrent frame data to output buffer. If keyframe is true, keyframe is //written, otherwise non-keyframe. void serialize_frame(bool keyframe); }; - //Intersect the range [x, x+b) with [0, w). start is where the range starts, size is size of range, - //and offset is number of numbers clipped from low bound. - void rbound(int32_t x, int32_t w, uint32_t b, int32_t& start, int32_t& offset, int32_t& size) - { - start = x; - offset = 0; - size = b; - if(start < 0) { - offset = -start; - start = 0; - size = b - offset; - } - if(start + size > w) - size = w - start; - if(size < 0) - size = 0; - start = x + offset; - } - //Compute XOR of blocks. - void xor_blocks(uint32_t* target, uint32_t* src1, int32_t src1x, int32_t src1y, - int32_t src1w, int32_t src1h, uint32_t* src2, int32_t src2x, int32_t src2y, - int32_t src2w, int32_t src2h, uint32_t bw, uint32_t bh) + void xor_blocks(uint32_t* target, uint32_t* src1, uint32_t src1x, uint32_t src1y, + uint32_t src1w, uint32_t src1h, uint32_t* src2, uint32_t src2x, uint32_t src2y, + uint32_t src2w, uint32_t src2h, uint32_t bw, uint32_t bh) { - int32_t h_s1start; - int32_t h_s1off; - int32_t h_s1size; - int32_t h_s2start; - int32_t h_s2off; - int32_t h_s2size; - int32_t v_s1start; - int32_t v_s1off; - int32_t v_s1size; - int32_t v_s2start; - int32_t v_s2off; - int32_t v_s2size; - - rbound(src1x, src1w, bw, h_s1start, h_s1off, h_s1size); - rbound(src2x, src2w, bw, h_s2start, h_s2off, h_s2size); - rbound(src1y, src1h, bh, v_s1start, v_s1off, v_s1size); - rbound(src2y, src2h, bh, v_s2start, v_s2off, v_s2size); - - if(h_s1size < bw || v_s1size < bh) - memset(target, 0, 4 * bw * bh); - uint32_t* t1ptr = target + v_s1off * bh + h_s1off; - uint32_t* t2ptr = target + v_s2off * bh + h_s2off; - uint32_t* s1ptr = src1 + v_s1start * src1w + h_s1start; - uint32_t* s2ptr = src2 + v_s2start * src2w + h_s2start; - for(int32_t y = 0; y < v_s1size; y++) - memcpy(t1ptr + bw * y, s1ptr + src1w * y, 4 * h_s1size); - for(int32_t y = 0; y < v_s2size; y++) - for(int32_t x = 0; x < h_s2size; x++) - t2ptr[y * bw + x] ^= s2ptr[y * src2w + x]; + uint32_t* s1ptr = src1 + src1y * src1w + src1x; + uint32_t* s2ptr = src2 + src2y * src2w + src2x; + for(uint32_t y = 0; y < bh; y++) { + for(uint32_t x = 0; x < bw; x++) + target[x] = s1ptr[x] ^ s2ptr[x]; + target += bw; + s1ptr += src1w; + s2ptr += src2w; + } } //Estimate entropy. @@ -152,21 +115,26 @@ namespace return e; } - uint32_t avi_codec_zmbv::mv_penalty(int32_t bx, int32_t by, int dx, int dy) + uint32_t avi_codec_zmbv::mv_penalty(uint32_t bx, uint32_t by, int dx, int dy) { //Penalty is entropy estimate of resulting block. - xor_blocks(scratch, current_frame, bx, by, ewidth, eheight, prev_frame, bx + dx, by + dy, ewidth, - eheight, bw, bh); + xor_blocks(scratch, current_frame, bx, by, ewidth + 2 * MAXIMUM_VECTOR, eheight, prev_frame, bx + dx, + by + dy, ewidth + 2 * MAXIMUM_VECTOR, eheight, bw, bh); return entropy(scratch, bw, bh); } void avi_codec_zmbv::serialize_frame(bool keyframe) { uint32_t nhb, nvb, nb; + //In_stride/in_offset is in units of words, out_stride is in units of bytes. + size_t in_stride = (ewidth + 2 * MAXIMUM_VECTOR); + size_t in_offset = MAXIMUM_VECTOR * (in_stride + 1); size_t osize = 0; if(keyframe) { //Just copy the frame data and compress that. - memcpy(oscratch, current_frame, 4 * ewidth * eheight); + for(size_t y = 0; y < eheight; y++) + memcpy(oscratch + 4 * ewidth * y, current_frame + in_stride * y + in_offset, + 4 * ewidth); osize = 4 * ewidth * eheight; goto compress; } @@ -187,10 +155,11 @@ namespace for(size_t i = 0; i < nb; i++) { if(mv[i].p == 0) continue; - int32_t bx = (i % nhb) * bw; - int32_t by = (i / nhb) * bh; - xor_blocks(reinterpret_cast(oscratch + osize), current_frame, bx, by, ewidth, - eheight, prev_frame, bx + mv[i].dx, by + mv[i].dy, ewidth, eheight, bw, bh); + uint32_t bx = (i % nhb) * bw + MAXIMUM_VECTOR; + uint32_t by = (i / nhb) * bh + MAXIMUM_VECTOR; + xor_blocks(reinterpret_cast(oscratch + osize), current_frame, bx, by, ewidth + 2 * + MAXIMUM_VECTOR, eheight, prev_frame, bx + mv[i].dx, by + mv[i].dy, ewidth + 2 * + MAXIMUM_VECTOR, eheight, bw, bh); osize += 4 * bw * bh; } compress: @@ -227,7 +196,7 @@ compress: return (best.p == 0); } - void avi_codec_zmbv::mv_detect(int32_t bx, int32_t by, motion& m, motion t) + void avi_codec_zmbv::mv_detect(uint32_t bx, uint32_t by, motion& m, motion t) { //Try the suggested vector. motion c; @@ -290,10 +259,10 @@ compress: ready_flag = true; avi_video_codec::format fmt(ewidth, eheight, 0x56424D5A, 24); - pixbuf.resize(2 * ewidth * eheight + bw * bh); + pixbuf.resize(2 * (ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR) + bw * bh); current_frame = &pixbuf[0]; - prev_frame = &pixbuf[ewidth * eheight]; - scratch = &pixbuf[2 * ewidth * eheight]; + prev_frame = &pixbuf[(ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR)]; + scratch = &pixbuf[2 * (ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR)]; mv.resize(((ewidth + bw - 1) / bw) * ((eheight + bh - 1) / bh)); size_t maxdiff = 4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight; outbuf_size = deflateBound(&zstream, maxdiff) + 128; @@ -316,9 +285,12 @@ compress: //If bigendian, swap. short magic = 258; + size_t frameoffset = MAXIMUM_VECTOR * (ewidth + 2 * MAXIMUM_VECTOR + 1); + size_t framestride = ewidth + 2 * MAXIMUM_VECTOR; if(reinterpret_cast(&magic)[0] == 1) for(size_t y = 0; y < iheight; y++) { - uint8_t* _current = reinterpret_cast(current_frame + ewidth * y); + uint8_t* _current = reinterpret_cast(current_frame + frameoffset + + framestride * y); uint8_t* _data = reinterpret_cast(&data[iwidth * y]); for(size_t i = 0; i < iwidth; i++) { _current[4 * i + 0] = _data[4 * i + 3]; @@ -329,7 +301,8 @@ compress: } else for(size_t y = 0; y < iheight; y++) { - uint8_t* _current = reinterpret_cast(current_frame + ewidth * y); + uint8_t* _current = reinterpret_cast(current_frame + frameoffset + + framestride * y); uint8_t* _data = reinterpret_cast(&data[iwidth * y]); for(size_t i = 0; i < iwidth; i++) { _current[4 * i + 2] = _data[4 * i + 0]; @@ -347,7 +320,7 @@ compress: t.dy = 0; t.p = 0; for(size_t i = 0; i < mv.size(); i++) { - mv_detect((i % nhb) * bw, (i / nhb) * bh, mv[i], t); + mv_detect((i % nhb) * bw + MAXIMUM_VECTOR, (i / nhb) * bh + MAXIMUM_VECTOR, mv[i], t); t = mv[i]; } } From eba73d320884f8e64fea485c36fa3b1bedb917cc Mon Sep 17 00:00:00 2001 From: Ilari Liusvaara Date: Wed, 25 Apr 2012 17:15:07 +0300 Subject: [PATCH 7/7] =?UTF-8?q?lsnes=20rr1-=CE=947=CE=B51?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- manual.lyx | 16 ++++++++++++++++ manual.txt | 8 ++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0e194a09..980a9604 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1-Δ7 \ No newline at end of file +1-Δ7ε1 \ No newline at end of file diff --git a/manual.lyx b/manual.lyx index 7a8e1500..f64aea45 100644 --- a/manual.lyx +++ b/manual.lyx @@ -6176,5 +6176,21 @@ Wxwidgets: Redesign hotkeys dialog to avoid tree control (tree control doesn't Start paused option. \end_layout +\begin_layout Subsection +rr1-delta7epsilon1 +\end_layout + +\begin_layout Itemize +AVI: ZMBV support +\end_layout + +\begin_layout Itemize +lsnes-dumpavi: Start Lua before starting dumper +\end_layout + +\begin_layout Itemize +AVI: Fix secondary audio in mode 4. +\end_layout + \end_body \end_document diff --git a/manual.txt b/manual.txt index 45ad32eb..8a5a8441 100644 --- a/manual.txt +++ b/manual.txt @@ -3048,3 +3048,11 @@ set-axis joystick0axis19 disabled • Start paused option. +15.58 rr1-delta7epsilon1 + +• AVI: ZMBV support + +• lsnes-dumpavi: Start Lua before starting dumper + +• AVI: Fix secondary audio in mode 4. +