Merge branch 'rr1-maint'

This commit is contained in:
Ilari Liusvaara 2012-04-25 17:20:25 +03:00
commit 1e5258c8d4
10 changed files with 377 additions and 222 deletions

View file

@ -1 +1 @@
1-Δ7 1-Δ7ε1

View file

@ -85,6 +85,14 @@ public:
* Note: Don't call from outside workthread code. * Note: Don't call from outside workthread code.
*/ */
int operator()(int dummy); int operator()(int dummy);
/**
* Get wait counters.
*
* Returns: Two-element tuple.
* - The first element is the amount of microseconds wait_busy() has waited.
* - The second element is the amount of microseconds wait_workflag() has waited.
*/
std::pair<uint64_t, uint64_t> get_wait_count();
protected: protected:
/** /**
* Thread entrypoint. * Thread entrypoint.
@ -106,6 +114,8 @@ private:
volatile bool busy; volatile bool busy;
volatile bool exception_caught; volatile bool exception_caught;
volatile bool exception_oom; volatile bool exception_oom;
volatile uint64_t waitamt_busy;
volatile uint64_t waitamt_work;
std::string exception_text; std::string exception_text;
}; };

View file

@ -94,6 +94,13 @@ struct avi_video_codec
* Returns: The packet. * Returns: The packet.
*/ */
virtual avi_packet getpacket() = 0; virtual avi_packet getpacket() = 0;
/**
* Send performance counters.
*
* Parameter b: Amount of busywaiting by emulator.
* Parameter w: Amount of workwaiting by dumper.
*/
virtual void send_performance_counters(uint64_t b, uint64_t w);
}; };
/** /**

View file

@ -6176,5 +6176,21 @@ Wxwidgets: Redesign hotkeys dialog to avoid tree control (tree control doesn't
Start paused option. Start paused option.
\end_layout \end_layout
\begin_layout Subsection
rr1-delta7epsilon1
\end_layout
\begin_layout Itemize
AVI: ZMBV support
\end_layout
\begin_layout Itemize
lsnes-dumpavi: Start Lua before starting dumper
\end_layout
\begin_layout Itemize
AVI: Fix secondary audio in mode 4.
\end_layout
\end_body \end_body
\end_document \end_document

View file

@ -3048,3 +3048,11 @@ set-axis joystick0axis19 disabled
• Start paused option. • Start paused option.
15.58 rr1-delta7epsilon1
• AVI: ZMBV support
• lsnes-dumpavi: Start Lua before starting dumper
• AVI: Fix secondary audio in mode 4.

View file

@ -1,5 +1,16 @@
#include "library/workthread.hpp" #include "library/workthread.hpp"
#include <stdexcept> #include <stdexcept>
#include <sys/time.h>
namespace
{
	//Read the wall clock with gettimeofday() and express it as a single microsecond count.
	//
	//Returns: Seconds * 1000000 + microseconds, as reported by gettimeofday().
	uint64_t ticks()
	{
		struct timeval now;
		gettimeofday(&now, NULL);
		const uint64_t whole_seconds = static_cast<uint64_t>(now.tv_sec);
		return whole_seconds * 1000000 + now.tv_usec;
	}
}
struct worker_thread_reflector struct worker_thread_reflector
{ {
@ -15,6 +26,8 @@ worker_thread::worker_thread()
reflector = NULL; reflector = NULL;
workflag = 0; workflag = 0;
busy = false; busy = false;
waitamt_busy = 0;
waitamt_work = 0;
exception_caught = false; exception_caught = false;
exception_oom = false; exception_oom = false;
joined = false; joined = false;
@ -58,8 +71,12 @@ void worker_thread::clear_busy()
void worker_thread::wait_busy() void worker_thread::wait_busy()
{ {
umutex_class h(mutex); umutex_class h(mutex);
while(busy) if(busy) {
condition.wait(h); uint64_t tmp = ticks();
while(busy)
condition.wait(h);
waitamt_busy += (ticks() - tmp);
}
} }
void worker_thread::rethrow() void worker_thread::rethrow()
@ -90,11 +107,21 @@ uint32_t worker_thread::clear_workflag(uint32_t flag)
uint32_t worker_thread::wait_workflag() uint32_t worker_thread::wait_workflag()
{ {
umutex_class h(mutex); umutex_class h(mutex);
while(!workflag) if(!workflag) {
condition.wait(h); uint64_t tmp = ticks();
while(!workflag)
condition.wait(h);
waitamt_work += (ticks() - tmp);
}
return workflag; return workflag;
} }
//Snapshot both wait counters while holding the worker mutex.
//
//Returns: Pair of (time accumulated by wait_busy(), time accumulated by wait_workflag()), in microseconds.
std::pair<uint64_t, uint64_t> worker_thread::get_wait_count()
{
	umutex_class guard(mutex);
	const uint64_t busy_wait = waitamt_busy;
	const uint64_t work_wait = waitamt_work;
	return std::pair<uint64_t, uint64_t>(busy_wait, work_wait);
}
int worker_thread::operator()(int dummy) int worker_thread::operator()(int dummy)
{ {
try { try {

View file

@ -292,8 +292,8 @@ int main(int argc, char** argv)
if(!our_rom->region) if(!our_rom->region)
throw std::runtime_error("Core does not support game type '" + movie.gametype + "'"); throw std::runtime_error("Core does not support game type '" + movie.gametype + "'");
our_rom->load(); our_rom->load();
dumper_startup(dumper, mode, prefix, length);
startup_lua_scripts(cmdline); startup_lua_scripts(cmdline);
dumper_startup(dumper, mode, prefix, length);
main_loop(r, movie, true); main_loop(r, movie, true);
} catch(std::bad_alloc& e) { } catch(std::bad_alloc& e) {
OOM_panic(); OOM_panic();

View file

@ -25,6 +25,9 @@
namespace namespace
{ {
class avi_avsnoop;
avi_avsnoop* vid_dumper;
uint32_t rates[] = {8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000, uint32_t rates[] = {8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000,
128000, 176400, 192000}; 128000, 176400, 192000};
@ -111,6 +114,24 @@ namespace
uint32_t max_frames; uint32_t max_frames;
}; };
//Worker thread that resamples blocks of interleaved short samples and forwards the result to the
//AVI worker's audio queue (see entry()).
struct resample_worker : public worker_thread
{
//Parameter _ratio: Conversion ratio, passed to the resampler as src_ratio.
//Parameter _nch: Number of interleaved audio channels.
//Throws std::runtime_error: The resampler can't be initialized or is not compiled in.
resample_worker(double _ratio, uint32_t _nch);
~resample_worker();
//Thread body: waits for work flags and resamples the buffered input.
void entry();
//Queue audio for resampling.
//
//Parameter block: Interleaved samples, frames * nch shorts.
//Parameter frames: Number of frames in block.
void sendblock(short* block, size_t frames);
//Signal end of input (sets WORKFLAG_END and requests quit).
void sendend();
private:
//Input samples as shorts (filled by sendblock()).
std::vector<short> buffers;
//Input samples converted to float for the resampler.
std::vector<float> buffers2;
//Resampler output as floats.
std::vector<float> buffers3;
//Resampler output converted back to shorts, handed to queue_audio().
std::vector<short> buffers4;
//Number of frames currently held in buffers.
size_t bufused;
//Conversion ratio given to the resampler.
double ratio;
//Number of channels.
uint32_t nch;
//Resampler handle (SRC_STATE* when WITH_SECRET_RABBIT_CODE is defined).
void* resampler;
};
struct avi_worker : public worker_thread struct avi_worker : public worker_thread
{ {
avi_worker(const struct avi_info& info); avi_worker(const struct avi_info& info);
@ -127,16 +148,17 @@ namespace
uint32_t segframes; uint32_t segframes;
uint32_t max_segframes; uint32_t max_segframes;
bool closed; bool closed;
avi_video_codec* ivcodec;
}; };
#define WORKFLAG_QUEUE_FRAME 1 #define WORKFLAG_QUEUE_FRAME 1
#define WORKFLAG_FLUSH 2 #define WORKFLAG_FLUSH 2
#define WORKFLAG_END 4 #define WORKFLAG_END 4
avi_worker::avi_worker(const struct avi_info& info) avi_worker::avi_worker(const struct avi_info& info)
: aviout(info.prefix, *info.vcodec, *info.acodec, info.sample_rate, info.audio_chans) : aviout(info.prefix, *info.vcodec, *info.acodec, info.sample_rate, info.audio_chans)
{ {
ivcodec = info.vcodec;
segframes = 0; segframes = 0;
max_segframes = info.max_frames; max_segframes = info.max_frames;
fire(); fire();
@ -183,6 +205,8 @@ namespace
f.force_break = (segframes == max_segframes && max_segframes > 0); f.force_break = (segframes == max_segframes && max_segframes > 0);
if(f.force_break) if(f.force_break)
segframes = 0; segframes = 0;
auto wc = get_wait_count();
ivcodec->send_performance_counters(wc.first, wc.second);
memcpy(&f.data[0], frame, 4 * frame_width * frame_height); memcpy(&f.data[0], frame, 4 * frame_width * frame_height);
frame = NULL; frame = NULL;
clear_workflag(WORKFLAG_QUEUE_FRAME); clear_workflag(WORKFLAG_QUEUE_FRAME);
@ -208,6 +232,64 @@ namespace
} }
} }
//Size the sample buffers, create the libsamplerate converter and then call fire().
//
//Parameter _ratio: Conversion ratio (stored; later passed to the converter as src_ratio).
//Parameter _nch: Number of audio channels.
//Throws std::runtime_error: libsamplerate failed to initialize, or support for it is not compiled in.
resample_worker::resample_worker(double _ratio, uint32_t _nch)
	: bufused(0), ratio(_ratio), nch(_nch)
{
	//Input side holds RESAMPLE_BUFFER frames; output side is that scaled by ratio plus 128 frames of slack.
	const size_t input_samples = RESAMPLE_BUFFER * nch;
	const size_t output_samples = (RESAMPLE_BUFFER * nch * ratio) + 128 * nch;
	buffers.resize(input_samples);
	buffers2.resize(input_samples);
	buffers3.resize(output_samples);
	buffers4.resize(output_samples);
#ifdef WITH_SECRET_RABBIT_CODE
	int status = 0;
	resampler = src_new(SRC_SINC_BEST_QUALITY, nch, &status);
	if(status)
		throw std::runtime_error(std::string("Error initing libsamplerate: ") +
			src_strerror(status));
#else
	throw std::runtime_error("HQ sample rate conversion not available");
#endif
	fire();
}
//Release the libsamplerate converter when support for it is compiled in; otherwise nothing to do.
resample_worker::~resample_worker()
{
#ifdef WITH_SECRET_RABBIT_CODE
	SRC_STATE* state = static_cast<SRC_STATE*>(resampler);
	src_delete(state);
#endif
}
//Signal end of audio input to the resampler thread.
//
//Calls rethrow() first, then raises WORKFLAG_END (entry() runs the final conversion with end_of_input
//set when it sees this flag) and finally requests the thread to quit.
void resample_worker::sendend()
{
//NOTE(review): rethrow() presumably propagates an exception caught on the worker thread -- confirm.
rethrow();
set_workflag(WORKFLAG_END);
request_quit();
}
//Hand a block of interleaved samples to the resampler thread.
//
//The block is copied into the input buffer in pieces of at most the free space left in it. After each
//piece the worker is marked busy and WORKFLAG_QUEUE_FRAME is raised; the next round waits until the
//worker is no longer busy. At least one round is always performed.
//
//Parameter block: Interleaved samples, frames * nch shorts.
//Parameter frames: Number of frames in block.
void resample_worker::sendblock(short* block, size_t frames)
{
	do {
		rethrow();
		wait_busy();
		//Decide how many frames can be taken this round.
		size_t take = 0;
		if(bufused + frames < RESAMPLE_BUFFER)
			take = frames;
		else if(bufused < RESAMPLE_BUFFER)
			take = RESAMPLE_BUFFER - bufused;
		if(take > 0) {
			memcpy(&buffers[bufused * nch], block, 2 * nch * take);
			block += (take * nch);
			frames -= take;
			bufused += take;
		}
		set_busy();
		set_workflag(WORKFLAG_QUEUE_FRAME);
	} while(frames > 0);
}
void waitfn(); void waitfn();
class avi_avsnoop : public information_dispatch class avi_avsnoop : public information_dispatch
@ -226,32 +308,19 @@ namespace
soundrate.second, 2); soundrate.second, 2);
dcounter = 0; dcounter = 0;
have_dumped_frame = false; have_dumped_frame = false;
resampler = NULL; resampler_w = NULL;
if(soundrate_setting == 4) { if(soundrate_setting == 4) {
double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first; double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first;
sbuffer.resize(RESAMPLE_BUFFER * chans);
sbuffer2.resize(RESAMPLE_BUFFER * chans);
fbuffer.resize((RESAMPLE_BUFFER * ratio + 128) * chans + 128);
fbuffer2.resize((RESAMPLE_BUFFER * ratio + 128) * chans + 128);
sbuffer_fill = 0; sbuffer_fill = 0;
#ifdef WITH_SECRET_RABBIT_CODE sbuffer.resize(RESAMPLE_BUFFER * chans);
int errc = 0; resampler_w = new resample_worker(ratio, chans);
resampler = src_new(SRC_SINC_BEST_QUALITY, info.audio_chans, &errc);
if(errc)
throw std::runtime_error(std::string("Error initing libsamplerate: ") +
src_strerror(errc));
#else
throw std::runtime_error("HQ sample rate conversion not available");
#endif
} }
} }
~avi_avsnoop() throw() ~avi_avsnoop() throw()
{ {
#ifdef WITH_SECRET_RABBIT_CODE if(resampler_w)
if(resampler) delete resampler_w;
src_delete((SRC_STATE*)resampler);
#endif
delete worker; delete worker;
delete soxdumper; delete soxdumper;
} }
@ -271,12 +340,16 @@ namespace
void on_sample(short l, short r) void on_sample(short l, short r)
{ {
if(resampler) { if(resampler_w) {
if(!have_dumped_frame) if(!have_dumped_frame)
return; return;
sbuffer[sbuffer_fill++] = l; sbuffer[sbuffer_fill++] = l;
sbuffer[sbuffer_fill++] = r; sbuffer[sbuffer_fill++] = r;
forward_samples(false); if(sbuffer_fill == sbuffer.size()) {
resampler_w->sendblock(&sbuffer[0], sbuffer_fill / chans);
sbuffer_fill = 0;
}
soxdumper->sample(l, r);
return; return;
} }
short x[2]; short x[2];
@ -296,7 +369,8 @@ namespace
void on_dump_end() void on_dump_end()
{ {
if(worker) { if(worker) {
forward_samples(true); if(resampler_w)
resampler_w->sendend();
worker->request_quit(); worker->request_quit();
} }
if(soxdumper) if(soxdumper)
@ -312,52 +386,19 @@ namespace
return true; return true;
} }
avi_worker* worker; avi_worker* worker;
resample_worker* resampler_w;
private: private:
void forward_samples(bool eos)
{
if(!eos && sbuffer_fill < sbuffer.size())
return;
#ifdef WITH_SECRET_RABBIT_CODE
double ratio = 1.0 * audio_record_rate * soundrate.second / soundrate.first;
SRC_DATA block;
src_short_to_float_array(&sbuffer[0], &sbuffer2[0], sbuffer_fill);
block.data_in = &sbuffer2[0];
block.data_out = &fbuffer2[0];
block.input_frames = sbuffer_fill / chans;
block.input_frames_used = 0;
block.output_frames = fbuffer2.size() / chans;
block.output_frames_gen = 0;
block.end_of_input = eos ? 1 : 0;
block.src_ratio = ratio;
int errc = src_process((SRC_STATE*)resampler, &block);
if(errc)
throw std::runtime_error(std::string("Error using libsamplerate: ") +
src_strerror(errc));
src_float_to_short_array(&fbuffer2[0], &fbuffer[0], block.output_frames_gen * chans);
worker->queue_audio(&fbuffer[0], block.output_frames_gen * chans);
if(block.input_frames_used * chans < sbuffer_fill)
memmove(&sbuffer[0], &sbuffer[block.output_frames_gen * chans], sbuffer_fill -
block.input_frames_used * chans);
sbuffer_fill -= block.input_frames_used * chans;
#endif
}
sox_dumper* soxdumper; sox_dumper* soxdumper;
screen<false> dscr; screen<false> dscr;
unsigned dcounter; unsigned dcounter;
bool have_dumped_frame; bool have_dumped_frame;
std::pair<uint32_t, uint32_t> soundrate; std::pair<uint32_t, uint32_t> soundrate;
uint32_t audio_record_rate; uint32_t audio_record_rate;
void* resampler;
std::vector<short> sbuffer; std::vector<short> sbuffer;
std::vector<float> sbuffer2;
std::vector<float> fbuffer2;
std::vector<short> fbuffer;
size_t sbuffer_fill; size_t sbuffer_fill;
uint32_t chans; uint32_t chans;
}; };
avi_avsnoop* vid_dumper;
void waitfn() void waitfn()
{ {
vid_dumper->worker->wait_busy(); vid_dumper->worker->wait_busy();
@ -449,4 +490,45 @@ namespace
adv_avi_dumper::~adv_avi_dumper() throw() adv_avi_dumper::~adv_avi_dumper() throw()
{ {
} }
//Thread body of the resampler.
//
//Waits for work flags. On WORKFLAG_QUEUE_FRAME or WORKFLAG_END the bufused frames held in buffers are
//converted with libsamplerate (when WITH_SECRET_RABBIT_CODE is defined) and the output is passed to
//vid_dumper->worker->queue_audio(). Input frames the converter did not consume are moved to the front
//of buffers so the next round continues from them. With WORKFLAG_END the conversion is repeated with
//end_of_input set until no more output is produced.
//
//Returns after handling WORKFLAG_END, or when WORKFLAG_QUIT_REQUEST is the only flag set.
//
//Throws std::runtime_error: libsamplerate reported an error.
void resample_worker::entry()
{
	while(1) {
		wait_workflag();
		//Clear everything except the quit request.
		//NOTE(review): clear_workflag() presumably returns the flags as they were before clearing -- confirm.
		uint32_t work = clear_workflag(~WORKFLAG_QUIT_REQUEST);
		if(work & (WORKFLAG_QUEUE_FRAME | WORKFLAG_END)) {
#ifdef WITH_SECRET_RABBIT_CODE
			bool retry;
			do {
				SRC_DATA block;
				src_short_to_float_array(&buffers[0], &buffers2[0], bufused * nch);
				block.data_in = &buffers2[0];
				block.data_out = &buffers3[0];
				block.input_frames = bufused;
				block.input_frames_used = 0;
				block.output_frames = buffers3.size() / nch;
				block.output_frames_gen = 0;
				block.end_of_input = (work & WORKFLAG_END) ? 1 : 0;
				block.src_ratio = ratio;
				int errc = src_process((SRC_STATE*)resampler, &block);
				if(errc)
					throw std::runtime_error(std::string("Error using libsamplerate: ") +
						src_strerror(errc));
				src_float_to_short_array(&buffers3[0], &buffers4[0], block.output_frames_gen * nch);
				vid_dumper->worker->queue_audio(&buffers4[0], block.output_frames_gen * nch);
				//Keep the unconsumed input: it starts right after the frames the converter used,
				//and memmove() wants the length in bytes, not in samples.
				const size_t consumed = static_cast<size_t>(block.input_frames_used);
				if(consumed < bufused)
					memmove(&buffers[0], &buffers[consumed * nch],
						(bufused - consumed) * nch * sizeof(short));
				bufused -= consumed;
				//At end of input, try again to get all the samples.
				retry = (block.output_frames_gen > 0 && (work & WORKFLAG_END));
			} while(retry);
#endif
			//NOTE(review): this also clears a WORKFLAG_END raised by sendend() while the block above
			//was being processed (sendend() does not wait for busy) -- confirm that is intended.
			clear_workflag(WORKFLAG_END | WORKFLAG_FLUSH | WORKFLAG_QUEUE_FRAME);
			clear_busy();
			if(work & WORKFLAG_END)
				return;
		}
		if(work == WORKFLAG_QUIT_REQUEST)
			break;
	}
}
} }

View file

@ -22,6 +22,10 @@ avi_video_codec::format::format(uint32_t _width, uint32_t _height, uint32_t _com
clr_important = 0; clr_important = 0;
} }
//Default performance-counter sink: both values are ignored. The method is virtual, so a codec that
//wants the counters can override it.
//
//Parameter b: Amount of busywaiting by emulator.
//Parameter w: Amount of workwaiting by dumper.
void avi_video_codec::send_performance_counters(uint64_t b, uint64_t w)
{
	(void)b;
	(void)w;
}
avi_audio_codec::format::format(uint16_t tag) avi_audio_codec::format::format(uint16_t tag)
{ {
max_bytes_per_sec = 200000; max_bytes_per_sec = 200000;

View file

@ -6,6 +6,9 @@
#include <cerrno> #include <cerrno>
#include <stdexcept> #include <stdexcept>
//The largest possible vector.
#define MAXIMUM_VECTOR 64
namespace namespace
{ {
numeric_setting clvl("avi-zmbv-compression", 0, 9, 7); numeric_setting clvl("avi-zmbv-compression", 0, 9, 7);
@ -13,13 +16,18 @@ namespace
numeric_setting bwv("avi-zmbv-blockw", 8, 64, 16); numeric_setting bwv("avi-zmbv-blockw", 8, 64, 16);
numeric_setting bhv("avi-zmbv-blockh", 8, 64, 16); numeric_setting bhv("avi-zmbv-blockh", 8, 64, 16);
//Motion vector.
struct motion struct motion
{ {
//X motion (positive is to left), -64...63.
int dx; int dx;
//Y motion (positive is to up), -64...63.
int dy; int dy;
//How bad the vector is. 0 means the vector is perfect (no residual).
uint32_t p; uint32_t p;
}; };
//The main ZMBV encoder state.
struct avi_codec_zmbv : public avi_video_codec struct avi_codec_zmbv : public avi_video_codec
{ {
avi_codec_zmbv(uint32_t _level, uint32_t maxpframes, uint32_t _bw, uint32_t _bh); avi_codec_zmbv(uint32_t _level, uint32_t maxpframes, uint32_t _bw, uint32_t _bh);
@ -29,181 +37,158 @@ namespace
bool ready(); bool ready();
avi_packet getpacket(); avi_packet getpacket();
private: private:
//The current pending packet, if any.
avi_packet out; avi_packet out;
//False if there is a pending packet, true if ready to take a frame.
bool ready_flag; bool ready_flag;
//The size of supplied frames.
unsigned iwidth; unsigned iwidth;
unsigned iheight; unsigned iheight;
//The size of written frames.
unsigned ewidth; unsigned ewidth;
unsigned eheight; unsigned eheight;
//P-frames written since last I-frame.
unsigned pframes; unsigned pframes;
//Maximum number of P-frames to write in sequence.
unsigned max_pframes; unsigned max_pframes;
//Compression level to use.
unsigned level; unsigned level;
//Size of one block.
//Size of block.
uint32_t bw; uint32_t bw;
uint32_t bh; uint32_t bh;
//Entropy estimator table. //Motion vector buffer, one motion vector for each block, in left-to-right, top-to-bottom order.
std::vector<uint32_t> entropy_tab;
//Temporary scratch memory (one block).
std::vector<uint32_t> tmp;
//Motion vector buffer.
std::vector<motion> mv; std::vector<motion> mv;
//Previous&Current frame. //Pixel buffer (2 full frames and one block).
std::vector<uint32_t> current; std::vector<uint32_t> pixbuf;
std::vector<uint32_t> prev; //Current frame pointer.
//Compression packet buffer and size. uint32_t* current_frame;
std::vector<char> diff; //Previous frame pointer.
size_t diffsize; uint32_t* prev_frame;
//Scratch block pointer.
uint32_t* scratch;
//Output buffer. Sufficient space to hold both compressed and uncompressed data.
std::vector<char> outbuffer;
//Output scratch memory.
char* oscratch;
//The actual output buffer. Pointer, size and used.
char* outbuf;
size_t outbuf_size;
size_t outbuf_used;
//Zlib state.
z_stream zstream; z_stream zstream;
//Output packet buffer and size. //Compute penalty for motion vector (dx, dy) on block with upper-left corner at (bx, by).
std::vector<char> output; uint32_t mv_penalty(uint32_t bx, uint32_t by, int dx, int dy);
size_t output_size; //Do motion detection for block with upper-left corner at (bx, by). M is filled with the resulting
//motion vector and t is initial guess for the motion vector.
//Motion vector penalty. void mv_detect(uint32_t bx, uint32_t by, motion& m, motion t);
uint32_t mv_penalty(uint32_t* data, int32_t bx, int32_t by, int dx, int dy); //Serialize movement vectors and current frame data to output buffer. If keyframe is true, keyframe is
//Do motion detection. //written, otherwise non-keyframe.
void mv_detect(uint32_t* data, int32_t bx, int32_t by, motion& m, motion t); void serialize_frame(bool keyframe);
//Serialize to difference buffer.
void serialize_frame(bool keyframe, uint32_t* data);
//Take compression packet buffer and write output packet buffer.
void compress_packet(bool keyframe);
}; };
void rbound(int32_t x, int32_t w, uint32_t b, int32_t& start, int32_t& offset, int32_t& size) //Compute XOR of blocks.
void xor_blocks(uint32_t* target, uint32_t* src1, uint32_t src1x, uint32_t src1y,
uint32_t src1w, uint32_t src1h, uint32_t* src2, uint32_t src2x, uint32_t src2y,
uint32_t src2w, uint32_t src2h, uint32_t bw, uint32_t bh)
{ {
start = x; uint32_t* s1ptr = src1 + src1y * src1w + src1x;
offset = 0; uint32_t* s2ptr = src2 + src2y * src2w + src2x;
size = b; for(uint32_t y = 0; y < bh; y++) {
if(start < 0) { for(uint32_t x = 0; x < bw; x++)
offset = -start; target[x] = s1ptr[x] ^ s2ptr[x];
start = 0; target += bw;
size = b - offset; s1ptr += src1w;
s2ptr += src2w;
} }
if(start + size > w)
size = w - start;
if(size < 0)
size = 0;
start = x + offset;
} }
void xor_blocks(uint32_t* target, uint32_t* src1, int32_t src1x, int32_t src1y, //Estimate entropy.
int32_t src1w, int32_t src1h, uint32_t* src2, int32_t src2x, int32_t src2y, uint32_t entropy(uint32_t* data, uint32_t bw, uint32_t bh)
int32_t src2w, int32_t src2h, uint32_t bw, uint32_t bh)
{
int32_t h_s1start;
int32_t h_s1off;
int32_t h_s1size;
int32_t h_s2start;
int32_t h_s2off;
int32_t h_s2size;
int32_t v_s1start;
int32_t v_s1off;
int32_t v_s1size;
int32_t v_s2start;
int32_t v_s2off;
int32_t v_s2size;
rbound(src1x, src1w, bw, h_s1start, h_s1off, h_s1size);
rbound(src2x, src2w, bw, h_s2start, h_s2off, h_s2size);
rbound(src1y, src1h, bh, v_s1start, v_s1off, v_s1size);
rbound(src2y, src2h, bh, v_s2start, v_s2off, v_s2size);
if(h_s1size < bw || v_s1size < bh)
memset(target, 0, 4 * bw * bh);
uint32_t* t1ptr = target + v_s1off * bh + h_s1off;
uint32_t* t2ptr = target + v_s2off * bh + h_s2off;
uint32_t* s1ptr = src1 + v_s1start * src1w + h_s1start;
uint32_t* s2ptr = src2 + v_s2start * src2w + h_s2start;
for(int32_t y = 0; y < v_s1size; y++)
memcpy(t1ptr + bw * y, s1ptr + src1w * y, 4 * h_s1size);
for(int32_t y = 0; y < v_s2size; y++)
for(int32_t x = 0; x < h_s2size; x++)
t2ptr[y * bw + x] ^= s2ptr[y * src2w + x];
}
void entropy_init(std::vector<uint32_t>& mem, uint32_t bw, uint32_t bh)
{
size_t bytes = 4 * bw * bh;
mem.resize(bytes + 1);
mem[0] = 0;
mem[bytes] = 0;
double M0 = log(bytes);
double M1 = 700000000.0 / bytes;
for(size_t i = 1; i < bytes; i++)
mem[i] = M1 * (M0 - log(i));
}
uint32_t entropy(std::vector<uint32_t>& mem, uint32_t* data)
{ {
//Because XORs are essentially random, calculate the number of non-zeroes to ascertain badness.
uint8_t* _data = reinterpret_cast<uint8_t*>(data); uint8_t* _data = reinterpret_cast<uint8_t*>(data);
uint32_t e = 0; uint32_t e = 0;
size_t imax = mem.size() - 1; size_t imax = 4 * bw * bh;
for(size_t i = 0; i < imax; i++) for(size_t i = 0; i < imax; i++)
if(_data[i]) if(_data[i])
e++; e++;
return e; return e;
} }
uint32_t avi_codec_zmbv::mv_penalty(uint32_t* data, int32_t bx, int32_t by, int dx, int dy) uint32_t avi_codec_zmbv::mv_penalty(uint32_t bx, uint32_t by, int dx, int dy)
{ {
xor_blocks(&tmp[0], data, bx, by, ewidth, eheight, &prev[0], bx + dx, by + dy, ewidth, eheight, bw, //Penalty is entropy estimate of resulting block.
bh); xor_blocks(scratch, current_frame, bx, by, ewidth + 2 * MAXIMUM_VECTOR, eheight, prev_frame, bx + dx,
return entropy(entropy_tab, &tmp[0]); by + dy, ewidth + 2 * MAXIMUM_VECTOR, eheight, bw, bh);
return entropy(scratch, bw, bh);
} }
void avi_codec_zmbv::serialize_frame(bool keyframe, uint32_t* data) void avi_codec_zmbv::serialize_frame(bool keyframe)
{ {
if(keyframe) { uint32_t nhb, nvb, nb;
memcpy(&diff[0], data, 4 * ewidth * eheight); //In_stride/in_offset is in units of words, out_stride is in units of bytes.
diffsize = 4 * ewidth * eheight; size_t in_stride = (ewidth + 2 * MAXIMUM_VECTOR);
return; size_t in_offset = MAXIMUM_VECTOR * (in_stride + 1);
}
uint32_t nhb = (ewidth + bw - 1) / bw;
uint32_t nvb = (eheight + bh - 1) / bh;
uint32_t nb = nhb * nvb;
size_t osize = 0; size_t osize = 0;
for(size_t i = 0; i < nb; i++) { if(keyframe) {
diff[osize++] = (mv[i].dx << 1) | (mv[i].p ? 1 : 0); //Just copy the frame data and compress that.
diff[osize++] = (mv[i].dy << 1); for(size_t y = 0; y < eheight; y++)
memcpy(oscratch + 4 * ewidth * y, current_frame + in_stride * y + in_offset,
4 * ewidth);
osize = 4 * ewidth * eheight;
goto compress;
} }
//Number of blocks.
nhb = (ewidth + bw - 1) / bw;
nvb = (eheight + bh - 1) / bh;
nb = nhb * nvb;
osize = 0;
//Serialize the motion vectors.
for(size_t i = 0; i < nb; i++) {
oscratch[osize++] = (mv[i].dx << 1) | (mv[i].p ? 1 : 0);
oscratch[osize++] = (mv[i].dy << 1);
}
//Pad to multiple of 4 bytes.
while(osize % 4) while(osize % 4)
diff[osize++] = 0; oscratch[osize++] = 0;
//Serialize the residuals.
for(size_t i = 0; i < nb; i++) { for(size_t i = 0; i < nb; i++) {
if(mv[i].p == 0) if(mv[i].p == 0)
continue; continue;
int32_t bx = (i % nhb) * bw; uint32_t bx = (i % nhb) * bw + MAXIMUM_VECTOR;
int32_t by = (i / nhb) * bh; uint32_t by = (i / nhb) * bh + MAXIMUM_VECTOR;
xor_blocks(reinterpret_cast<uint32_t*>(&diff[osize]), data, bx, by, ewidth, eheight, &prev[0], xor_blocks(reinterpret_cast<uint32_t*>(oscratch + osize), current_frame, bx, by, ewidth + 2 *
bx + mv[i].dx, by + mv[i].dy, ewidth, eheight, bw, bh); MAXIMUM_VECTOR, eheight, prev_frame, bx + mv[i].dx, by + mv[i].dy, ewidth + 2 *
MAXIMUM_VECTOR, eheight, bw, bh);
osize += 4 * bw * bh; osize += 4 * bw * bh;
} }
diffsize = osize; compress:
} //Compress the output data.
zstream.next_in = reinterpret_cast<uint8_t*>(oscratch);
zstream.avail_in = osize;
void avi_codec_zmbv::compress_packet(bool keyframe) osize = 0;
{ outbuf[osize++] = keyframe ? 1 : 0; //Indicate keyframe/not.
size_t osize = 0;
output[osize++] = keyframe ? 1 : 0; //Indicate keyframe/not.
if(keyframe) { if(keyframe) {
output[osize++] = 0; //Version 0.1 //Write the keyframe header.
output[osize++] = 1; outbuf[osize++] = 0; //Version 0.1
output[osize++] = 1; //Zlib compression. outbuf[osize++] = 1;
output[osize++] = 8; //32 bit. outbuf[osize++] = 1; //Zlib compression.
output[osize++] = bw; //Block size. outbuf[osize++] = 8; //32 bit.
output[osize++] = bh; outbuf[osize++] = bw; //Block size.
outbuf[osize++] = bh;
deflateReset(&zstream); //Reset the zlib context. deflateReset(&zstream); //Reset the zlib context.
} }
zstream.next_in = reinterpret_cast<uint8_t*>(&diff[0]); zstream.next_out = reinterpret_cast<uint8_t*>(&outbuf[osize]);
zstream.avail_in = diffsize; zstream.avail_out = outbuf_size - osize;
zstream.next_out = reinterpret_cast<uint8_t*>(&output[osize]);
zstream.avail_out = output.size() - osize;
if(deflate(&zstream, Z_SYNC_FLUSH) != Z_OK) if(deflate(&zstream, Z_SYNC_FLUSH) != Z_OK)
throw std::runtime_error("Zlib error while compressing data"); throw std::runtime_error("Zlib error while compressing data");
if(zstream.avail_in || !zstream.avail_out) if(zstream.avail_in || !zstream.avail_out)
throw std::runtime_error("Buffer overrun while compressing data"); throw std::runtime_error("Buffer overrun while compressing data");
output_size = output.size() - zstream.avail_out; outbuf_used = outbuf_size - zstream.avail_out;
} }
//If candidate is better than best, update best. Returns true if ideal has been reached, else false.
bool update_best(motion& best, motion& candidate) bool update_best(motion& best, motion& candidate)
{ {
if(candidate.p < best.p) if(candidate.p < best.p)
@ -211,28 +196,31 @@ namespace
return (best.p == 0); return (best.p == 0);
} }
void avi_codec_zmbv::mv_detect(uint32_t* data, int32_t bx, int32_t by, motion& m, motion t) void avi_codec_zmbv::mv_detect(uint32_t bx, uint32_t by, motion& m, motion t)
{ {
//Try the suggested vector.
motion c; motion c;
m.p = mv_penalty(data, bx, by, m.dx = t.dx, m.dy = t.dy); m.p = mv_penalty(bx, by, m.dx = t.dx, m.dy = t.dy);
if(!m.p) if(!m.p)
return; return;
c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = 0); //Try the zero vector.
c.p = mv_penalty(bx, by, c.dx = 0, c.dy = 0);
if(update_best(m, c)) if(update_best(m, c))
return; return;
//Try cardinal vectors up to 9 units.
for(int s = 1; s < 10; s++) { for(int s = 1; s < 10; s++) {
if(s == 0) if(s == 0)
continue; continue;
c.p = mv_penalty(data, bx, by, c.dx = -s, c.dy = 0); c.p = mv_penalty(bx, by, c.dx = -s, c.dy = 0);
if(update_best(m, c)) if(update_best(m, c))
return; return;
c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = -s); c.p = mv_penalty(bx, by, c.dx = 0, c.dy = -s);
if(update_best(m, c)) if(update_best(m, c))
return; return;
c.p = mv_penalty(data, bx, by, c.dx = s, c.dy = 0); c.p = mv_penalty(bx, by, c.dx = s, c.dy = 0);
if(update_best(m, c)) if(update_best(m, c))
return; return;
c.p = mv_penalty(data, bx, by, c.dx = 0, c.dy = s); c.p = mv_penalty(bx, by, c.dx = 0, c.dy = s);
if(update_best(m, c)) if(update_best(m, c))
return; return;
} }
@ -271,19 +259,23 @@ namespace
ready_flag = true; ready_flag = true;
avi_video_codec::format fmt(ewidth, eheight, 0x56424D5A, 24); avi_video_codec::format fmt(ewidth, eheight, 0x56424D5A, 24);
entropy_init(entropy_tab, bw, bh); pixbuf.resize(2 * (ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR) + bw * bh);
prev.resize(4 * ewidth * eheight); current_frame = &pixbuf[0];
current.resize(4 * ewidth * eheight); prev_frame = &pixbuf[(ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR)];
tmp.resize(4 * bw * bh); scratch = &pixbuf[2 * (ewidth + 2 * MAXIMUM_VECTOR) * (eheight + 2 * MAXIMUM_VECTOR)];
mv.resize(((ewidth + bw - 1) / bw) * ((eheight + bh - 1) / bh)); mv.resize(((ewidth + bw - 1) / bw) * ((eheight + bh - 1) / bh));
diff.resize(4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight); size_t maxdiff = 4 * ((mv.size() + 1) / 2) + 4 * ewidth * eheight;
output.resize(deflateBound(&zstream, diff.size()) + 128); outbuf_size = deflateBound(&zstream, maxdiff) + 128;
outbuffer.resize(maxdiff + outbuf_size);
oscratch = &outbuffer[outbuf_size];
outbuf = &outbuffer[0];
memset(&pixbuf[0], 0, 4 * pixbuf.size());
return fmt; return fmt;
} }
void avi_codec_zmbv::frame(uint32_t* data) void avi_codec_zmbv::frame(uint32_t* data)
{ {
bool buffer_loaded = false; //Keyframe/not determination.
bool keyframe = false; bool keyframe = false;
if(pframes >= max_pframes) { if(pframes >= max_pframes) {
keyframe = true; keyframe = true;
@ -293,25 +285,34 @@ namespace
//If bigendian, swap. //If bigendian, swap.
short magic = 258; short magic = 258;
size_t frameoffset = MAXIMUM_VECTOR * (ewidth + 2 * MAXIMUM_VECTOR + 1);
size_t framestride = ewidth + 2 * MAXIMUM_VECTOR;
if(reinterpret_cast<uint8_t*>(&magic)[0] == 1) if(reinterpret_cast<uint8_t*>(&magic)[0] == 1)
for(size_t i = 0; i < ewidth * eheight; i++) { for(size_t y = 0; y < iheight; y++) {
uint8_t* _current = reinterpret_cast<uint8_t*>(&current[0]); uint8_t* _current = reinterpret_cast<uint8_t*>(current_frame + frameoffset +
uint8_t* _data = reinterpret_cast<uint8_t*>(&data[0]); framestride * y);
_current[4 * i + 0] = _data[4 * i + 3]; uint8_t* _data = reinterpret_cast<uint8_t*>(&data[iwidth * y]);
_current[4 * i + 1] = _data[4 * i + 2]; for(size_t i = 0; i < iwidth; i++) {
_current[4 * i + 2] = _data[4 * i + 1]; _current[4 * i + 0] = _data[4 * i + 3];
_current[4 * i + 3] = _data[4 * i + 0]; _current[4 * i + 1] = _data[4 * i + 2];
_current[4 * i + 2] = _data[4 * i + 1];
_current[4 * i + 3] = _data[4 * i + 0];
}
} }
else else
for(size_t i = 0; i < ewidth * eheight; i++) { for(size_t y = 0; y < iheight; y++) {
uint8_t* _current = reinterpret_cast<uint8_t*>(&current[0]); uint8_t* _current = reinterpret_cast<uint8_t*>(current_frame + frameoffset +
uint8_t* _data = reinterpret_cast<uint8_t*>(&data[0]); framestride * y);
_current[4 * i + 2] = _data[4 * i + 0]; uint8_t* _data = reinterpret_cast<uint8_t*>(&data[iwidth * y]);
_current[4 * i + 1] = _data[4 * i + 1]; for(size_t i = 0; i < iwidth; i++) {
_current[4 * i + 0] = _data[4 * i + 2]; _current[4 * i + 2] = _data[4 * i + 0];
_current[4 * i + 3] = _data[4 * i + 3]; _current[4 * i + 1] = _data[4 * i + 1];
_current[4 * i + 0] = _data[4 * i + 2];
_current[4 * i + 3] = _data[4 * i + 3];
}
} }
//Estimate motion vectors for all blocks if non-keyframe.
uint32_t nhb = (ewidth + bw - 1) / bw; uint32_t nhb = (ewidth + bw - 1) / bw;
if(!keyframe) { if(!keyframe) {
motion t; motion t;
@ -319,16 +320,16 @@ namespace
t.dy = 0; t.dy = 0;
t.p = 0; t.p = 0;
for(size_t i = 0; i < mv.size(); i++) { for(size_t i = 0; i < mv.size(); i++) {
mv_detect(&current[0], (i % nhb) * bw, (i / nhb) * bh, mv[i], t); mv_detect((i % nhb) * bw + MAXIMUM_VECTOR, (i / nhb) * bh + MAXIMUM_VECTOR, mv[i], t);
t = mv[i]; t = mv[i];
} }
} }
serialize_frame(keyframe, &current[0]); //Serialize and output.
compress_packet(keyframe); serialize_frame(keyframe);
memcpy(&prev[0], &current[0], 4 * ewidth * eheight); std::swap(current_frame, prev_frame);
out.payload.resize(output_size); out.payload.resize(outbuf_used);
memcpy(&out.payload[0], &output[0], output_size); memcpy(&out.payload[0], outbuf, outbuf_used);
out.typecode = 0x6264; //Not exactly correct according to specs... out.typecode = 0x6264; //Not exactly correct according to specs...
out.hidden = false; out.hidden = false;
out.indexflags = keyframe ? 0x10 : 0; out.indexflags = keyframe ? 0x10 : 0;
@ -346,7 +347,7 @@ namespace
return out; return out;
} }
//ZMBV encoder factory object.
avi_video_codec_type rgb("zmbv", "Zip Motion Blocks Video codec", avi_video_codec_type rgb("zmbv", "Zip Motion Blocks Video codec",
[]() -> avi_video_codec* { return new avi_codec_zmbv(clvl, kint, bwv, bhv);}); []() -> avi_video_codec* { return new avi_codec_zmbv(clvl, kint, bwv, bhv);});
} }