lsnes/fonts/parsehexfont.cpp
2011-09-15 22:56:33 +03:00

258 lines
7.8 KiB
C++

#include <map>
#include <string>
#include <iostream>
#include <cstdint>
#include <cmath>
#include <vector>
#include <cstdlib>
#include <ctime>
#include <iomanip>
std::map<uint32_t, uint32_t> glyph_index;
std::map<std::string, uint32_t> glyph_offsets;
std::map<uint32_t, std::string> glyphs;
uint32_t next_offset = 1;
void add_glyph(uint32_t codepoint, const std::string& appearence)
{
if(((codepoint >> 16) > 10) || ((codepoint & 0x1FF800) == 0xD800)) {
std::cerr << "Illegal codepoint " << std::hex << codepoint << "." << std::endl;
exit(1);
}
if(!glyph_offsets.count(appearence)) {
if(appearence.find_first_not_of("0123456789ABCDEFabcdef") < appearence.length()) {
std::cerr << "Invalid font representation (invalid hex char)." << std::endl;
std::cerr << "Faulty representation: '" << appearence << "'." << std::endl;
exit(1);
}
glyph_offsets[appearence] = next_offset;
glyph_index[next_offset] = codepoint;
if(appearence.length() == 32)
next_offset += 5;
else if(appearence.length() == 64)
next_offset += 9;
else {
std::cerr << "Invalid font representation (length " << appearence.length() << ")."
<< std::endl;
std::cerr << "Faulty representation: '" << appearence << "'." << std::endl;
exit(1);
}
}
glyphs[codepoint] = appearence;
}
uint32_t bucket_size(uint32_t items)
{
if(items <= 4)
return items;
return static_cast<uint32_t>(pow(items, 1.5));
}
//This is Jenkin's MIX function.
uint32_t keyhash(uint32_t key, uint32_t item, uint32_t mod)
{
uint32_t a = key;
uint32_t b = 0;
uint32_t c = item;
a=a-b; a=a-c; a=a^(c >> 13);
b=b-c; b=b-a; b=b^(a << 8);
c=c-a; c=c-b; c=c^(b >> 13);
a=a-b; a=a-c; a=a^(c >> 12);
b=b-c; b=b-a; b=b^(a << 16);
c=c-a; c=c-b; c=c^(b >> 5);
a=a-b; a=a-c; a=a^(c >> 3);
b=b-c; b=b-a; b=b^(a << 10);
c=c-a; c=c-b; c=c^(b >> 15);
return c % mod;
}
uint32_t wrong_key(uint32_t key, uint32_t hash, uint32_t mod)
{
uint32_t i = 0;
if(mod <= 1)
return 0;
while(keyhash(key, i, mod) == hash)
i++;
return i;
}
std::pair<uint32_t, uint32_t> make_subdirectory(std::vector<uint32_t>& items)
{
if(items.size() < 2)
return std::make_pair(0, items.size());
std::vector<uint32_t> memory;
memory.resize(bucket_size(items.size()));
uint32_t seed = 1;
unsigned tries = 0;
while(true) {
//Safety hatch: If unsuccessful too many times, increase the hash size.
if(tries == 100) {
memory.resize(memory.size() + 1);
tries = 0;
}
bool success = true;
seed = rand();
for(uint32_t i = 0; i < memory.size(); i++)
memory[i] = 0xFFFFFFFFUL;
for(uint32_t i = 0; i < items.size(); i++) {
uint32_t j = keyhash(seed, items[i], memory.size());
if(memory[j] != 0xFFFFFFFFUL) {
success = false;
break;
}
memory[j] = items[i];
}
if(success)
break;
tries++;
}
return std::make_pair(seed, memory.size());
}
void write_subdirectory(std::vector<uint32_t>& items, std::pair<uint32_t, uint32_t> p,
uint32_t badkey)
{
if(!p.second)
return;
std::vector<uint32_t> memory;
std::cout << "," << p.first << "UL," << p.second << "UL";
memory.resize(p.second);
for(uint32_t i = 0; i < memory.size(); i++)
memory[i] = badkey;
for(uint32_t i = 0; i < items.size(); i++)
memory[keyhash(p.first, items[i], memory.size())] = items[i];
for(uint32_t i = 0; i < memory.size(); i++) {
if(memory[i] != badkey)
std::cout << "," << memory[i] << "UL," << glyph_offsets[glyphs[memory[i]]] << "UL";
else
std::cout << "," << badkey << "UL,0UL";
}
std::cout << std::endl;
}
uint32_t glyph_part_string(const std::string& str)
{
return strtoul(str.c_str(), NULL, 16);
}
void write_glyphs()
{
for(std::map<uint32_t, uint32_t>::iterator i = glyph_index.begin(); i != glyph_index.end(); ++i) {
std::string glyph = glyphs[i->second];
if(glyph.length() == 32) {
std::cout << ",0";
for(unsigned i = 0; i < 32; i += 8)
std::cout << "," << glyph_part_string(glyph.substr(i, 8)) << "UL";
} else if(glyph.length() == 64) {
std::cout << ",1";
for(unsigned i = 0; i < 64; i += 8)
std::cout << "," << glyph_part_string(glyph.substr(i, 8)) << "UL";
} else {
std::cerr << "INTERNAL ERROR: Invalid glyph data" << std::endl;
exit(1);
}
}
std::cout << std::endl;
}
void write_main_directory(uint32_t seed, std::vector<std::vector<uint32_t> >& main_directory,
std::vector<std::pair<uint32_t, uint32_t> >& subdirectories)
{
std::cout << "," << seed << "UL";
std::cout << "," << main_directory.size() << "UL";
uint32_t subdir_offset = next_offset + 4 + main_directory.size();
for(size_t i = 0; i < main_directory.size(); i++) {
if(subdirectories[i].second) {
std::cout << "," << subdir_offset << "UL";
subdir_offset = subdir_offset + 2 + 2 * subdirectories[i].second;
} else
std::cout << "," << next_offset << "UL";
}
std::cout << std::endl;
}
int main()
{
std::string line;
srand(time(NULL));
while(std::getline(std::cin, line)) {
if(line[line.length() - 1] == '\r')
line = line.substr(0, line.length() - 1);
size_t split = line.find_first_of("#:");
if(split > line.length() || line[split] == '#')
continue;
std::string codepoint = line.substr(0, split);
std::string appearence = line.substr(split + 1);
uint32_t cp = 0;
char* end;
cp = strtoul(codepoint.c_str(), &end, 16);
if(*end) {
std::cerr << "Invalid codepoint number '" << codepoint << "." << std::endl;
exit(1);
}
add_glyph(cp, appearence);
}
std::cerr << "Loaded " << glyphs.size() << " glyphs (" << glyph_offsets.size() << " distinct)." << std::endl;
if(!glyphs.size()) {
std::cerr << "Need at least 1 glyph." << std::endl;
return 1;
}
uint32_t best_seed = 0;
uint32_t best_score = 0xFFFFFFFFUL;
uint32_t main_directory_size = glyphs.size();
for(unsigned i = 0; i < 1000; i++) {
if(i % 10 == 0)
std::cerr << "." << std::flush;
uint32_t seed = rand();
std::vector<uint32_t> bucket_items;
bucket_items.resize(main_directory_size);
for(uint32_t i = 0; i < main_directory_size; i++)
bucket_items[i] = 0;
for(std::map<uint32_t, std::string>::iterator i = glyphs.begin(); i != glyphs.end(); ++i)
bucket_items[keyhash(seed, i->first, main_directory_size)]++;
uint32_t score = 0;
for(uint32_t i = 0; i < main_directory_size; i++)
if(bucket_items[i])
score = score + 2 * bucket_size(bucket_items[i]) + 3;
else
score = score + 1;
if(score < best_score) {
best_seed = seed;
best_score = score;
}
}
std::cerr << std::endl;
std::cerr << "Best estimated score: " << best_score << " for seed " << best_seed << "." << std::endl;
std::vector<std::vector<uint32_t> > main_directory;
std::vector<std::pair<uint32_t, uint32_t> > subdirectories;
main_directory.resize(main_directory_size);
subdirectories.resize(main_directory_size);
for(std::map<uint32_t, std::string>::iterator i = glyphs.begin(); i != glyphs.end(); ++i)
main_directory[keyhash(best_seed, i->first, main_directory_size)].push_back(i->first);
uint32_t update_interval = main_directory_size / 10;
unsigned real_score = 0;
for(size_t i = 0; i < main_directory.size(); i++) {
if((i % update_interval) == 0)
std::cerr << "." << std::flush;
subdirectories[i] = make_subdirectory(main_directory[i]);
if(subdirectories[i].second)
real_score = real_score + 2 * subdirectories[i].second + 3;
else
real_score = real_score + 1;
}
std::cerr << std::endl;
std::cerr << "Final score: " << real_score << " (" << next_offset + real_score + 4 << " elements)."
<< std::endl;
std::cout << "#include <cstdint>" << std::endl;
std::cout << "uint32_t fontdata_size = " << next_offset + real_score + 4 << ";" << std::endl;
std::cout << "uint32_t fontdata[] = {";
std::cout << next_offset + 2 << "UL";
write_glyphs();
std::cout << ",0,0" << std::endl;
write_main_directory(best_seed, main_directory, subdirectories);
for(size_t i = 0; i < main_directory.size(); i++)
write_subdirectory(main_directory[i], subdirectories[i], wrong_key(best_seed, i, main_directory_size));
std::cout << "};" << std::endl;
return 0;
}