#include #include #include #include #include #include #include #include #include std::map glyph_index; std::map glyph_offsets; std::map glyphs; uint32_t next_offset = 1; void add_glyph(uint32_t codepoint, const std::string& appearence) { if(((codepoint >> 16) > 10) || ((codepoint & 0x1FF800) == 0xD800)) { std::cerr << "Illegal codepoint " << std::hex << codepoint << "." << std::endl; exit(1); } if(!glyph_offsets.count(appearence)) { if(appearence.find_first_not_of("0123456789ABCDEFabcdef") < appearence.length()) { std::cerr << "Invalid font representation (invalid hex char)." << std::endl; std::cerr << "Faulty representation: '" << appearence << "'." << std::endl; exit(1); } glyph_offsets[appearence] = next_offset; glyph_index[next_offset] = codepoint; if(appearence.length() == 32) next_offset += 5; else if(appearence.length() == 64) next_offset += 9; else { std::cerr << "Invalid font representation (length " << appearence.length() << ")." << std::endl; std::cerr << "Faulty representation: '" << appearence << "'." << std::endl; exit(1); } } glyphs[codepoint] = appearence; } uint32_t bucket_size(uint32_t items) { if(items <= 4) return items; return static_cast(pow(items, 1.5)); } //This is Jenkin's MIX function. uint32_t keyhash(uint32_t key, uint32_t item, uint32_t mod) { uint32_t a = key; uint32_t b = 0; uint32_t c = item; a=a-b; a=a-c; a=a^(c >> 13); b=b-c; b=b-a; b=b^(a << 8); c=c-a; c=c-b; c=c^(b >> 13); a=a-b; a=a-c; a=a^(c >> 12); b=b-c; b=b-a; b=b^(a << 16); c=c-a; c=c-b; c=c^(b >> 5); a=a-b; a=a-c; a=a^(c >> 3); b=b-c; b=b-a; b=b^(a << 10); c=c-a; c=c-b; c=c^(b >> 15); return c % mod; } uint32_t wrong_key(uint32_t key, uint32_t hash, uint32_t mod) { uint32_t i = 0; if(mod <= 1) return 0; while(keyhash(key, i, mod) == hash) i++; return i; } std::pair make_subdirectory(std::vector& items) { if(items.size() < 2) return std::make_pair(0, items.size()); std::vector memory; memory.resize(bucket_size(items.size())); uint32_t seed = 1; unsigned tries = 0; while(true) { //Safety hatch: If unsuccessful too many times, increase the hash size. if(tries == 100) { memory.resize(memory.size() + 1); tries = 0; } bool success = true; seed = rand(); for(uint32_t i = 0; i < memory.size(); i++) memory[i] = 0xFFFFFFFFUL; for(uint32_t i = 0; i < items.size(); i++) { uint32_t j = keyhash(seed, items[i], memory.size()); if(memory[j] != 0xFFFFFFFFUL) { success = false; break; } memory[j] = items[i]; } if(success) break; tries++; } return std::make_pair(seed, memory.size()); } void write_subdirectory(std::vector& items, std::pair p, uint32_t badkey) { if(!p.second) return; std::vector memory; std::cout << "," << p.first << "UL," << p.second << "UL"; memory.resize(p.second); for(uint32_t i = 0; i < memory.size(); i++) memory[i] = badkey; for(uint32_t i = 0; i < items.size(); i++) memory[keyhash(p.first, items[i], memory.size())] = items[i]; for(uint32_t i = 0; i < memory.size(); i++) { if(memory[i] != badkey) std::cout << "," << memory[i] << "UL," << glyph_offsets[glyphs[memory[i]]] << "UL"; else std::cout << "," << badkey << "UL,0UL"; } std::cout << std::endl; } uint32_t glyph_part_string(const std::string& str) { return strtoul(str.c_str(), NULL, 16); } void write_glyphs() { for(std::map::iterator i = glyph_index.begin(); i != glyph_index.end(); ++i) { std::string glyph = glyphs[i->second]; if(glyph.length() == 32) { std::cout << ",0"; for(unsigned i = 0; i < 32; i += 8) std::cout << "," << glyph_part_string(glyph.substr(i, 8)) << "UL"; } else if(glyph.length() == 64) { std::cout << ",1"; for(unsigned i = 0; i < 64; i += 8) std::cout << "," << glyph_part_string(glyph.substr(i, 8)) << "UL"; } else { std::cerr << "INTERNAL ERROR: Invalid glyph data" << std::endl; exit(1); } } std::cout << std::endl; } void write_main_directory(uint32_t seed, std::vector >& main_directory, std::vector >& subdirectories) { std::cout << "," << seed << "UL"; std::cout << "," << main_directory.size() << "UL"; uint32_t subdir_offset = next_offset + 4 + main_directory.size(); for(size_t i = 0; i < main_directory.size(); i++) { if(subdirectories[i].second) { std::cout << "," << subdir_offset << "UL"; subdir_offset = subdir_offset + 2 + 2 * subdirectories[i].second; } else std::cout << "," << next_offset << "UL"; } std::cout << std::endl; } int main() { std::string line; srand(time(NULL)); while(std::getline(std::cin, line)) { if(line[line.length() - 1] == '\r') line = line.substr(0, line.length() - 1); size_t split = line.find_first_of("#:"); if(split > line.length() || line[split] == '#') continue; std::string codepoint = line.substr(0, split); std::string appearence = line.substr(split + 1); uint32_t cp = 0; char* end; cp = strtoul(codepoint.c_str(), &end, 16); if(*end) { std::cerr << "Invalid codepoint number '" << codepoint << "." << std::endl; exit(1); } add_glyph(cp, appearence); } std::cerr << "Loaded " << glyphs.size() << " glyphs (" << glyph_offsets.size() << " distinct)." << std::endl; if(!glyphs.size()) { std::cerr << "Need at least 1 glyph." << std::endl; return 1; } uint32_t best_seed = 0; uint32_t best_score = 0xFFFFFFFFUL; uint32_t main_directory_size = glyphs.size(); for(unsigned i = 0; i < 1000; i++) { if(i % 10 == 0) std::cerr << "." << std::flush; uint32_t seed = rand(); std::vector bucket_items; bucket_items.resize(main_directory_size); for(uint32_t i = 0; i < main_directory_size; i++) bucket_items[i] = 0; for(std::map::iterator i = glyphs.begin(); i != glyphs.end(); ++i) bucket_items[keyhash(seed, i->first, main_directory_size)]++; uint32_t score = 0; for(uint32_t i = 0; i < main_directory_size; i++) if(bucket_items[i]) score = score + 2 * bucket_size(bucket_items[i]) + 3; else score = score + 1; if(score < best_score) { best_seed = seed; best_score = score; } } std::cerr << std::endl; std::cerr << "Best estimated score: " << best_score << " for seed " << best_seed << "." << std::endl; std::vector > main_directory; std::vector > subdirectories; main_directory.resize(main_directory_size); subdirectories.resize(main_directory_size); for(std::map::iterator i = glyphs.begin(); i != glyphs.end(); ++i) main_directory[keyhash(best_seed, i->first, main_directory_size)].push_back(i->first); uint32_t update_interval = main_directory_size / 10; unsigned real_score = 0; for(size_t i = 0; i < main_directory.size(); i++) { if((i % update_interval) == 0) std::cerr << "." << std::flush; subdirectories[i] = make_subdirectory(main_directory[i]); if(subdirectories[i].second) real_score = real_score + 2 * subdirectories[i].second + 3; else real_score = real_score + 1; } std::cerr << std::endl; std::cerr << "Final score: " << real_score << " (" << next_offset + real_score + 4 << " elements)." << std::endl; std::cout << "#include " << std::endl; std::cout << "uint32_t fontdata_size = " << next_offset + real_score + 4 << ";" << std::endl; std::cout << "uint32_t fontdata[] = {"; std::cout << next_offset + 2 << "UL"; write_glyphs(); std::cout << ",0,0" << std::endl; write_main_directory(best_seed, main_directory, subdirectories); for(size_t i = 0; i < main_directory.size(); i++) write_subdirectory(main_directory[i], subdirectories[i], wrong_key(best_seed, i, main_directory_size)); std::cout << "};" << std::endl; return 0; }