Use std::u32string instead of std::vector<uint32_t> for UTF-32 strings
This also uncovered a bug in copy_from_utf8.
This commit is contained in:
parent
3c2aefdc06
commit
92adf8519c
6 changed files with 68 additions and 98 deletions
|
@ -9,22 +9,6 @@
|
|||
#include <map>
|
||||
#include "framebuffer.hpp"
|
||||
|
||||
class ligature_key
|
||||
{
|
||||
public:
|
||||
ligature_key(const std::vector<uint32_t>& key) throw(std::bad_alloc);
|
||||
const std::vector<uint32_t>& get() const throw() { return ikey; }
|
||||
size_t length() const throw() { return ikey.size(); }
|
||||
bool operator<(const ligature_key& key) const throw();
|
||||
bool operator<=(const ligature_key& key) const throw() { return !(key < *this); }
|
||||
bool operator==(const ligature_key& key) const throw();
|
||||
bool operator!=(const ligature_key& key) const throw() { return !(key == *this); }
|
||||
bool operator>=(const ligature_key& key) const throw() { return !(*this < key); }
|
||||
bool operator>(const ligature_key& key) const throw() { return key < *this; }
|
||||
private:
|
||||
std::vector<uint32_t> ikey;
|
||||
};
|
||||
|
||||
struct font_glyph_data
|
||||
{
|
||||
font_glyph_data();
|
||||
|
@ -43,13 +27,13 @@ struct custom_font
|
|||
public:
|
||||
custom_font();
|
||||
custom_font(const std::string& file);
|
||||
void add(const ligature_key& key, const font_glyph_data& glyph) throw(std::bad_alloc);
|
||||
ligature_key best_ligature_match(const std::vector<uint32_t>& codepoints, size_t start) const
|
||||
void add(const std::u32string& key, const font_glyph_data& glyph) throw(std::bad_alloc);
|
||||
std::u32string best_ligature_match(const std::u32string& codepoints, size_t start) const
|
||||
throw(std::bad_alloc);
|
||||
const font_glyph_data& lookup_glyph(const ligature_key& key) const throw();
|
||||
const font_glyph_data& lookup_glyph(const std::u32string& key) const throw();
|
||||
unsigned get_rowadvance() const throw() { return rowadvance; }
|
||||
private:
|
||||
std::map<ligature_key, font_glyph_data> glyphs;
|
||||
std::map<std::u32string, font_glyph_data> glyphs;
|
||||
unsigned rowadvance;
|
||||
};
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef _library__utf8__hpp__included__
|
||||
#define _library__utf8__hpp__included__
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
@ -27,6 +28,16 @@ int32_t utf8_parse_byte(int ch, uint16_t& state) throw();
|
|||
*/
|
||||
size_t utf8_strlen(const std::string& str) throw();
|
||||
|
||||
/**
|
||||
* Transform UTF-8 into UTF-32.
|
||||
*/
|
||||
std::u32string to_u32string(const std::string& utf8);
|
||||
|
||||
/**
|
||||
* Transform UTF-32 into UTF-8.
|
||||
*/
|
||||
std::string to_u8string(const std::u32string& utf32);
|
||||
|
||||
/**
|
||||
* Iterator copy from UTF-8 to UTF-32
|
||||
*/
|
||||
|
@ -35,7 +46,7 @@ inline void copy_from_utf8(srcitr begin, srcitr end, dstitr target)
|
|||
{
|
||||
uint16_t state = utf8_initial_state;
|
||||
for(srcitr i = begin; i != end; i++) {
|
||||
int32_t x = utf8_parse_byte(*i, state);
|
||||
int32_t x = utf8_parse_byte((unsigned char)*i, state);
|
||||
if(x >= 0) {
|
||||
*target = x;
|
||||
++target;
|
||||
|
|
|
@ -4,29 +4,6 @@
|
|||
#include "zip.hpp"
|
||||
#include "string.hpp"
|
||||
|
||||
ligature_key::ligature_key(const std::vector<uint32_t>& key) throw(std::bad_alloc)
|
||||
{
|
||||
ikey = key;
|
||||
}
|
||||
|
||||
bool ligature_key::operator<(const ligature_key& key) const throw()
|
||||
{
|
||||
for(size_t i = 0; i < ikey.size() && i < key.ikey.size(); i++)
|
||||
if(ikey[i] < key.ikey[i])
|
||||
return true;
|
||||
else if(ikey[i] > key.ikey[i])
|
||||
return false;
|
||||
return (ikey.size() < key.ikey.size());
|
||||
}
|
||||
|
||||
bool ligature_key::operator==(const ligature_key& key) const throw()
|
||||
{
|
||||
for(size_t i = 0; i < ikey.size() && i < key.ikey.size(); i++)
|
||||
if(ikey[i] != key.ikey[i])
|
||||
return false;
|
||||
return (ikey.size() == key.ikey.size());
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void bound(int32_t c, uint32_t odim, uint32_t dim, uint32_t& dc, uint32_t& off, uint32_t& size)
|
||||
|
@ -182,7 +159,7 @@ custom_font::custom_font(const std::string& file)
|
|||
zip_reader r(file);
|
||||
for(auto member : r) {
|
||||
//Parse the key out of filename.
|
||||
std::vector<uint32_t> k;
|
||||
std::u32string key;
|
||||
std::string tname = member;
|
||||
std::string tmp;
|
||||
if(tname == "bad") {
|
||||
|
@ -190,14 +167,13 @@ custom_font::custom_font(const std::string& file)
|
|||
} else if(regex_match("[0-9]+(-[0-9]+)*", tname))
|
||||
while(tname != "") {
|
||||
extract_token(tname, tmp, "-");
|
||||
k.push_back(parse_value<uint32_t>(tmp));
|
||||
key.append(1, parse_value<uint32_t>(tmp));
|
||||
}
|
||||
else {
|
||||
delete toclose;
|
||||
toclose = NULL;
|
||||
continue;
|
||||
}
|
||||
ligature_key key(k);
|
||||
std::istream& s = r[member];
|
||||
toclose = &s;
|
||||
try {
|
||||
|
@ -221,44 +197,44 @@ custom_font::custom_font(const std::string& file)
|
|||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const ligature_key& lkey)
|
||||
std::ostream& operator<<(std::ostream& os, const std::u32string& lkey)
|
||||
{
|
||||
if(!lkey.length())
|
||||
return (os << "bad");
|
||||
for(size_t i = 0; i < lkey.length(); i++) {
|
||||
if(i)
|
||||
os << "-";
|
||||
os << lkey.get()[i];
|
||||
os << static_cast<uint32_t>(lkey[i]);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
void custom_font::add(const ligature_key& key, const font_glyph_data& glyph) throw(std::bad_alloc)
|
||||
void custom_font::add(const std::u32string& key, const font_glyph_data& glyph) throw(std::bad_alloc)
|
||||
{
|
||||
glyphs[key] = glyph;
|
||||
if(glyph.height > rowadvance)
|
||||
rowadvance = glyph.height;
|
||||
}
|
||||
|
||||
ligature_key custom_font::best_ligature_match(const std::vector<uint32_t>& codepoints, size_t start) const
|
||||
std::u32string custom_font::best_ligature_match(const std::u32string& codepoints, size_t start) const
|
||||
throw(std::bad_alloc)
|
||||
{
|
||||
std::vector<uint32_t> tmp;
|
||||
if(start >= codepoints.size())
|
||||
return ligature_key(tmp); //Bad.
|
||||
ligature_key best(tmp);
|
||||
std::u32string tmp;
|
||||
if(start >= codepoints.length())
|
||||
return tmp; //Bad.
|
||||
std::u32string best = tmp;
|
||||
for(size_t i = 1; i <= codepoints.size() - start; i++) {
|
||||
tmp.push_back(codepoints[start + i - 1]);
|
||||
ligature_key lkey(tmp);
|
||||
tmp.append(1, codepoints[start + i - 1]);
|
||||
std::u32string lkey = tmp;
|
||||
if(glyphs.count(lkey))
|
||||
best = lkey;
|
||||
auto j = glyphs.lower_bound(lkey);
|
||||
//If lower_bound is greater than equivalent length of string, there can be no better match.
|
||||
if(j == glyphs.end())
|
||||
break;
|
||||
const std::vector<uint32_t>& tmp2 = j->first.get();
|
||||
const std::u32string& tmp2 = j->first;
|
||||
bool best_found = false;
|
||||
for(size_t k = 0; k < tmp2.size() && start + k < codepoints.size(); k++)
|
||||
for(size_t k = 0; k < tmp2.length() && start + k < codepoints.length(); k++)
|
||||
if(tmp2[k] > codepoints[start + k]) {
|
||||
best_found = true;
|
||||
break;
|
||||
|
@ -270,7 +246,7 @@ ligature_key custom_font::best_ligature_match(const std::vector<uint32_t>& codep
|
|||
return best;
|
||||
}
|
||||
|
||||
const font_glyph_data& custom_font::lookup_glyph(const ligature_key& key) const throw()
|
||||
const font_glyph_data& custom_font::lookup_glyph(const std::u32string& key) const throw()
|
||||
{
|
||||
static font_glyph_data empty_glyph;
|
||||
auto i = glyphs.find(key);
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <sstream>
|
||||
#include "utf8.hpp"
|
||||
|
||||
namespace
|
||||
|
@ -157,6 +158,33 @@ size_t utf8_strlen(const std::string& str) throw()
|
|||
return r;
|
||||
}
|
||||
|
||||
std::u32string to_u32string(const std::string& utf8)
|
||||
{
|
||||
std::u32string x;
|
||||
x.resize(utf8_strlen(utf8));
|
||||
copy_from_utf8(utf8.begin(), utf8.end(), x.begin());
|
||||
return x;
|
||||
}
|
||||
|
||||
/**
 * Transform a UTF-32 string into a UTF-8 encoded byte string.
 *
 * Every code point up to and including U+10FFFF is written as a 1- to 4-byte sequence.
 * Values above U+10FFFF have no UTF-8 encoding and are skipped. Surrogate values are not
 * filtered out; they are encoded as ordinary 3-byte sequences.
 *
 * Parameter utf32: The code points to encode.
 * Returns: The UTF-8 encoding of the input.
 */
std::string to_u8string(const std::u32string& utf32)
{
	std::string out;
	//Each code point needs at least one byte.
	out.reserve(utf32.size());
	for(char32_t cp : utf32) {
		if(cp < 0x80) {
			out.push_back(static_cast<char>(cp));
		} else if(cp < 0x800) {
			out.push_back(static_cast<char>(0xC0 + (cp >> 6)));
			out.push_back(static_cast<char>(0x80 + (cp & 0x3F)));
		} else if(cp < 0x10000) {
			out.push_back(static_cast<char>(0xE0 + (cp >> 12)));
			out.push_back(static_cast<char>(0x80 + ((cp >> 6) & 0x3F)));
			out.push_back(static_cast<char>(0x80 + (cp & 0x3F)));
		} else if(cp <= 0x10FFFF) {
			//The bound is inclusive: U+10FFFF is the last valid code point and must be encoded.
			out.push_back(static_cast<char>(0xF0 + (cp >> 18)));
			out.push_back(static_cast<char>(0x80 + ((cp >> 12) & 0x3F)));
			out.push_back(static_cast<char>(0x80 + ((cp >> 6) & 0x3F)));
			out.push_back(static_cast<char>(0x80 + (cp & 0x3F)));
		}
	}
	return out;
}
|
||||
|
||||
#ifdef TEST_UTF8
|
||||
#include <iostream>
|
||||
char* format_dword(uint16_t s)
|
||||
|
|
|
@ -39,14 +39,13 @@ namespace
|
|||
fg.set_palette(scr);
|
||||
bg.set_palette(scr);
|
||||
const custom_font& fdata = font->object()->get_font();
|
||||
std::vector<uint32_t> _text;
|
||||
copy_from_utf8(text.begin(), text.end(), std::back_inserter(_text));
|
||||
std::u32string _text = to_u32string(text);
|
||||
int32_t orig_x = x;
|
||||
int32_t drawx = x;
|
||||
int32_t drawy = y;
|
||||
for(size_t i = 0; i < _text.size();) {
|
||||
uint32_t cp = _text[i];
|
||||
ligature_key k = fdata.best_ligature_match(_text, i);
|
||||
std::u32string k = fdata.best_ligature_match(_text, i);
|
||||
const font_glyph_data& glyph = fdata.lookup_glyph(k);
|
||||
if(k.length())
|
||||
i += k.length();
|
||||
|
|
|
@ -153,8 +153,6 @@ private:
|
|||
std::string _line2;
|
||||
void format_lines();
|
||||
void add_port(unsigned& c, unsigned pid, const port_type& p, const port_type_set& pts);
|
||||
std::string vector_to_string(const std::vector<uint32_t>& cp);
|
||||
std::vector<uint32_t> string_to_vector(const std::string& str);
|
||||
std::list<control_info> controlinfo;
|
||||
};
|
||||
|
||||
|
@ -234,32 +232,6 @@ uint32_t frame_controls::read_pollcount(pollcounter_vector& v, unsigned idx)
|
|||
return v.get_polls(idx);
|
||||
}
|
||||
|
||||
std::string frame_controls::vector_to_string(const std::vector<uint32_t>& cp)
|
||||
{
|
||||
std::ostringstream s;
|
||||
for(auto i : cp) {
|
||||
if(i < 0x80)
|
||||
s << (unsigned char)i;
|
||||
else if(i < 0x800)
|
||||
s << (unsigned char)(0xC0 + (i >> 6)) << (unsigned char)(0x80 + (i & 0x3F));
|
||||
else if(i < 0x10000)
|
||||
s << (unsigned char)(0xE0 + (i >> 12)) << (unsigned char)(0x80 + ((i >> 6) & 0x3F))
|
||||
<< (unsigned char)(0x80 + (i & 0x3F));
|
||||
else if(i < 0x10FFFF)
|
||||
s << (unsigned char)(0xF0 + (i >> 18)) << (unsigned char)(0x80 + ((i >> 12) & 0x3F))
|
||||
<< (unsigned char)(0x80 + ((i >> 6) & 0x3F))
|
||||
<< (unsigned char)(0x80 + (i & 0x3F));
|
||||
}
|
||||
return s.str();
|
||||
}
|
||||
|
||||
std::vector<uint32_t> frame_controls::string_to_vector(const std::string& str)
|
||||
{
|
||||
std::vector<uint32_t> cp;
|
||||
copy_from_utf8(str.begin(), str.end(), std::back_inserter(cp));
|
||||
return cp;
|
||||
}
|
||||
|
||||
void frame_controls::format_lines()
|
||||
{
|
||||
_width = 0;
|
||||
|
@ -267,8 +239,8 @@ void frame_controls::format_lines()
|
|||
if(i.position_left + i.reserved > _width)
|
||||
_width = i.position_left + i.reserved;
|
||||
}
|
||||
std::vector<uint32_t> cp1;
|
||||
std::vector<uint32_t> cp2;
|
||||
std::u32string cp1;
|
||||
std::u32string cp2;
|
||||
uint32_t off = divcnt + 1;
|
||||
cp1.resize(_width + divcnt + 1);
|
||||
cp2.resize(_width + divcnt + 1);
|
||||
|
@ -280,23 +252,23 @@ void frame_controls::format_lines()
|
|||
//For every port-controller, find the least coordinate.
|
||||
for(auto i : controlinfo) {
|
||||
if(i.type == -1) {
|
||||
auto _title = string_to_vector(i.title);
|
||||
auto _title = to_u32string(i.title);
|
||||
std::copy(_title.begin(), _title.end(), &cp1[i.position_left + off]);
|
||||
} else if(i.type == -2) {
|
||||
auto _title = string_to_vector((stringfmt() << i.port << "-" << i.controller).str());
|
||||
auto _title = to_u32string((stringfmt() << i.port << "-" << i.controller).str());
|
||||
std::copy(_title.begin(), _title.end(), &cp1[i.position_left + off]);
|
||||
}
|
||||
}
|
||||
//Line2
|
||||
for(auto i : controlinfo) {
|
||||
auto _title = string_to_vector(i.title);
|
||||
auto _title = to_u32string(i.title);
|
||||
if(i.type == -1 || i.type == 1)
|
||||
std::copy(_title.begin(), _title.end(), &cp2[i.position_left + off]);
|
||||
if(i.type == 0)
|
||||
cp2[i.position_left + off] = i.ch;
|
||||
}
|
||||
_line1 = vector_to_string(cp1);
|
||||
_line2 = vector_to_string(cp2);
|
||||
_line1 = to_u8string(cp1);
|
||||
_line2 = to_u8string(cp2);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue