Use std::u32string instead of std::vector<uint32_t> for UTF-32 strings

This also uncovered a bug in copy_from_utf8.
This commit is contained in:
Ilari Liusvaara 2013-03-25 00:29:48 +02:00
parent 3c2aefdc06
commit 92adf8519c
6 changed files with 68 additions and 98 deletions

View file

@ -9,22 +9,6 @@
#include <map>
#include "framebuffer.hpp"
/**
 * Map key for glyphs and ligatures: wraps a sequence of 32-bit codepoint values.
 *
 * The constructor, operator< and operator== are implemented out of line; the
 * remaining comparison operators are derived from those two.
 */
class ligature_key
{
public:
	/** Create a key from the given codepoint sequence. */
	ligature_key(const std::vector<uint32_t>& key) throw(std::bad_alloc);

	/** Read-only access to the stored codepoint sequence. */
	const std::vector<uint32_t>& get() const throw() { return ikey; }
	/** Number of codepoints stored in the key. */
	size_t length() const throw() { return ikey.size(); }

	/** Primitive comparisons (defined out of line). */
	bool operator<(const ligature_key& key) const throw();
	bool operator==(const ligature_key& key) const throw();

	/** Comparisons derived from operator< and operator==. */
	bool operator>(const ligature_key& key) const throw() { return key < *this; }
	bool operator<=(const ligature_key& key) const throw() { return !(*this > key); }
	bool operator>=(const ligature_key& key) const throw() { return !operator<(key); }
	bool operator!=(const ligature_key& key) const throw() { return !(key == *this); }
private:
	std::vector<uint32_t> ikey;
};
struct font_glyph_data
{
font_glyph_data();
@ -43,13 +27,13 @@ struct custom_font
public:
custom_font();
custom_font(const std::string& file);
void add(const ligature_key& key, const font_glyph_data& glyph) throw(std::bad_alloc);
ligature_key best_ligature_match(const std::vector<uint32_t>& codepoints, size_t start) const
void add(const std::u32string& key, const font_glyph_data& glyph) throw(std::bad_alloc);
std::u32string best_ligature_match(const std::u32string& codepoints, size_t start) const
throw(std::bad_alloc);
const font_glyph_data& lookup_glyph(const ligature_key& key) const throw();
const font_glyph_data& lookup_glyph(const std::u32string& key) const throw();
unsigned get_rowadvance() const throw() { return rowadvance; }
private:
std::map<ligature_key, font_glyph_data> glyphs;
std::map<std::u32string, font_glyph_data> glyphs;
unsigned rowadvance;
};

View file

@ -1,6 +1,7 @@
#ifndef _library__utf8__hpp__included__
#define _library__utf8__hpp__included__
#include <iostream>
#include <cstdint>
#include <cstdlib>
#include <string>
@ -27,6 +28,16 @@ int32_t utf8_parse_byte(int ch, uint16_t& state) throw();
*/
size_t utf8_strlen(const std::string& str) throw();
/**
* Transform UTF-8 into UTF-32.
*/
std::u32string to_u32string(const std::string& utf8);
/**
* Transform UTF-32 into UTF-8.
*/
std::string to_u8string(const std::u32string& utf32);
/**
* Iterator copy from UTF-8 to UTF-32
*/
@ -35,7 +46,7 @@ inline void copy_from_utf8(srcitr begin, srcitr end, dstitr target)
{
uint16_t state = utf8_initial_state;
for(srcitr i = begin; i != end; i++) {
int32_t x = utf8_parse_byte(*i, state);
int32_t x = utf8_parse_byte((unsigned char)*i, state);
if(x >= 0) {
*target = x;
++target;

View file

@ -4,29 +4,6 @@
#include "zip.hpp"
#include "string.hpp"
/**
 * Construct a key that stores its own copy of the supplied codepoint sequence.
 */
ligature_key::ligature_key(const std::vector<uint32_t>& key) throw(std::bad_alloc)
	: ikey(key)
{
}
/**
 * Lexicographic "less than": the first differing element decides; if one
 * sequence is a prefix of the other, the shorter one orders first.
 */
bool ligature_key::operator<(const ligature_key& key) const throw()
{
	const std::vector<uint32_t>& lhs = ikey;
	const std::vector<uint32_t>& rhs = key.ikey;
	size_t pos = 0;
	while(pos < lhs.size() && pos < rhs.size()) {
		if(lhs[pos] != rhs[pos])
			return lhs[pos] < rhs[pos];
		++pos;
	}
	//Common prefix is identical, so only the lengths can tell the keys apart.
	return lhs.size() < rhs.size();
}
/**
 * Equality: both keys have the same length and the same element at every index.
 */
bool ligature_key::operator==(const ligature_key& key) const throw()
{
	if(ikey.size() != key.ikey.size())
		return false;
	for(size_t pos = 0; pos < ikey.size(); ++pos)
		if(ikey[pos] != key.ikey[pos])
			return false;
	return true;
}
namespace
{
void bound(int32_t c, uint32_t odim, uint32_t dim, uint32_t& dc, uint32_t& off, uint32_t& size)
@ -182,7 +159,7 @@ custom_font::custom_font(const std::string& file)
zip_reader r(file);
for(auto member : r) {
//Parse the key out of filename.
std::vector<uint32_t> k;
std::u32string key;
std::string tname = member;
std::string tmp;
if(tname == "bad") {
@ -190,14 +167,13 @@ custom_font::custom_font(const std::string& file)
} else if(regex_match("[0-9]+(-[0-9]+)*", tname))
while(tname != "") {
extract_token(tname, tmp, "-");
k.push_back(parse_value<uint32_t>(tmp));
key.append(1, parse_value<uint32_t>(tmp));
}
else {
delete toclose;
toclose = NULL;
continue;
}
ligature_key key(k);
std::istream& s = r[member];
toclose = &s;
try {
@ -221,44 +197,44 @@ custom_font::custom_font(const std::string& file)
}
}
//Stream a glyph key in textual form: an empty key is written as "bad", any other key as
//its elements in order, separated by "-".
//NOTE(review): this span is taken from a diff view, so the old (ligature_key) and new
//(std::u32string) variants of the signature and of the element-output line both appear.
std::ostream& operator<<(std::ostream& os, const ligature_key& lkey)
std::ostream& operator<<(std::ostream& os, const std::u32string& lkey)
{
//Empty key gets the fixed name "bad".
if(!lkey.length())
return (os << "bad");
for(size_t i = 0; i < lkey.length(); i++) {
//Separator goes before every element except the first.
if(i)
os << "-";
os << lkey.get()[i];
//Cast to uint32_t so the element is written as an integer value.
os << static_cast<uint32_t>(lkey[i]);
}
return os;
}
//Store a glyph under the given key (assigning over any entry already stored for that key)
//and raise rowadvance to the glyph's height if the glyph is taller than the current value.
//NOTE(review): this span is taken from a diff view, so both the old (ligature_key) and the
//new (std::u32string) signature lines appear.
void custom_font::add(const ligature_key& key, const font_glyph_data& glyph) throw(std::bad_alloc)
void custom_font::add(const std::u32string& key, const font_glyph_data& glyph) throw(std::bad_alloc)
{
glyphs[key] = glyph;
//rowadvance only ever grows here: it tracks the largest glyph height added so far.
if(glyph.height > rowadvance)
rowadvance = glyph.height;
}
ligature_key custom_font::best_ligature_match(const std::vector<uint32_t>& codepoints, size_t start) const
std::u32string custom_font::best_ligature_match(const std::u32string& codepoints, size_t start) const
throw(std::bad_alloc)
{
std::vector<uint32_t> tmp;
if(start >= codepoints.size())
return ligature_key(tmp); //Bad.
ligature_key best(tmp);
std::u32string tmp;
if(start >= codepoints.length())
return tmp; //Bad.
std::u32string best = tmp;
for(size_t i = 1; i <= codepoints.size() - start; i++) {
tmp.push_back(codepoints[start + i - 1]);
ligature_key lkey(tmp);
tmp.append(1, codepoints[start + i - 1]);
std::u32string lkey = tmp;
if(glyphs.count(lkey))
best = lkey;
auto j = glyphs.lower_bound(lkey);
//If lower_bound is greater than equivalent length of string, there can be no better match.
if(j == glyphs.end())
break;
const std::vector<uint32_t>& tmp2 = j->first.get();
const std::u32string& tmp2 = j->first;
bool best_found = false;
for(size_t k = 0; k < tmp2.size() && start + k < codepoints.size(); k++)
for(size_t k = 0; k < tmp2.length() && start + k < codepoints.length(); k++)
if(tmp2[k] > codepoints[start + k]) {
best_found = true;
break;
@ -270,7 +246,7 @@ ligature_key custom_font::best_ligature_match(const std::vector<uint32_t>& codep
return best;
}
const font_glyph_data& custom_font::lookup_glyph(const ligature_key& key) const throw()
const font_glyph_data& custom_font::lookup_glyph(const std::u32string& key) const throw()
{
static font_glyph_data empty_glyph;
auto i = glyphs.find(key);

View file

@ -1,3 +1,4 @@
#include <sstream>
#include "utf8.hpp"
namespace
@ -157,6 +158,33 @@ size_t utf8_strlen(const std::string& str) throw()
return r;
}
/**
 * Transform UTF-8 into UTF-32.
 *
 * The result is pre-sized with utf8_strlen() and then filled in place by copy_from_utf8().
 * NOTE(review): this assumes copy_from_utf8() writes exactly utf8_strlen(utf8) elements for
 * every input -- confirm against both implementations.
 */
std::u32string to_u32string(const std::string& utf8)
{
	std::u32string decoded(utf8_strlen(utf8), char32_t());
	copy_from_utf8(utf8.begin(), utf8.end(), decoded.begin());
	return decoded;
}
/**
 * Transform UTF-32 into UTF-8.
 *
 * Each element of utf32 is encoded as 1-4 bytes depending on its value. Values up to and
 * including U+10FFFF (the last Unicode code point) are encoded; larger values are skipped
 * and produce no output. Values in the surrogate range are not treated specially and are
 * encoded like any other 3-byte value.
 *
 * Parameter utf32: The UTF-32 string to encode.
 * Returns: The UTF-8 encoding of the string.
 */
std::string to_u8string(const std::u32string& utf32)
{
	std::string out;
	//Every element needs at least one byte, so this avoids the first few reallocations.
	out.reserve(utf32.length());
	auto put = [&out](char32_t byte) {
		out.push_back(static_cast<char>(static_cast<unsigned char>(byte)));
	};
	for(char32_t cp : utf32) {
		if(cp < 0x80)
			put(cp);
		else if(cp < 0x800) {
			put(0xC0 + (cp >> 6));
			put(0x80 + (cp & 0x3F));
		} else if(cp < 0x10000) {
			put(0xE0 + (cp >> 12));
			put(0x80 + ((cp >> 6) & 0x3F));
			put(0x80 + (cp & 0x3F));
		} else if(cp <= 0x10FFFF) {
			//Inclusive bound: U+10FFFF itself is a valid code point and must be encoded.
			put(0xF0 + (cp >> 18));
			put(0x80 + ((cp >> 12) & 0x3F));
			put(0x80 + ((cp >> 6) & 0x3F));
			put(0x80 + (cp & 0x3F));
		}
	}
	return out;
}
#ifdef TEST_UTF8
#include <iostream>
char* format_dword(uint16_t s)

View file

@ -39,14 +39,13 @@ namespace
fg.set_palette(scr);
bg.set_palette(scr);
const custom_font& fdata = font->object()->get_font();
std::vector<uint32_t> _text;
copy_from_utf8(text.begin(), text.end(), std::back_inserter(_text));
std::u32string _text = to_u32string(text);
int32_t orig_x = x;
int32_t drawx = x;
int32_t drawy = y;
for(size_t i = 0; i < _text.size();) {
uint32_t cp = _text[i];
ligature_key k = fdata.best_ligature_match(_text, i);
std::u32string k = fdata.best_ligature_match(_text, i);
const font_glyph_data& glyph = fdata.lookup_glyph(k);
if(k.length())
i += k.length();

View file

@ -153,8 +153,6 @@ private:
std::string _line2;
void format_lines();
void add_port(unsigned& c, unsigned pid, const port_type& p, const port_type_set& pts);
std::string vector_to_string(const std::vector<uint32_t>& cp);
std::vector<uint32_t> string_to_vector(const std::string& str);
std::list<control_info> controlinfo;
};
@ -234,32 +232,6 @@ uint32_t frame_controls::read_pollcount(pollcounter_vector& v, unsigned idx)
return v.get_polls(idx);
}
/**
 * Encode a sequence of 32-bit codepoint values as a UTF-8 string.
 *
 * Each value is encoded as 1-4 bytes depending on its magnitude. Values up to and including
 * 0x10FFFF (the last Unicode code point) are encoded; larger values are skipped and produce
 * no output. Values in the surrogate range are not treated specially.
 *
 * Parameter cp: The codepoints to encode.
 * Returns: The UTF-8 encoded string.
 */
std::string frame_controls::vector_to_string(const std::vector<uint32_t>& cp)
{
	std::string out;
	//Every codepoint needs at least one byte.
	out.reserve(cp.size());
	auto put = [&out](uint32_t byte) {
		out.push_back(static_cast<char>(static_cast<unsigned char>(byte)));
	};
	for(uint32_t c : cp) {
		if(c < 0x80)
			put(c);
		else if(c < 0x800) {
			put(0xC0 + (c >> 6));
			put(0x80 + (c & 0x3F));
		} else if(c < 0x10000) {
			put(0xE0 + (c >> 12));
			put(0x80 + ((c >> 6) & 0x3F));
			put(0x80 + (c & 0x3F));
		} else if(c <= 0x10FFFF) {
			//Inclusive bound: 0x10FFFF itself is a valid code point and must be encoded.
			put(0xF0 + (c >> 18));
			put(0x80 + ((c >> 12) & 0x3F));
			put(0x80 + ((c >> 6) & 0x3F));
			put(0x80 + (c & 0x3F));
		}
	}
	return out;
}
/**
 * Decode a UTF-8 string into a vector of 32-bit values by appending whatever
 * copy_from_utf8() emits for the input bytes.
 */
std::vector<uint32_t> frame_controls::string_to_vector(const std::string& str)
{
	std::vector<uint32_t> decoded;
	auto sink = std::back_inserter(decoded);
	copy_from_utf8(str.begin(), str.end(), sink);
	return decoded;
}
void frame_controls::format_lines()
{
_width = 0;
@ -267,8 +239,8 @@ void frame_controls::format_lines()
if(i.position_left + i.reserved > _width)
_width = i.position_left + i.reserved;
}
std::vector<uint32_t> cp1;
std::vector<uint32_t> cp2;
std::u32string cp1;
std::u32string cp2;
uint32_t off = divcnt + 1;
cp1.resize(_width + divcnt + 1);
cp2.resize(_width + divcnt + 1);
@ -280,23 +252,23 @@ void frame_controls::format_lines()
//For every port-controller, find the least coordinate.
for(auto i : controlinfo) {
if(i.type == -1) {
auto _title = string_to_vector(i.title);
auto _title = to_u32string(i.title);
std::copy(_title.begin(), _title.end(), &cp1[i.position_left + off]);
} else if(i.type == -2) {
auto _title = string_to_vector((stringfmt() << i.port << "-" << i.controller).str());
auto _title = to_u32string((stringfmt() << i.port << "-" << i.controller).str());
std::copy(_title.begin(), _title.end(), &cp1[i.position_left + off]);
}
}
//Line2
for(auto i : controlinfo) {
auto _title = string_to_vector(i.title);
auto _title = to_u32string(i.title);
if(i.type == -1 || i.type == 1)
std::copy(_title.begin(), _title.end(), &cp2[i.position_left + off]);
if(i.type == 0)
cp2[i.position_left + off] = i.ch;
}
_line1 = vector_to_string(cp1);
_line2 = vector_to_string(cp2);
_line1 = to_u8string(cp1);
_line2 = to_u8string(cp2);
}