Refactor library UTF8 to dedicated namespace

This commit is contained in:
Ilari Liusvaara 2013-12-20 12:39:24 +02:00
parent 299a4c0e3c
commit 78cade7daa
16 changed files with 94 additions and 121 deletions

View file

@ -188,16 +188,16 @@ public:
pointer& pastend_inplace() throw(std::bad_alloc) { return field_inplace(U"-"); } pointer& pastend_inplace() throw(std::bad_alloc) { return field_inplace(U"-"); }
pointer index(uint64_t idx) const throw(std::bad_alloc); pointer index(uint64_t idx) const throw(std::bad_alloc);
pointer& index_inplace(uint64_t idx) throw(std::bad_alloc); pointer& index_inplace(uint64_t idx) throw(std::bad_alloc);
pointer field(const std::string& fld) const throw(std::bad_alloc) { return field(to_u32string(fld)); } pointer field(const std::string& fld) const throw(std::bad_alloc) { return field(utf8::to32(fld)); }
pointer& field_inplace(const std::string& fld) throw(std::bad_alloc) pointer& field_inplace(const std::string& fld) throw(std::bad_alloc)
{ {
return field_inplace(to_u32string(fld)); return field_inplace(utf8::to32(fld));
} }
pointer field(const std::u32string& fld) const throw(std::bad_alloc); pointer field(const std::u32string& fld) const throw(std::bad_alloc);
pointer& field_inplace(const std::u32string& fld) throw(std::bad_alloc); pointer& field_inplace(const std::u32string& fld) throw(std::bad_alloc);
pointer remove() const throw(std::bad_alloc); pointer remove() const throw(std::bad_alloc);
pointer& remove_inplace() throw(std::bad_alloc); pointer& remove_inplace() throw(std::bad_alloc);
std::string as_string8() const { return to_u8string(_pointer); } std::string as_string8() const { return utf8::to8(_pointer); }
std::u32string as_string() const { return _pointer; } std::u32string as_string() const { return _pointer; }
friend std::ostream& operator<<(std::ostream& s, const pointer& p); friend std::ostream& operator<<(std::ostream& s, const pointer& p);
friend std::basic_ostream<char32_t>& operator<<(std::basic_ostream<char32_t>& s, const pointer& p); friend std::basic_ostream<char32_t>& operator<<(std::basic_ostream<char32_t>& s, const pointer& p);
@ -258,7 +258,7 @@ public:
int type_of(const std::u32string& pointer) const throw(std::bad_alloc); int type_of(const std::u32string& pointer) const throw(std::bad_alloc);
int type_of(const std::string& pointer) const throw(std::bad_alloc) int type_of(const std::string& pointer) const throw(std::bad_alloc)
{ {
return type_of(to_u32string(pointer)); return type_of(utf8::to32(pointer));
} }
int type_of(const pointer& ptr) const throw(std::bad_alloc) int type_of(const pointer& ptr) const throw(std::bad_alloc)
{ {
@ -270,7 +270,7 @@ public:
int type_of_indirect(const std::u32string& pointer) const throw(std::bad_alloc); int type_of_indirect(const std::u32string& pointer) const throw(std::bad_alloc);
int type_of_indirect(const std::string& pointer) const throw(std::bad_alloc) int type_of_indirect(const std::string& pointer) const throw(std::bad_alloc)
{ {
return type_of_indirect(to_u32string(pointer)); return type_of_indirect(utf8::to32(pointer));
} }
int type_of_indirect(const pointer& ptr) const throw(std::bad_alloc) int type_of_indirect(const pointer& ptr) const throw(std::bad_alloc)
{ {
@ -282,7 +282,7 @@ public:
std::u32string resolve_indirect(const std::u32string& pointer) const throw(std::bad_alloc); std::u32string resolve_indirect(const std::u32string& pointer) const throw(std::bad_alloc);
std::string resolve_indirect(const std::string& pointer) const throw(std::bad_alloc) std::string resolve_indirect(const std::string& pointer) const throw(std::bad_alloc)
{ {
return to_u8string(resolve_indirect(to_u32string(pointer))); return utf8::to8(resolve_indirect(utf8::to32(pointer)));
} }
pointer resolve_indirect(const pointer& ptr) const throw(std::bad_alloc) pointer resolve_indirect(const pointer& ptr) const throw(std::bad_alloc)
{ {
@ -304,7 +304,7 @@ public:
* Read the string as UTF-8 (NT_STRING). * Read the string as UTF-8 (NT_STRING).
*/ */
const std::u32string& as_string() const throw(std::bad_alloc, error); const std::u32string& as_string() const throw(std::bad_alloc, error);
std::string as_string8() const throw(std::bad_alloc, error) { return to_u8string(as_string()); } std::string as_string8() const throw(std::bad_alloc, error) { return utf8::to8(as_string()); }
/** /**
* Get boolean value (NT_BOOLEAN). * Get boolean value (NT_BOOLEAN).
*/ */
@ -329,7 +329,7 @@ public:
size_t field_count(const std::u32string& key) const throw(error); size_t field_count(const std::u32string& key) const throw(error);
size_t field_count(const std::string& key) const throw(std::bad_alloc, error) size_t field_count(const std::string& key) const throw(std::bad_alloc, error)
{ {
return field_count(to_u32string(key)); return field_count(utf8::to32(key));
} }
/** /**
* Specified field exists (NT_OBJECT) * Specified field exists (NT_OBJECT)
@ -337,7 +337,7 @@ public:
bool field_exists(const std::u32string& key) const throw(error); bool field_exists(const std::u32string& key) const throw(error);
bool field_exists(const std::string& key) const throw(std::bad_alloc, error) bool field_exists(const std::string& key) const throw(std::bad_alloc, error)
{ {
return field_exists(to_u32string(key)); return field_exists(utf8::to32(key));
} }
/** /**
* Read specified key from object (NT_OBJECT). * Read specified key from object (NT_OBJECT).
@ -351,7 +351,7 @@ public:
} }
const node& field(const std::string& key, size_t subindex = 0) const throw(std::bad_alloc, error) const node& field(const std::string& key, size_t subindex = 0) const throw(std::bad_alloc, error)
{ {
return field(to_u32string(key), subindex); return field(utf8::to32(key), subindex);
} }
/** /**
@ -366,7 +366,7 @@ public:
} }
const node& follow(const std::string& pointer) const throw(std::bad_alloc, error) const node& follow(const std::string& pointer) const throw(std::bad_alloc, error)
{ {
return follow(to_u32string(pointer)); return follow(utf8::to32(pointer));
} }
const node& follow(const pointer& ptr) const throw(std::bad_alloc, error) const node& follow(const pointer& ptr) const throw(std::bad_alloc, error)
{ {
@ -381,7 +381,7 @@ public:
} }
const node& follow_indirect(const std::string& pointer) const throw(std::bad_alloc, error) const node& follow_indirect(const std::string& pointer) const throw(std::bad_alloc, error)
{ {
return follow_indirect(to_u32string(pointer)); return follow_indirect(utf8::to32(pointer));
} }
const node& follow_indirect(const pointer& ptr) const throw(std::bad_alloc, error) const node& follow_indirect(const pointer& ptr) const throw(std::bad_alloc, error)
{ {
@ -402,7 +402,7 @@ public:
node& set(string_tag, const std::u32string& key) throw(std::bad_alloc); node& set(string_tag, const std::u32string& key) throw(std::bad_alloc);
node& set(string_tag tag, const std::string& key) throw(std::bad_alloc) node& set(string_tag tag, const std::string& key) throw(std::bad_alloc)
{ {
return set(tag, to_u32string(key)); return set(tag, utf8::to32(key));
} }
/** /**
* Read/Write specified index from array (NT_ARRAY). * Read/Write specified index from array (NT_ARRAY).
@ -430,7 +430,7 @@ public:
} }
node& field(const std::string& key, size_t subindex = 0) throw(std::bad_alloc, error) node& field(const std::string& key, size_t subindex = 0) throw(std::bad_alloc, error)
{ {
return field(to_u32string(key), subindex); return field(utf8::to32(key), subindex);
} }
/** /**
* Insert new element to object (NT_OBJECT). * Insert new element to object (NT_OBJECT).
@ -438,7 +438,7 @@ public:
node& insert(const std::u32string& key, const node& node) throw(std::bad_alloc, error); node& insert(const std::u32string& key, const node& node) throw(std::bad_alloc, error);
node& insert(const std::string& key, const node& node) throw(std::bad_alloc, error) node& insert(const std::string& key, const node& node) throw(std::bad_alloc, error)
{ {
return insert(to_u32string(key), node); return insert(utf8::to32(key), node);
} }
/** /**
* Apply JSON pointer (RFC 6901). * Apply JSON pointer (RFC 6901).
@ -452,7 +452,7 @@ public:
} }
node& follow(const std::string& pointer) throw(std::bad_alloc, error) node& follow(const std::string& pointer) throw(std::bad_alloc, error)
{ {
return follow(to_u32string(pointer)); return follow(utf8::to32(pointer));
} }
node& follow(const pointer& ptr) throw(std::bad_alloc, error) node& follow(const pointer& ptr) throw(std::bad_alloc, error)
{ {
@ -467,7 +467,7 @@ public:
} }
node& follow_indirect(const std::string& pointer) throw(std::bad_alloc, error) node& follow_indirect(const std::string& pointer) throw(std::bad_alloc, error)
{ {
return follow_indirect(to_u32string(pointer)); return follow_indirect(utf8::to32(pointer));
} }
node& follow_indirect(const pointer& ptr) throw(std::bad_alloc, error) node& follow_indirect(const pointer& ptr) throw(std::bad_alloc, error)
{ {
@ -479,7 +479,7 @@ public:
node& operator[](const std::u32string& pointer) throw(std::bad_alloc, error); node& operator[](const std::u32string& pointer) throw(std::bad_alloc, error);
node& operator[](const std::string& pointer) throw(std::bad_alloc, error) node& operator[](const std::string& pointer) throw(std::bad_alloc, error)
{ {
return (*this)[to_u32string(pointer)]; return (*this)[utf8::to32(pointer)];
} }
node& operator[](const pointer& ptr) throw(std::bad_alloc, error) node& operator[](const pointer& ptr) throw(std::bad_alloc, error)
{ {
@ -491,7 +491,7 @@ public:
node& insert_node(const std::u32string& pointer, const node& nwn) throw(std::bad_alloc, error); node& insert_node(const std::u32string& pointer, const node& nwn) throw(std::bad_alloc, error);
node& insert_node(const std::string& pointer, const node& nwn) throw(std::bad_alloc, error) node& insert_node(const std::string& pointer, const node& nwn) throw(std::bad_alloc, error)
{ {
return insert_node(to_u32string(pointer), nwn); return insert_node(utf8::to32(pointer), nwn);
} }
node& insert_node(const pointer& ptr, const node& nwn) throw(std::bad_alloc, error) node& insert_node(const pointer& ptr, const node& nwn) throw(std::bad_alloc, error)
{ {
@ -503,7 +503,7 @@ public:
node delete_node(const std::u32string& pointer) throw(std::bad_alloc, error); node delete_node(const std::u32string& pointer) throw(std::bad_alloc, error);
node delete_node(const std::string& pointer) throw(std::bad_alloc, error) node delete_node(const std::string& pointer) throw(std::bad_alloc, error)
{ {
return delete_node(to_u32string(pointer)); return delete_node(utf8::to32(pointer));
} }
node delete_node(const pointer& ptr) throw(std::bad_alloc, error) node delete_node(const pointer& ptr) throw(std::bad_alloc, error)
{ {
@ -534,7 +534,7 @@ public:
void erase_field(const std::u32string& fld, size_t idx = 0) throw(error); void erase_field(const std::u32string& fld, size_t idx = 0) throw(error);
void erase_field(const std::string& fld, size_t idx = 0) throw(std::bad_alloc, error) void erase_field(const std::string& fld, size_t idx = 0) throw(std::bad_alloc, error)
{ {
erase_field(to_u32string(fld), idx); erase_field(utf8::to32(fld), idx);
} }
/** /**
 * Delete an entire array field. * Delete an entire array field.
@ -542,7 +542,7 @@ public:
void erase_field_all(const std::u32string& fld) throw(error); void erase_field_all(const std::u32string& fld) throw(error);
void erase_field_all(const std::string& fld) throw(std::bad_alloc, error) void erase_field_all(const std::string& fld) throw(std::bad_alloc, error)
{ {
erase_field_all(to_u32string(fld)); erase_field_all(utf8::to32(fld));
} }
/** /**
* Apply a JSON patch. * Apply a JSON patch.
@ -566,7 +566,7 @@ public:
iterator() throw(); iterator() throw();
iterator(node& n) throw(error); iterator(node& n) throw(error);
std::u32string key() throw(std::bad_alloc, error); std::u32string key() throw(std::bad_alloc, error);
std::string key8() throw(std::bad_alloc, error) { return to_u8string(key()); } std::string key8() throw(std::bad_alloc, error) { return utf8::to8(key()); }
size_t index() throw(error); size_t index() throw(error);
node& operator*() throw(error); node& operator*() throw(error);
node* operator->() throw(error); node* operator->() throw(error);
@ -594,7 +594,7 @@ public:
const_iterator() throw(); const_iterator() throw();
const_iterator(const node& n) throw(error); const_iterator(const node& n) throw(error);
std::u32string key() throw(std::bad_alloc, error); std::u32string key() throw(std::bad_alloc, error);
std::string key8() throw(std::bad_alloc, error) { return to_u8string(key()); } std::string key8() throw(std::bad_alloc, error) { return utf8::to8(key()); }
size_t index() throw(error); size_t index() throw(error);
const node& operator*() throw(error); const node& operator*() throw(error);
const node* operator->() throw(error); const node* operator->() throw(error);

View file

@ -522,7 +522,7 @@ public:
const char* tolstring(int index, size_t& len) { return lua_tolstring(lua_handle, index, &len); } const char* tolstring(int index, size_t& len) { return lua_tolstring(lua_handle, index, &len); }
void pushlstring(const char* s, size_t len) { lua_pushlstring(lua_handle, s, len); } void pushlstring(const char* s, size_t len) { lua_pushlstring(lua_handle, s, len); }
void pushlstring(const std::string& s) { lua_pushlstring(lua_handle, s.c_str(), s.length()); } void pushlstring(const std::string& s) { lua_pushlstring(lua_handle, s.c_str(), s.length()); }
void pushlstring(const char32_t* s, size_t len) { pushlstring(to_u8string(std::u32string(s, len))); } void pushlstring(const char32_t* s, size_t len) { pushlstring(utf8::to8(std::u32string(s, len))); }
int pcall(int nargs, int nresults, int errfunc) { return lua_pcall(lua_handle, nargs, nresults, errfunc); } int pcall(int nargs, int nresults, int errfunc) { return lua_pcall(lua_handle, nargs, nresults, errfunc); }
int next(int index) { return lua_next(lua_handle, index); } int next(int index) { return lua_next(lua_handle, index); }
int isnoneornil(int index) { return lua_isnoneornil(lua_handle, index); } int isnoneornil(int index) { return lua_isnoneornil(lua_handle, index); }

View file

@ -33,7 +33,7 @@ class stringfmt
public: public:
stringfmt() {} stringfmt() {}
std::string str() { return x.str(); } std::string str() { return x.str(); }
std::u32string str32() { return to_u32string(x.str()); } std::u32string str32() { return utf8::to32(x.str()); }
template<typename T> stringfmt& operator<<(const T& y) { x << y; return *this; } template<typename T> stringfmt& operator<<(const T& y) { x << y; return *this; }
void throwex() { throw std::runtime_error(x.str()); } void throwex() { throw std::runtime_error(x.str()); }
private: private:

View file

@ -7,10 +7,12 @@
#include <string> #include <string>
#include <functional> #include <functional>
namespace utf8
{
/** /**
* Initial state for UTF-8 parser. * Initial state for UTF-8 parser.
*/ */
extern const uint16_t utf8_initial_state; extern const uint16_t initial_state;
/** /**
* Parse a byte. * Parse a byte.
* *
@ -20,38 +22,38 @@ extern const uint16_t utf8_initial_state;
* *
* Note: When called with EOF, max 1 codepoint can be emitted. * Note: When called with EOF, max 1 codepoint can be emitted.
*/ */
int32_t utf8_parse_byte(int ch, uint16_t& state) throw(); int32_t parse_byte(int ch, uint16_t& state) throw();
/** /**
* Return length of string in UTF-8 codepoints. * Return length of string in UTF-8 codepoints.
* *
* Parameter str: The string. * Parameter str: The string.
* Returns: The length in codepoints. * Returns: The length in codepoints.
*/ */
size_t utf8_strlen(const std::string& str) throw(); size_t strlen(const std::string& str) throw();
/** /**
* Transform UTF-8 into UTF-32. * Transform UTF-8 into UTF-32.
*/ */
std::u32string to_u32string(const std::string& utf8); std::u32string to32(const std::string& utf8);
/** /**
* Transform UTF-32 into UTF-8. * Transform UTF-32 into UTF-8.
*/ */
std::string to_u8string(const std::u32string& utf32); std::string to8(const std::u32string& utf32);
/** /**
 * Decode a UTF-8 iterator range, passing each UTF-32 codepoint to a function * Decode a UTF-8 iterator range, passing each UTF-32 codepoint to a function
*/ */
template<typename srcitr> template<typename srcitr>
inline void copy_from_utf8_2(srcitr begin, srcitr end, std::function<void(int32_t)> target) inline void to32i2(srcitr begin, srcitr end, std::function<void(int32_t)> target)
{ {
uint16_t state = utf8_initial_state; uint16_t state = initial_state;
for(srcitr i = begin; i != end; i++) { for(srcitr i = begin; i != end; i++) {
int32_t x = utf8_parse_byte((unsigned char)*i, state); int32_t x = parse_byte((unsigned char)*i, state);
if(x >= 0) if(x >= 0)
target(x); target(x);
} }
int32_t x = utf8_parse_byte(-1, state); int32_t x = parse_byte(-1, state);
if(x >= 0) if(x >= 0)
target(x); target(x);
} }
@ -60,10 +62,10 @@ inline void copy_from_utf8_2(srcitr begin, srcitr end, std::function<void(int32_
* Iterator copy from UTF-8 to UTF-32 * Iterator copy from UTF-8 to UTF-32
*/ */
template<typename srcitr, typename dstitr> template<typename srcitr, typename dstitr>
inline void copy_from_utf8(srcitr begin, srcitr end, dstitr target) inline void to32i(srcitr begin, srcitr end, dstitr target)
{ {
copy_from_utf8_2(begin, end, [&target](int32_t x) { *target = x; ++target; }); to32i2(begin, end, [&target](int32_t x) { *target = x; ++target; });
} }
}
#endif #endif

View file

@ -72,23 +72,11 @@ void cover_render_character(void* fb, unsigned x, unsigned y, uint32_t ch, uint3
void cover_render_string(void* fb, unsigned x, unsigned y, const std::string& str, uint32_t fg, uint32_t bg, void cover_render_string(void* fb, unsigned x, unsigned y, const std::string& str, uint32_t fg, uint32_t bg,
size_t w, size_t h, size_t istride, size_t pstride) size_t w, size_t h, size_t istride, size_t pstride)
{ {
size_t spos = 0; utf8::to32i2(str.begin(), str.end(), [fb, &x, &y, fg, bg, w, h, istride, pstride](int32_t u) {
size_t slen = str.length();
uint16_t state = utf8_initial_state;
while(true) {
int ch = (spos < slen) ? (unsigned char)str[spos] : - 1;
int32_t u = utf8_parse_byte(ch, state);
if(u < 0) {
if(ch < 0)
break;
spos++;
continue;
}
if(u != 9 && u != 10) if(u != 9 && u != 10)
cover_render_character(fb, x, y, u, fg, bg, w, h, istride, pstride); cover_render_character(fb, x, y, u, fg, bg, w, h, istride, pstride);
cover_next_position(u, x, y); cover_next_position(u, x, y);
spos++; });
}
} }
void cover_next_position(uint32_t ch, unsigned& x, unsigned& y) void cover_next_position(uint32_t ch, unsigned& x, unsigned& y)
@ -106,21 +94,9 @@ void cover_next_position(uint32_t ch, unsigned& x, unsigned& y)
void cover_next_position(const std::string& str, unsigned& x, unsigned& y) void cover_next_position(const std::string& str, unsigned& x, unsigned& y)
{ {
size_t spos = 0; utf8::to32i2(str.begin(), str.end(), [&x, &y](int32_t u) {
size_t slen = str.length();
uint16_t state = utf8_initial_state;
while(true) {
int ch = (spos < slen) ? (unsigned char)str[spos] : - 1;
int32_t u = utf8_parse_byte(ch, state);
if(u < 0) {
if(ch < 0)
break;
spos++;
continue;
}
cover_next_position(u, x, y); cover_next_position(u, x, y);
spos++; });
}
} }
std::vector<std::string> cover_information() std::vector<std::string> cover_information()

View file

@ -77,16 +77,16 @@ namespace
ret.type = port_controller_button::TYPE_BUTTON; ret.type = port_controller_button::TYPE_BUTTON;
ret.name = read_str(root, pname); ret.name = read_str(root, pname);
std::u32string symbol = (root.type_of(psymbol) != JSON::none) ? read_str32(root, psymbol) : std::u32string symbol = (root.type_of(psymbol) != JSON::none) ? read_str32(root, psymbol) :
to_u32string(ret.name); utf8::to32(ret.name);
if(symbol.length() != 1) if(symbol.length() != 1)
(stringfmt() << "Symbol at '" << ptr << "' must be 1 codepoint").throwex(); (stringfmt() << "Symbol at '" << ptr << "' must be 1 codepoint").throwex();
ret.symbol = symbol[0]; ret.symbol = symbol[0];
ret.rmin = 0; ret.rmin = 0;
ret.rmax = 0; ret.rmax = 0;
ret.centers = false; ret.centers = false;
ret.macro = (root.type_of(pmacro) != JSON::none) ? read_str(root, pmacro) : to_u8string(symbol); ret.macro = (root.type_of(pmacro) != JSON::none) ? read_str(root, pmacro) : utf8::to8(symbol);
std::string movie = (root.type_of(pmovie) != JSON::none) ? read_str(root, pmovie) : std::string movie = (root.type_of(pmovie) != JSON::none) ? read_str(root, pmovie) :
to_u8string(symbol); utf8::to8(symbol);
if(movie.length() != 1) if(movie.length() != 1)
(stringfmt() << "Movie at '" << ptr << "' must be 1 character").throwex(); (stringfmt() << "Movie at '" << ptr << "' must be 1 character").throwex();
ret.msymbol = movie[0]; ret.msymbol = movie[0];

View file

@ -11,7 +11,7 @@ emulator_status::~emulator_status() throw()
void emulator_status::set(const std::string& key, const std::string& value) throw(std::bad_alloc) void emulator_status::set(const std::string& key, const std::string& value) throw(std::bad_alloc)
{ {
umutex_class h(lock); umutex_class h(lock);
content[key] = to_u32string(value); content[key] = utf8::to32(value);
} }
void emulator_status::set(const std::string& key, const std::u32string& value) throw(std::bad_alloc) void emulator_status::set(const std::string& key, const std::u32string& value) throw(std::bad_alloc)

View file

@ -731,12 +731,12 @@ std::pair<size_t, size_t> font::get_metrics(const std::string& string) throw()
int32_t lineminy = 0; int32_t lineminy = 0;
int32_t linemaxy = 0; int32_t linemaxy = 0;
size_t linelength = 0; size_t linelength = 0;
uint16_t utfstate = utf8_initial_state; uint16_t utfstate = utf8::initial_state;
size_t itr = 0; size_t itr = 0;
size_t maxitr = string.length(); size_t maxitr = string.length();
while(true) { while(true) {
int ch = (itr < maxitr) ? static_cast<unsigned char>(string[itr++]) : -1; int ch = (itr < maxitr) ? static_cast<unsigned char>(string[itr++]) : -1;
int32_t cp = utf8_parse_byte(ch, utfstate); int32_t cp = utf8::parse_byte(ch, utfstate);
if(cp < 0 && ch < 0) { if(cp < 0 && ch < 0) {
//The end. //The end.
commit_width = (commit_width < linelength) ? linelength : commit_width; commit_width = (commit_width < linelength) ? linelength : commit_width;
@ -765,18 +765,14 @@ std::pair<size_t, size_t> font::get_metrics(const std::string& string) throw()
std::vector<font::layout> font::dolayout(const std::string& string) throw(std::bad_alloc) std::vector<font::layout> font::dolayout(const std::string& string) throw(std::bad_alloc)
{ {
//First, calculate the number of glyphs to draw. //First, calculate the number of glyphs to draw.
uint16_t utfstate = utf8_initial_state; uint16_t utfstate = utf8::initial_state;
size_t itr = 0; size_t itr = 0;
size_t maxitr = string.length(); size_t maxitr = string.length();
size_t chars = 0; size_t chars = 0;
while(true) { utf8::to32i2(string.begin(), string.end(), [&chars](int32_t cp) {
int ch = (itr < maxitr) ? static_cast<unsigned char>(string[itr++]) : -1;
int32_t cp = utf8_parse_byte(ch, utfstate);
if(cp < 0 && ch < 0)
break;
if(cp != 9 && cp != 10) if(cp != 9 && cp != 10)
chars++; chars++;
} });
//Allocate space. //Allocate space.
std::vector<layout> l; std::vector<layout> l;
l.resize(chars); l.resize(chars);
@ -784,10 +780,10 @@ std::vector<font::layout> font::dolayout(const std::string& string) throw(std::b
size_t gtr = 0; size_t gtr = 0;
size_t layout_x = 0; size_t layout_x = 0;
size_t layout_y = 0; size_t layout_y = 0;
utfstate = utf8_initial_state; utfstate = utf8::initial_state;
while(true) { while(true) {
int ch = (itr < maxitr) ? static_cast<unsigned char>(string[itr++]) : -1; int ch = (itr < maxitr) ? static_cast<unsigned char>(string[itr++]) : -1;
int32_t cp = utf8_parse_byte(ch, utfstate); int32_t cp = utf8::parse_byte(ch, utfstate);
if(cp < 0 && ch < 0) if(cp < 0 && ch < 0)
break; break;
const glyph& g = get_glyph(cp); const glyph& g = get_glyph(cp);
@ -814,18 +810,14 @@ template<bool X> void font::render(struct fb<X>& scr, int32_t x, int32_t y, cons
{ {
x += scr.get_origin_x(); x += scr.get_origin_x();
y += scr.get_origin_y(); y += scr.get_origin_y();
uint16_t utfstate = utf8_initial_state;
size_t itr = 0; size_t itr = 0;
size_t maxitr = text.length(); size_t maxitr = text.length();
size_t layout_x = 0; size_t layout_x = 0;
size_t layout_y = 0; size_t layout_y = 0;
size_t swidth = scr.get_width(); size_t swidth = scr.get_width();
size_t sheight = scr.get_height(); size_t sheight = scr.get_height();
while(true) { utf8::to32i2(text.begin(), text.end(), [this, x, y, &scr, &layout_x, &layout_y, swidth, sheight, hdbl, vdbl,
int ch = (itr < maxitr) ? static_cast<unsigned char>(text[itr++]) : -1; &fg, &bg](int32_t cp) {
int32_t cp = utf8_parse_byte(ch, utfstate);
if(cp < 0 && ch < 0)
break;
const glyph& g = get_glyph(cp); const glyph& g = get_glyph(cp);
switch(cp) { switch(cp) {
case 9: case 9:
@ -891,7 +883,7 @@ template<bool X> void font::render(struct fb<X>& scr, int32_t x, int32_t y, cons
} }
layout_x += (hdbl ? 2 : 1) * (g.wide ? 16 : 8); layout_x += (hdbl ? 2 : 1) * (g.wide ? 16 : 8);
} }
} });
} }
void color::set_palette(unsigned rshift, unsigned gshift, unsigned bshift, bool X) throw() void color::set_palette(unsigned rshift, unsigned gshift, unsigned bshift, bool X) throw()

View file

@ -314,7 +314,7 @@ node::node() throw() : node(null) {}
node::node(null_tag) throw() { vtype = null; } node::node(null_tag) throw() { vtype = null; }
node::node(boolean_tag, bool b) throw() { vtype = boolean; _boolean = b; } node::node(boolean_tag, bool b) throw() { vtype = boolean; _boolean = b; }
node::node(string_tag, const std::u32string& str) throw(std::bad_alloc) { vtype = string; _string = str; } node::node(string_tag, const std::u32string& str) throw(std::bad_alloc) { vtype = string; _string = str; }
node::node(string_tag, const std::string& str) throw(std::bad_alloc) { vtype = string; _string = to_u32string(str); } node::node(string_tag, const std::string& str) throw(std::bad_alloc) { vtype = string; _string = utf8::to32(str); }
node::node(number_tag, double n) throw() { vtype = number; _number.from<double>(n); } node::node(number_tag, double n) throw() { vtype = number; _number.from<double>(n); }
node::node(number_tag, int64_t n) throw() { vtype = number; _number.from<int64_t>(n); } node::node(number_tag, int64_t n) throw() { vtype = number; _number.from<int64_t>(n); }
node::node(number_tag, uint64_t n) throw() { vtype = number; _number.from<uint64_t>(n); } node::node(number_tag, uint64_t n) throw() { vtype = number; _number.from<uint64_t>(n); }
@ -666,7 +666,7 @@ namespace
template<typename T> size_t read_string_impl(T target, const std::string& doc, size_t ptr, size_t len) template<typename T> size_t read_string_impl(T target, const std::string& doc, size_t ptr, size_t len)
{ {
uint16_t ustate = utf8_initial_state; uint16_t ustate = utf8::initial_state;
int estate = 0; int estate = 0;
uint32_t extra = 0; uint32_t extra = 0;
uint32_t tmp; uint32_t tmp;
@ -676,7 +676,7 @@ namespace
int ch = -1; int ch = -1;
if(i < len) if(i < len)
ch = (unsigned char)doc[i]; ch = (unsigned char)doc[i];
int32_t uch = utf8_parse_byte(ch, ustate); int32_t uch = utf8::parse_byte(ch, ustate);
if(uch < 0) if(uch < 0)
continue; continue;
//Okay, have Unicode codepoint decoded. //Okay, have Unicode codepoint decoded.
@ -1608,7 +1608,7 @@ pointer::pointer()
pointer::pointer(const std::string& ptr) throw(std::bad_alloc) pointer::pointer(const std::string& ptr) throw(std::bad_alloc)
{ {
_pointer = to_u32string(ptr); _pointer = utf8::to32(ptr);
} }
pointer::pointer(const std::u32string& ptr) throw(std::bad_alloc) pointer::pointer(const std::u32string& ptr) throw(std::bad_alloc)
@ -1672,7 +1672,7 @@ pointer& pointer::remove_inplace() throw(std::bad_alloc)
std::ostream& operator<<(std::ostream& s, const pointer& p) std::ostream& operator<<(std::ostream& s, const pointer& p)
{ {
return s << to_u8string(p._pointer); return s << utf8::to8(p._pointer);
} }
std::basic_ostream<char32_t>& operator<<(std::basic_ostream<char32_t>& s, const pointer& p) std::basic_ostream<char32_t>& operator<<(std::basic_ostream<char32_t>& s, const pointer& p)

View file

@ -259,7 +259,7 @@ namespace
template<typename T> std::basic_string<T> separator(); template<typename T> std::basic_string<T> separator();
template<> std::basic_string<char> separator() template<> std::basic_string<char> separator()
{ {
return to_u8string(U"\u2023"); return utf8::to8(U"\u2023");
} }
template<> std::basic_string<char16_t> separator() template<> std::basic_string<char16_t> separator()
@ -298,7 +298,7 @@ template class string_list<char32_t>;
string_list<char> split_on_codepoint(const std::string& s, char32_t cp) string_list<char> split_on_codepoint(const std::string& s, char32_t cp)
{ {
std::string _cp = to_u8string(std::u32string(1, cp)); std::string _cp = utf8::to8(std::u32string(1, cp));
return _split_on_codepoint<char>(s, _cp); return _split_on_codepoint<char>(s, _cp);
} }

View file

@ -1,6 +1,8 @@
#include <sstream> #include <sstream>
#include "utf8.hpp" #include "utf8.hpp"
namespace utf8
{
namespace namespace
{ {
//First nibble values: //First nibble values:
@ -40,9 +42,9 @@ namespace
}; };
} }
extern const uint16_t utf8_initial_state = 0; extern const uint16_t initial_state = 0;
int32_t utf8_parse_byte(int ch, uint16_t& state) throw() int32_t parse_byte(int ch, uint16_t& state) throw()
{ {
unsigned char mch = (ch < 248) ? ch : 248; unsigned char mch = (ch < 248) ? ch : 248;
uint32_t astate = state >> 12; uint32_t astate = state >> 12;
@ -146,27 +148,27 @@ int32_t utf8_parse_byte(int ch, uint16_t& state) throw()
return -1; return -1;
} }
size_t utf8_strlen(const std::string& str) throw() size_t strlen(const std::string& str) throw()
{ {
uint16_t s = utf8_initial_state; uint16_t s = initial_state;
size_t r = 0; size_t r = 0;
for(size_t i = 0; i < str.length(); i++) for(size_t i = 0; i < str.length(); i++)
if(utf8_parse_byte(static_cast<uint8_t>(str[i]), s) >= 0) if(parse_byte(static_cast<uint8_t>(str[i]), s) >= 0)
r++; r++;
if(utf8_parse_byte(-1, s) >= 0) if(parse_byte(-1, s) >= 0)
r++; r++;
return r; return r;
} }
std::u32string to_u32string(const std::string& utf8) std::u32string to32(const std::string& utf8)
{ {
std::u32string x; std::u32string x;
x.resize(utf8_strlen(utf8)); x.resize(strlen(utf8));
copy_from_utf8(utf8.begin(), utf8.end(), x.begin()); to32i(utf8.begin(), utf8.end(), x.begin());
return x; return x;
} }
std::string to_u8string(const std::u32string& utf32) std::string to8(const std::u32string& utf32)
{ {
std::ostringstream s; std::ostringstream s;
for(auto i : utf32) { for(auto i : utf32) {
@ -184,6 +186,7 @@ std::string to_u8string(const std::u32string& utf32)
} }
return s.str(); return s.str();
} }
}
#ifdef TEST_UTF8 #ifdef TEST_UTF8
#include <iostream> #include <iostream>
@ -196,12 +199,12 @@ char* format_dword(uint16_t s)
int main() int main()
{ {
uint16_t s = utf8_initial_state; uint16_t s = utf8::initial_state;
while(true) { while(true) {
int c; int c;
int32_t d; int32_t d;
std::cin >> c; std::cin >> c;
d = utf8_parse_byte(c, s); d = utf8::parse_byte(c, s);
std::cout << "> " << d << " (status word=" << format_dword(s) << ")" << std::endl; std::cout << "> " << d << " (status word=" << format_dword(s) << ")" << std::endl;
if(c == -1 && d == -1) if(c == -1 && d == -1)
return 0; return 0;

View file

@ -43,7 +43,7 @@ namespace
bg.set_palette(scr); bg.set_palette(scr);
hl.set_palette(scr); hl.set_palette(scr);
const framebuffer::font2& fdata = font->get_font(); const framebuffer::font2& fdata = font->get_font();
std::u32string _text = to_u32string(text); std::u32string _text = utf8::to32(text);
int32_t orig_x = x; int32_t orig_x = x;
int32_t drawx = x; int32_t drawx = x;
int32_t drawy = y; int32_t drawy = y;

View file

@ -208,7 +208,7 @@ void frame_controls::add_port(unsigned& c, unsigned pid, const port_type& p, con
if(pcb.type == port_controller_button::TYPE_BUTTON) { if(pcb.type == port_controller_button::TYPE_BUTTON) {
if(last_multibyte) if(last_multibyte)
c++; c++;
controlinfo.push_back(control_info::buttoninfo(c, pcb.symbol, to_u32string(pcb.name), controlinfo.push_back(control_info::buttoninfo(c, pcb.symbol, utf8::to32(pcb.name),
idx, pid, i)); idx, pid, i));
last_multibyte = false; last_multibyte = false;
} else if(pcb.type == port_controller_button::TYPE_AXIS || } else if(pcb.type == port_controller_button::TYPE_AXIS ||
@ -217,7 +217,7 @@ void frame_controls::add_port(unsigned& c, unsigned pid, const port_type& p, con
pcb.type == port_controller_button::TYPE_LIGHTGUN) { pcb.type == port_controller_button::TYPE_LIGHTGUN) {
if(j) if(j)
c++; c++;
controlinfo.push_back(control_info::axisinfo(c, to_u32string(pcb.name), idx, pid, i, controlinfo.push_back(control_info::axisinfo(c, utf8::to32(pcb.name), idx, pid, i,
pcb.type, pcb.rmin, pcb.rmax)); pcb.type, pcb.rmin, pcb.rmax));
last_multibyte = true; last_multibyte = true;
} }
@ -340,7 +340,7 @@ namespace
char32_t tmp1[2]; char32_t tmp1[2];
tmp1[0] = i.ch; tmp1[0] = i.ch;
tmp1[1] = 0; tmp1[1] = 0;
x << to_u8string(std::u32string(tmp1)); x << utf8::to8(std::u32string(tmp1));
} else } else
x << "-"; x << "-";
last_axis = false; last_axis = false;
@ -376,7 +376,7 @@ namespace
void decode_line(frame_controls& info, controller_frame& f, std::string line, unsigned port, void decode_line(frame_controls& info, controller_frame& f, std::string line, unsigned port,
unsigned controller) unsigned controller)
{ {
std::u32string _line = to_u32string(line); std::u32string _line = utf8::to32(line);
bool last_axis = false; bool last_axis = false;
bool first = true; bool first = true;
short y; short y;
@ -579,9 +579,9 @@ namespace
std::string windowname(control_info X, control_info Y) std::string windowname(control_info X, control_info Y)
{ {
if(X.index == Y.index) if(X.index == Y.index)
return (stringfmt() << to_u8string(X.title)).str(); return (stringfmt() << utf8::to8(X.title)).str();
else else
return (stringfmt() << to_u8string(X.title) << "/" << to_u8string(Y.title)).str(); return (stringfmt() << utf8::to8(X.title) << "/" << utf8::to8(Y.title)).str();
} }
class window_prompt : public wxDialog class window_prompt : public wxDialog

View file

@ -285,12 +285,12 @@ std::string tostdstring(const wxString& str) throw(std::bad_alloc)
wxString towxstring(const std::u32string& str) throw(std::bad_alloc) wxString towxstring(const std::u32string& str) throw(std::bad_alloc)
{ {
return wxString(to_u8string(str).c_str(), wxConvUTF8); return wxString(utf8::to8(str).c_str(), wxConvUTF8);
} }
std::u32string tou32string(const wxString& str) throw(std::bad_alloc) std::u32string tou32string(const wxString& str) throw(std::bad_alloc)
{ {
return to_u32string(std::string(str.mb_str(wxConvUTF8))); return utf8::to32(std::string(str.mb_str(wxConvUTF8)));
} }
std::string pick_archive_member(wxWindow* parent, const std::string& filename) throw(std::bad_alloc) std::string pick_archive_member(wxWindow* parent, const std::string& filename) throw(std::bad_alloc)

View file

@ -142,12 +142,12 @@ size_t text_framebuffer::write(const std::string& str, size_t w, size_t x, size_
size_t spos = 0; size_t spos = 0;
size_t slen = str.length(); size_t slen = str.length();
size_t pused = 0; size_t pused = 0;
uint16_t state = utf8_initial_state; uint16_t state = utf8::initial_state;
if(y >= height) if(y >= height)
return 0; return 0;
while(true) { while(true) {
int ch = (spos < slen) ? (unsigned char)str[spos] : - 1; int ch = (spos < slen) ? (unsigned char)str[spos] : - 1;
int32_t u = utf8_parse_byte(ch, state); int32_t u = utf8::parse_byte(ch, state);
if(u < 0) { if(u < 0) {
if(ch < 0) if(ch < 0)
break; break;

View file

@ -544,7 +544,7 @@ test_x tests[] = {
return (x.as_string() == U""); return (x.as_string() == U"");
}},{"Simple string token", []() { }},{"Simple string token", []() {
JSON::node x("\"hello\""); JSON::node x("\"hello\"");
std::cout << to_u8string(x.as_string()) << "..." << std::flush; std::cout << utf8::to8(x.as_string()) << "..." << std::flush;
return (x.as_string() == U"hello"); return (x.as_string() == U"hello");
}},{"Simple number token #1", []() { }},{"Simple number token #1", []() {
JSON::node x("123"); JSON::node x("123");
@ -1015,7 +1015,7 @@ test_x tests[] = {
JSON::node x("{\"2\":\"ABC\",\"1\":\"DEF\",\"0\":\"GHI\"}"); JSON::node x("{\"2\":\"ABC\",\"1\":\"DEF\",\"0\":\"GHI\"}");
const char32_t* str[] = {U"GHI", U"DEF", U"ABC"}; const char32_t* str[] = {U"GHI", U"DEF", U"ABC"};
for(auto i = x.begin(); i != x.end(); i++) { for(auto i = x.begin(); i != x.end(); i++) {
size_t idx = parse_value<size_t>(to_u8string(i.key())); size_t idx = parse_value<size_t>(utf8::to8(i.key()));
if(i->as_string() != str[idx]) if(i->as_string() != str[idx])
return false; return false;
} }
@ -1025,7 +1025,7 @@ test_x tests[] = {
const JSON::node& x = _x; const JSON::node& x = _x;
const char32_t* str[] = {U"GHI", U"DEF", U"ABC"}; const char32_t* str[] = {U"GHI", U"DEF", U"ABC"};
for(auto i = x.begin(); i != x.end(); i++) { for(auto i = x.begin(); i != x.end(); i++) {
size_t idx = parse_value<size_t>(to_u8string(i.key())); size_t idx = parse_value<size_t>(utf8::to8(i.key()));
if(i->as_string() != str[idx]) if(i->as_string() != str[idx])
return false; return false;
} }