Lua interface to iconv(3), string.byteU and string.charU

This commit is contained in:
Ilari Liusvaara 2014-01-20 12:59:02 +02:00
parent 2bc837dbf5
commit ce0c625072
6 changed files with 262 additions and 1 deletions

View file

@ -37,6 +37,10 @@ $(error "Bad value for THREADS (expected NATIVE or BOOST)")
endif endif
endif endif
ifdef NEED_LIBICONV
LDFLAGS += -liconv
endif
ifdef USE_LIBGCRYPT ifdef USE_LIBGCRYPT
CFLAGS += -DUSE_LIBGCRYPT_SHA256 CFLAGS += -DUSE_LIBGCRYPT_SHA256
LDFLAGS += -lgcrypt -lgpg-error LDFLAGS += -lgcrypt -lgpg-error

64
lua.lyx
View file

@ -483,6 +483,39 @@ Handy for paint callback if one is using render queues updated in other
handle = callback.paint:register(render_queue_function(my_rq)); handle = callback.paint:register(render_queue_function(my_rq));
\end_layout \end_layout
\begin_layout Subsection
iconv_new: Create new character set converter
\end_layout
\begin_layout Itemize
Syntax: ICONV iconv_new(string from, string to)
\end_layout
\begin_layout Standard
Create a character set converter, converting from <from> to <to>.
\end_layout
\begin_layout Subsection
ICONV(): Convert string fragment from character set to another
\end_layout
\begin_layout Itemize
Syntax: boolean, string, (number, string) ICONV(string input)
\end_layout
\begin_layout Standard
Convert string fragment <input> from character set to another.
\end_layout
\begin_layout Itemize
If conversion is successful, returns (true, {converted-string}).
\end_layout
\begin_layout Itemize
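If conversion was unsuccessful, returns (false, {converted-part}, {unconverted-bytes}, {error}), where {error} is one of INVALID, INCOMPLETE or INTERNALERR.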
\end_layout
\begin_layout Standard \begin_layout Standard
\begin_inset Newpage pagebreak \begin_inset Newpage pagebreak
\end_inset \end_inset
@ -4590,6 +4623,37 @@ CGRAM
If <first_trans> is true, first color is forced transparent. If <first_trans> is true, first color is forced transparent.
\end_layout \end_layout
\begin_layout Section
extensions to table string
\end_layout
\begin_layout Subsection
string.charU: string.char, UTF-8 version.
\end_layout
\begin_layout Itemize
Syntax: string string.charU(number n...)
\end_layout
\begin_layout Standard
Like Lua string.char(), but works in terms of Unicode codepoints.
The returned string is UTF-8.
\end_layout
\begin_layout Subsection
string.byteU: string.byte, UTF-8 version.
\end_layout
\begin_layout Itemize
Syntax: number...
string.byteU(string str[, number i[, number j]])
\end_layout
\begin_layout Standard
Like string.byte(), but works in terms of Unicode codepoints.
The input string <str> is assumed UTF-8.
\end_layout
\begin_layout Section \begin_layout Section
Table _SYSTEM Table _SYSTEM
\end_layout \end_layout

BIN
lua.pdf

Binary file not shown.

View file

@ -142,6 +142,9 @@ HOST_BOOST_POSTFIX=
# Set to non-empty value (e.g. 'yes') to support LZMA/XZ compression via liblzma (the XZ version). # Set to non-empty value (e.g. 'yes') to support LZMA/XZ compression via liblzma (the XZ version).
USE_LIBLZMA= USE_LIBLZMA=
# Set to non-empty value (e.g. 'yes') if iconv(3) needs libiconv.
NEED_LIBICONV=
# The target architecture # The target architecture
# I386 - I386/AMD64 # I386 - I386/AMD64
# Leave empty for generic/autodetect. # Leave empty for generic/autodetect.

188
src/lua/iconv.cpp Normal file
View file

@ -0,0 +1,188 @@
#include <iconv.h>
#include "lua/internal.hpp"
#include "library/string.hpp"
#include "library/utf8.hpp"
#include <cstring>
#include <cerrno>
namespace
{
//Fixed-size (1024 byte) staging area between a std::string and iconv(3).
//
//In input mode (is_out == false) the buffer is fed from the bound string: get() exposes the
//staged bytes and set() drops whatever the converter consumed and tops the buffer up again.
//In output mode (is_out == true) get() exposes the whole buffer as free space and set()
//appends whatever the converter wrote to the bound string.
struct buffer
{
	//Bind the buffer to <_str>. <_is_out> selects output mode; in input mode the first
	//chunk of <_str> is staged immediately.
	buffer(bool _is_out, std::string& _str)
		: buffer_size(0), char_count(0), str(_str), is_out(_is_out)
	{
		if(!is_out)
			refill();
	}
	//Return the (pointer, byte count) pair to hand to iconv(): the staged input bytes in
	//input mode, the entire buffer as free space in output mode.
	std::pair<char*,size_t> get()
	{
		return std::make_pair(buf, is_out ? sizeof(buf) : buffer_size);
	}
	//Take back the (pointer, bytes left) pair after iconv() has updated it.
	void set(std::pair<char*,size_t> pos)
	{
		if(is_out) {
			size_t emitted = sizeof(buf) - pos.second;
			//Append rather than write at a tracked offset, so a destination string that
			//was not empty to begin with is extended instead of being overwritten.
			str.append(pos.first - emitted, emitted);
			char_count += emitted;
		} else {
			//Only the remaining byte count is used; the unconsumed tail is moved to the
			//front of the buffer before more input is staged behind it.
			size_t eaten = buffer_size - pos.second;
			std::memmove(buf, buf + eaten, buffer_size - eaten);
			buffer_size -= eaten;
			refill();
		}
	}
	//Number of input bytes not yet consumed: staged bytes plus bytes still in the string.
	size_t left()
	{
		return buffer_size + str.length() - char_count;
	}
	//Number of bytes currently staged in the buffer.
	size_t unprocessed()
	{
		return buffer_size;
	}
private:
	//Stage as many further bytes of the bound string as fit into the free part of buf.
	void refill()
	{
		size_t space = sizeof(buf) - buffer_size;
		size_t avail = str.length() - char_count;
		size_t n = (avail < space) ? avail : space;
		std::memcpy(buf + buffer_size, str.data() + char_count, n);
		buffer_size += n;
		char_count += n;
	}
	char buf[1024];
	size_t buffer_size;	//Bytes currently staged in buf (input mode).
	size_t char_count;	//Bytes taken from (input) or appended to (output) str so far.
	std::string& str;
	bool is_out;
};
//Lua object wrapping one iconv(3) conversion descriptor.
//
//The descriptor is opened in the constructor and closed in the destructor; calling the object
//from Lua (the "__call" binding set up in the constructor) runs call().
struct lua_iconv
{
public:
	//Open a converter from character set <from> to character set <to>.
	lua_iconv(lua::state& L, const char* from, const char* to);
	//Close the conversion descriptor.
	~lua_iconv() throw();
	//Convert the string argument and push the results; returns the number of values pushed.
	int call(lua::state& L, const std::string& fname);
	//Return the "<from>-><to>" description of this converter.
	std::string print()
	{
		return spec;
	}
private:
	iconv_t ctx;		//Conversion descriptor from iconv_open().
	std::string spec;	//"<from>-><to>", as built in the constructor.
};
//Class object for lua_iconv, constructed with the name "ICONV" (the type name the Lua
//documentation uses for converter objects).
lua::_class<lua_iconv> class_iconv("ICONV");
//Create a converter from character set <from> to character set <to>.
//
//Binds call() as the object's "__call" method, records the "<from>-><to>" description and
//opens the iconv(3) descriptor. If the descriptor cannot be opened, an error built from
//strerror() is raised via stringfmt::throwex().
lua_iconv::lua_iconv(lua::state& L, const char* from, const char* to)
{
	lua::objclass<lua_iconv>().bind_multi(L, {
		{"__call", &lua_iconv::call}
	});
	spec = std::string(from) + "->" + to;
	ctx = iconv_open(to, from);
	//iconv_open() reports failure through its return value, (iconv_t)-1. errno is only
	//meaningful in that case: a successful call may leave it non-zero.
	if(ctx == (iconv_t)-1) {
		int err = errno;
		(stringfmt() << "Error creating character set converter: " << strerror(err)).throwex();
	}
}
//Release the iconv(3) conversion descriptor held in ctx.
lua_iconv::~lua_iconv() throw()
{
iconv_close(ctx);
}
//Convert a string fragment with this converter.
//
//Reads the string at argument index 2 and feeds it through iconv() in buffer-sized pieces.
//Pushes (true, converted) on success, or (false, converted-so-far, unconverted-byte-count,
//code) on failure, where code is "INVALID" (EILSEQ), "INCOMPLETE" (EINVAL with no further
//input available) or "INTERNALERR" (any other error). Returns the number of values pushed:
//2 on success, 4 on failure.
int lua_iconv::call(lua::state& L, const std::string& fname)
{
	std::string src = L.get_string(2, fname.c_str());
	std::string dst;
	buffer input(false, src);
	buffer output(true, dst);
	std::string code;
	while(true) {
		auto _input = input.get();
		auto _output = output.get();
		//iconv() returns size_t and signals failure with (size_t)-1. Save errno right
		//away: the buffer updates below may allocate and are allowed to disturb it.
		size_t r = iconv(ctx, &_input.first, &_input.second, &_output.first, &_output.second);
		int err = errno;
		bool failed = (r == (size_t)-1);
		size_t unprocessed = _input.second;
		input.set(_input);
		output.set(_output);
		if(!failed) {
			//Everything staged was converted; finish once no input remains.
			if(!input.unprocessed())
				break;
			continue;
		}
		if(err == E2BIG)
			continue;	//Just retry with new output buffer.
		if(err == EINVAL && unprocessed != input.unprocessed())
			continue;	//More input was staged behind the partial sequence, retry.
		if(err == EILSEQ)
			code = "INVALID";
		else if(err == EINVAL)
			code = "INCOMPLETE";
		else
			code = "INTERNALERR";
		break;
	}
	L.pushboolean(!code.length());
	L.pushlstring(dst);
	if(code.length()) {
		L.pushnumber(input.left());
		L.pushlstring(code);
	}
	return code.length() ? 4 : 2;
}
//Lua function iconv_new(from, to): create an ICONV converter object converting from
//character set <from> to character set <to>.
lua::fnptr iconv_new(lua_func_load, "iconv_new", [](lua::state& L, const std::string& fname) -> int {
	const char* caller = fname.c_str();
	std::string source_charset = L.get_string(1, caller);
	std::string target_charset = L.get_string(2, caller);
	lua::_class<lua_iconv>::create(L, source_charset.c_str(), target_charset.c_str());
	//One value is handed back to Lua.
	return 1;
});
//Lua function _lsnes_string_byteU(str[, i[, j]]): push the code points at 1-based positions
//i..j of the UTF-8 string <str>. <i> defaults to 1 and <j> defaults to <i>; positions past
//the end of the string are skipped. Returns the number of values pushed.
lua::fnptr iconv_byteU(lua_func_bit, "_lsnes_string_byteU", [](lua::state& L, const std::string& fname)
	-> int {
	const char* caller = fname.c_str();
	std::string encoded = L.get_string(1, caller);
	size_t first = 1;
	L.get_numeric_argument<size_t>(2, first, caller);
	//The default for the end position is the start position exactly as given.
	size_t last = first;
	L.get_numeric_argument<size_t>(3, last, caller);
	std::u32string codepoints = utf8::to32(encoded);
	//Position 0 is treated as position 1 when picking the starting index.
	const size_t start = first ? first - 1 : 0;
	const size_t limit = (last < codepoints.length()) ? last : codepoints.length();
	size_t pushed = 0;
	for(size_t pos = start; pos < limit; ++pos, ++pushed)
		L.pushnumber(codepoints[pos]);
	return pushed;
});
//Check whether <cp> is a code point string.charU accepts: not a surrogate, not a
//noncharacter and not beyond plane 16.
bool charU_valid_codepoint(uint32_t cp)
{
	//Surrogates (U+D800..U+DFFF) are not valid unicode. An exact range test is needed here:
	//masking with 0xD800 would also reject valid code points such as U+F900 or U+1F900.
	if(cp >= 0xD800 && cp <= 0xDFFF)
		return false;
	//Explicit noncharacters.
	if(cp >= 0xFDD0 && cp < 0xFDF0)
		return false;
	//The last two characters of each plane are noncharacters.
	if((cp & 0xFFFE) == 0xFFFE)
		return false;
	//Last valid plane is plane 16.
	if((cp >> 16) > 16)
		return false;
	return true;
}
//Lua function _lsnes_string_charU(n...): build a UTF-8 string from the leading run of
//numeric arguments, each taken as a Unicode code point. Throws std::runtime_error
//("Invalid character") if any of them is not an acceptable code point. Returns 1 (the
//string pushed).
lua::fnptr iconv_charU(lua_func_bit, "_lsnes_string_charU", [](lua::state& L, const std::string& fname)
	-> int {
	std::u32string str;
	//Stop at the first argument that is not a number.
	for(int i = 1; L.type(i) == LUA_TNUMBER; i++) {
		uint32_t cp = L.get_numeric_argument<uint32_t>(i, fname.c_str());
		if(!charU_valid_codepoint(cp))
			throw std::runtime_error("Invalid character");
		str.push_back(cp);
	}
	L.pushlstring(utf8::to8(str));
	return 1;
});
}

View file

@ -26,4 +26,6 @@ const char* lua_sysrc_script =
" return function()\n" " return function()\n"
" _rq:run();\n" " _rq:run();\n"
" end;\n" " end;\n"
"end;\n"; "end;\n"
"string.byteU=_lsnes_string_byteU;\n"
"string.charU=_lsnes_string_charU;\n";