Lua interface to iconv(3), string.byteU and string.charU
This commit is contained in:
parent
2bc837dbf5
commit
ce0c625072
6 changed files with 262 additions and 1 deletions
4
Makefile
4
Makefile
|
@ -37,6 +37,10 @@ $(error "Bad value for THREADS (expected NATIVE or BOOST)")
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef NEED_LIBICONV
|
||||||
|
LDFLAGS += -liconv
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef USE_LIBGCRYPT
|
ifdef USE_LIBGCRYPT
|
||||||
CFLAGS += -DUSE_LIBGCRYPT_SHA256
|
CFLAGS += -DUSE_LIBGCRYPT_SHA256
|
||||||
LDFLAGS += -lgcrypt -lgpg-error
|
LDFLAGS += -lgcrypt -lgpg-error
|
||||||
|
|
64
lua.lyx
64
lua.lyx
|
@ -483,6 +483,39 @@ Handy for paint callback if one is using render queues updated in other
|
||||||
handle = callback.paint:register(render_queue_function(my_rq));
|
handle = callback.paint:register(render_queue_function(my_rq));
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Subsection
|
||||||
|
iconv_new: Create new character set converter
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Itemize
|
||||||
|
Syntax: ICONV iconv_new(string from, string to)
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Create a character set converter, converting from <from> to <to>.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Subsection
|
||||||
|
ICONV(): Convert string fragment from character set to another
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Itemize
|
||||||
|
Syntax: boolean, string, (number, string) ICONV(string input)
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Convert string fragment <input> from character set to another.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Itemize
|
||||||
|
If conversion is successful, returns (true, {converted-string}).
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Itemize
|
||||||
|
If conversion was unsuccessful, returns (
|
||||||
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Standard
|
\begin_layout Standard
|
||||||
\begin_inset Newpage pagebreak
|
\begin_inset Newpage pagebreak
|
||||||
\end_inset
|
\end_inset
|
||||||
|
@ -4590,6 +4623,37 @@ CGRAM
|
||||||
If <first_trans> is true, first color is forced transparent.
|
If <first_trans> is true, first color is forced transparent.
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Section
|
||||||
|
extensions to table string
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Subsection
|
||||||
|
string.charU: string.char, UTF-8 version.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Itemize
|
||||||
|
Syntax: string string.charU(number n...)
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Like Lua string.char(), but works in terms of Unicode codepoints.
|
||||||
|
The returned string is UTF-8.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Subsection
|
||||||
|
string.byteU: string.byte, UTF-8 version.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Itemize
|
||||||
|
Syntax: number...
|
||||||
|
string.byteU(string str[, number i[, number j]])
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Like string.byte(), but works in terms of Unicode codepoints.
|
||||||
|
The input string <str> is assumed UTF-8.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Section
|
\begin_layout Section
|
||||||
Table _SYSTEM
|
Table _SYSTEM
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
BIN
lua.pdf
BIN
lua.pdf
Binary file not shown.
|
@ -142,6 +142,9 @@ HOST_BOOST_POSTFIX=
|
||||||
# Set to non-empty value (e.g. 'yes') to support LZMA/XZ compression via liblzma (the XZ version).
|
# Set to non-empty value (e.g. 'yes') to support LZMA/XZ compression via liblzma (the XZ version).
|
||||||
USE_LIBLZMA=
|
USE_LIBLZMA=
|
||||||
|
|
||||||
|
# Set to non-empty value (e.g. 'yes') if iconv(3) needs libiconv.
|
||||||
|
NEED_LIBICONV=
|
||||||
|
|
||||||
# The target architecture
|
# The target architecture
|
||||||
# I386 - I386/AMD64
|
# I386 - I386/AMD64
|
||||||
# Leave empty for generic/autodetect.
|
# Leave empty for generic/autodetect.
|
||||||
|
|
188
src/lua/iconv.cpp
Normal file
188
src/lua/iconv.cpp
Normal file
|
@ -0,0 +1,188 @@
|
||||||
|
#include <iconv.h>
|
||||||
|
#include "lua/internal.hpp"
|
||||||
|
#include "library/string.hpp"
|
||||||
|
#include "library/utf8.hpp"
|
||||||
|
#include <cstring>
|
||||||
|
#include <cerrno>
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
struct buffer
|
||||||
|
{
|
||||||
|
buffer(bool _is_out, std::string& _str)
|
||||||
|
: str(_str), is_out(_is_out)
|
||||||
|
{
|
||||||
|
buffer_size = 0;
|
||||||
|
char_count = 0;
|
||||||
|
if(!is_out) {
|
||||||
|
while(buffer_size < sizeof(buf) && char_count < str.length())
|
||||||
|
buf[buffer_size++] = str[char_count++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::pair<char*,size_t> get()
|
||||||
|
{
|
||||||
|
return std::make_pair(buf, is_out ? sizeof(buf) : buffer_size);
|
||||||
|
}
|
||||||
|
void set(std::pair<char*,size_t> pos)
|
||||||
|
{
|
||||||
|
if(is_out) {
|
||||||
|
size_t emitted = sizeof(buf) - pos.second;
|
||||||
|
str.resize(str.length() + emitted);
|
||||||
|
std::copy(pos.first - emitted, pos.first, str.begin() + char_count);
|
||||||
|
char_count += emitted;
|
||||||
|
} else {
|
||||||
|
size_t eaten = buffer_size - pos.second;
|
||||||
|
memmove(buf, buf + eaten, buffer_size - eaten);
|
||||||
|
buffer_size -= eaten;
|
||||||
|
while(buffer_size < sizeof(buf) && char_count < str.length())
|
||||||
|
buf[buffer_size++] = str[char_count++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size_t left()
|
||||||
|
{
|
||||||
|
return buffer_size + str.length() - char_count;
|
||||||
|
}
|
||||||
|
size_t unprocessed()
|
||||||
|
{
|
||||||
|
return buffer_size;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
char buf[1024];
|
||||||
|
size_t buffer_size;
|
||||||
|
size_t char_count;
|
||||||
|
std::string& str;
|
||||||
|
bool is_out;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct lua_iconv
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
lua_iconv(lua::state& L, const char* from, const char* to);
|
||||||
|
~lua_iconv() throw();
|
||||||
|
int call(lua::state& L, const std::string& fname);
|
||||||
|
std::string print()
|
||||||
|
{
|
||||||
|
return spec;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
iconv_t ctx;
|
||||||
|
buffer input();
|
||||||
|
std::string spec;
|
||||||
|
};
|
||||||
|
|
||||||
|
lua::_class<lua_iconv> class_iconv("ICONV");
|
||||||
|
|
||||||
|
|
||||||
|
lua_iconv::lua_iconv(lua::state& L, const char* from, const char* to)
|
||||||
|
{
|
||||||
|
lua::objclass<lua_iconv>().bind_multi(L, {
|
||||||
|
{"__call", &lua_iconv::call}
|
||||||
|
});
|
||||||
|
|
||||||
|
spec = std::string(from) + "->" + to;
|
||||||
|
errno = 0;
|
||||||
|
ctx = iconv_open(to, from);
|
||||||
|
if(errno) {
|
||||||
|
int err = errno;
|
||||||
|
(stringfmt() << "Error creating character set converter: " << strerror(err)).throwex();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lua_iconv::~lua_iconv() throw()
|
||||||
|
{
|
||||||
|
iconv_close(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lua_iconv::call(lua::state& L, const std::string& fname)
|
||||||
|
{
|
||||||
|
std::string src = L.get_string(2, fname.c_str());
|
||||||
|
std::string dst;
|
||||||
|
buffer input(false, src);
|
||||||
|
buffer output(true, dst);
|
||||||
|
std::string code = "";
|
||||||
|
while(true) {
|
||||||
|
auto _input = input.get();
|
||||||
|
auto _output = output.get();
|
||||||
|
int r = iconv(ctx, &_input.first, &_input.second, &_output.first, &_output.second);
|
||||||
|
size_t unprocessed = _input.second;
|
||||||
|
input.set(_input);
|
||||||
|
output.set(_output);
|
||||||
|
if(r < 0) {
|
||||||
|
int err = errno;
|
||||||
|
switch(err) {
|
||||||
|
case E2BIG:
|
||||||
|
continue; //Just retry with new output bufer.
|
||||||
|
case EILSEQ:
|
||||||
|
code = "INVALID";
|
||||||
|
goto exit;
|
||||||
|
case EINVAL:
|
||||||
|
if(unprocessed != input.unprocessed())
|
||||||
|
continue; //Retry.
|
||||||
|
code = "INCOMPLETE";
|
||||||
|
goto exit;
|
||||||
|
default:
|
||||||
|
code = "INTERNALERR";
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
} else if(!input.unprocessed())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
exit:
|
||||||
|
L.pushboolean(!code.length());
|
||||||
|
L.pushlstring(dst);
|
||||||
|
if(code.length()) {
|
||||||
|
L.pushnumber(input.left());
|
||||||
|
L.pushlstring(code);
|
||||||
|
}
|
||||||
|
return code.length() ? 4 : 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
lua::fnptr iconv_new(lua_func_load, "iconv_new", [](lua::state& L, const std::string& fname) -> int {
|
||||||
|
std::string from = L.get_string(1, fname.c_str());
|
||||||
|
std::string to = L.get_string(2, fname.c_str());
|
||||||
|
lua::_class<lua_iconv>::create(L, from.c_str(), to.c_str());
|
||||||
|
return 1;
|
||||||
|
});
|
||||||
|
|
||||||
|
lua::fnptr iconv_byteU(lua_func_bit, "_lsnes_string_byteU", [](lua::state& L, const std::string& fname)
|
||||||
|
-> int {
|
||||||
|
std::string _str = L.get_string(1, fname.c_str());
|
||||||
|
size_t i = 1;
|
||||||
|
L.get_numeric_argument<size_t>(2, i, fname.c_str());
|
||||||
|
size_t j = i;
|
||||||
|
L.get_numeric_argument<size_t>(3, j, fname.c_str());
|
||||||
|
std::u32string str = utf8::to32(_str);
|
||||||
|
if(i == 0) i = 1;
|
||||||
|
size_t p = 0;
|
||||||
|
for(size_t k = i - 1; k < j && k < str.length(); k++) {
|
||||||
|
L.pushnumber(str[k]);
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
});
|
||||||
|
|
||||||
|
lua::fnptr iconv_charU(lua_func_bit, "_lsnes_string_charU", [](lua::state& L, const std::string& fname)
|
||||||
|
-> int {
|
||||||
|
std::u32string str;
|
||||||
|
for(int i = 1; L.type(i) == LUA_TNUMBER; i++) {
|
||||||
|
uint32_t cp = L.get_numeric_argument<uint32_t>(i, fname.c_str());
|
||||||
|
//Surrogates are not valid unicode.
|
||||||
|
if((cp & 0xD800) == 0xD800)
|
||||||
|
throw std::runtime_error("Invalid character");
|
||||||
|
//Explicit noncharacters.
|
||||||
|
if(cp >= 0xFDD0 && cp < 0xFDF0)
|
||||||
|
throw std::runtime_error("Invalid character");
|
||||||
|
//The last two characters of each plane are noncharacters.
|
||||||
|
if((cp & 0xFFFE) == 0xFFFE)
|
||||||
|
throw std::runtime_error("Invalid character");
|
||||||
|
//Last valid plane is plane 16.
|
||||||
|
if((cp >> 16) > 16)
|
||||||
|
throw std::runtime_error("Invalid character");
|
||||||
|
//Ok.
|
||||||
|
str += std::u32string(1, cp);
|
||||||
|
}
|
||||||
|
L.pushlstring(utf8::to8(str));
|
||||||
|
return 1;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
|
@ -26,4 +26,6 @@ const char* lua_sysrc_script =
|
||||||
" return function()\n"
|
" return function()\n"
|
||||||
" _rq:run();\n"
|
" _rq:run();\n"
|
||||||
" end;\n"
|
" end;\n"
|
||||||
"end;\n";
|
"end;\n"
|
||||||
|
"string.byteU=_lsnes_string_byteU;\n"
|
||||||
|
"string.charU=_lsnes_string_charU;\n";
|
||||||
|
|
Loading…
Add table
Reference in a new issue