Mesen-SX/Lua/llex.c

/*
** $Id: llex.c,v 2.96 2016/05/02 14:02:12 roberto Exp $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/

#define llex_c
#define LUA_CORE

#include "lprefix.h"


#include <locale.h>
#include <string.h>

#include "lua.h"

#include "lctype.h"
#include "ldebug.h"
#include "ldo.h"
#include "lgc.h"
#include "llex.h"
#include "lobject.h"
#include "lparser.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include "lzio.h"


/*
** Advance the input by one character: fetch the next character of the
** stream 'ls->z' with 'zgetc' and store it in 'ls->current'. The value of
** the macro is the character just stored.
** The argument is not parenthesized in the expansion, so pass a plain
** identifier.
*/
#define next(ls) (ls->current = zgetc(ls->z))


/* True when the current character is a line-break character ('\n' or '\r'). */
#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')


/*
** ORDER RESERVED
** Textual form of every multi-character token. 'luaX_token2str' indexes
** this table with (token - FIRST_RESERVED), and 'luaX_init' registers the
** first NUM_RESERVED entries as reserved words, so the order of the
** entries must match the numbering of the token constants (presumably the
** RESERVED enumeration in llex.h -- confirm before reordering).
*/
static const char* const luaX_tokens[] = {
	"and", "break", "do", "else", "elseif",
	"end", "false", "for", "function", "goto", "if",
	"in", "local", "nil", "not", "or", "repeat",
	"return", "then", "true", "until", "while",
	"//", "..", "...", "==", ">=", "<=", "~=",
	"<<", ">>", "::", "<eof>",
	"<number>", "<integer>", "<name>", "<string>"
};


/*
** Append the current character to the token buffer (see 'save') and then
** advance to the following one (see 'next'). The value of the macro is the
** newly read character.
*/
#define save_and_next(ls) (save(ls, ls->current), next(ls))


/* Forward declaration: 'save' reports buffer overflow through 'lexerror',
   which is defined further down in this file. */
static l_noret lexerror(LexState* ls, const char* msg, int token);


/*
** Append character 'c' to the lexer's token buffer ('ls->buff').
** When the buffer is full its size is doubled first; if the current size
** is already at least MAX_SIZE / 2 a "lexical element too long" error is
** raised instead (through 'lexerror', which does not return).
*/
static void save(LexState* ls, int c)
{
	Mbuffer* buf = ls->buff;
	if (luaZ_bufflen(buf) + 1 > luaZ_sizebuffer(buf))
	{
		/* no room for one more character: grow before storing */
		size_t doubled;
		if (luaZ_sizebuffer(buf) >= MAX_SIZE / 2)
			lexerror(ls, "lexical element too long", 0);
		/* computed into a local because 'luaZ_resizebuffer' is a macro */
		doubled = luaZ_sizebuffer(buf) * 2;
		luaZ_resizebuffer(ls->L, buf, doubled);
	}
	buf->buffer[luaZ_bufflen(buf)++] = cast(char, c);
}


/*
** Set up the strings the lexer relies on: create the LUA_ENV name and
** every reserved word, mark each of them with 'luaC_fix' so they are never
** collected, and tag each reserved word's 'extra' field with its 1-based
** position in 'luaX_tokens' (which 'llex' later maps back to a token).
*/
void luaX_init(lua_State* L)
{
	int idx = 0;
	TString* envname = luaS_newliteral(L, LUA_ENV); /* create env name */
	luaC_fix(L, obj2gco(envname)); /* never collect this name */
	while (idx < NUM_RESERVED)
	{
		TString* word = luaS_new(L, luaX_tokens[idx]);
		luaC_fix(L, obj2gco(word)); /* reserved words are never collected */
		idx++;
		word->extra = cast_byte(idx); /* 1-based index marks a reserved word */
	}
}


/*
** Return a printable description of 'token'.
** - Tokens below FIRST_RESERVED are single characters and are formatted
**   as a quoted character.
** - Tokens below TK_EOS (reserved words and multi-character symbols) are
**   formatted as their quoted text from 'luaX_tokens'.
** - Remaining tokens return their 'luaX_tokens' entry as is.
** Formatted results are built with 'luaO_pushfstring'.
*/
const char* luaX_token2str(LexState* ls, int token)
{
	const char* text;
	if (token < FIRST_RESERVED)
	{
		/* single-byte symbol */
		lua_assert(token == cast_uchar(token));
		return luaO_pushfstring(ls->L, "'%c'", token);
	}
	text = luaX_tokens[token - FIRST_RESERVED];
	if (token >= TK_EOS)
		return text; /* names, strings, and numerals: already descriptive */
	/* fixed format (symbols and reserved words) */
	return luaO_pushfstring(ls->L, "'%s'", text);
}


/*
** Return the text to show for 'token' in an error message.
** For names, strings and numerals the text being scanned is taken from
** the token buffer (after terminating it with '\0') and quoted; any other
** token is described by 'luaX_token2str'.
*/
static const char* txtToken(LexState* ls, int token)
{
	int textInBuffer = (token == TK_NAME || token == TK_STRING ||
	                    token == TK_FLT || token == TK_INT);
	if (!textInBuffer)
		return luaX_token2str(ls, token);
	save(ls, '\0'); /* make the buffer a C string */
	return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
}


/*
** Raise a syntax error; this function does not return.
** 'msg' is first extended by 'luaG_addinfo' using the source name and the
** current line number. When 'token' is non-zero, " near <token text>" is
** appended. The error is then thrown with status LUA_ERRSYNTAX.
*/
static l_noret lexerror(LexState* ls, const char* msg, int token)
{
	const char* located = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
	if (token != 0)
	{
		const char* tokentext = txtToken(ls, token);
		luaO_pushfstring(ls->L, "%s near %s", located, tokentext);
	}
	luaD_throw(ls->L, LUA_ERRSYNTAX);
}


/*
** Raise a syntax error with message 'msg', reported near the current
** token ('ls->t.token'). Does not return.
*/
l_noret luaX_syntaxerror(LexState* ls, const char* msg)
{
	int current = ls->t.token;
	lexerror(ls, msg, current);
}


/*
** Create a string from the 'l' bytes at 'str' and anchor it in the
** scanner's table ('ls->h') so that it is not collected before the end of
** the compilation (by then it should be anchored somewhere else).
** If an equal string is already a key of that table, the key already
** stored there is returned instead.
*/
TString* luaX_newstring(LexState* ls, const char* str, size_t l)
{
	lua_State* L = ls->L;
	TString* ts = luaS_newlstr(L, str, l); /* create new string */
	TValue* slot; /* table entry for the string */
	setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */
	slot = luaH_set(L, ls->h, L->top - 1);
	if (!ttisnil(slot))
	{
		/* string already present: re-use the key previously stored */
		ts = tsvalue(keyfromval(slot));
	}
	else
	{
		/* not in use yet */
		/* boolean value does not need GC barrier;
		   table has no metatable, so it does not need to invalidate cache */
		setbvalue(slot, 1); /* t[string] = true */
		luaC_checkGC(L);
	}
	L->top--; /* remove string from stack */
	return ts;
}


/*
** Skip one newline sequence (any of \n, \r, \n\r, or \r\n) and increment
** the line counter. The current character must be '\n' or '\r' on entry.
** Raises "chunk has too many lines" when the counter reaches MAX_INT.
*/
static void inclinenumber(LexState* ls)
{
	int first = ls->current;
	lua_assert(currIsNewline(ls));
	next(ls); /* skip '\n' or '\r' */
	if (ls->current != first && currIsNewline(ls))
		next(ls); /* second half of a '\n\r' or '\r\n' pair */
	ls->linenumber++;
	if (ls->linenumber >= MAX_INT)
		lexerror(ls, "chunk has too many lines", 0);
}


/*
** Prepare lexer state 'ls' to read from stream 'z'.
** 'firstchar' becomes the current character, 'source' the chunk name,
** line counters start at 1, there is no current token (0) and no
** look-ahead token (TK_EOS). The env name string is fetched and the token
** buffer is sized to LUA_MINBUFFER.
*/
void luaX_setinput(lua_State* L, LexState* ls, ZIO* z, TString* source,
                   int firstchar)
{
	/* input origin */
	ls->L = L;
	ls->z = z;
	ls->source = source;
	ls->current = firstchar;
	/* token state */
	ls->t.token = 0;
	ls->lookahead.token = TK_EOS; /* no look-ahead token */
	/* position and parser link */
	ls->linenumber = 1;
	ls->lastline = 1;
	ls->fs = NULL;
	/* allocating steps go last, in their original order */
	ls->envn = luaS_newliteral(L, LUA_ENV); /* get env name */
	luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
}


/*
** =======================================================
** LEXICAL ANALYZER
** =======================================================
*/


/*
** If the current character equals 'c', skip it (without saving it in the
** buffer) and return 1; otherwise leave the input untouched and return 0.
*/
static int check_next1(LexState* ls, int c)
{
	if (ls->current != c)
		return 0;
	next(ls);
	return 1;
}


/*
** If the current character is one of the two characters in 'set', save
** it in the buffer, advance, and return 1; otherwise return 0 and leave
** the input untouched. 'set' must be exactly two characters long.
*/
static int check_next2(LexState* ls, const char* set)
{
	int matches;
	lua_assert(set[2] == '\0');
	matches = (ls->current == set[0] || ls->current == set[1]);
	if (!matches)
		return 0;
	save_and_next(ls);
	return 1;
}


/* LUA_NUMBER */
/*
** Read a numeral starting at the current character (which must be a
** digit) and return TK_INT or TK_FLT, storing the value in 'seminfo'.
** The scan is deliberately liberal: it collects hexadecimal digits, dots
** and exponent markers with an optional sign, and leaves validation to
** 'luaO_str2num', which rejects ill-formed numerals ("malformed number").
*/
static int read_numeral(LexState* ls, SemInfo* seminfo)
{
	TValue value;
	const char* expmarks = "Ee"; /* exponent markers for decimal numerals */
	int lead = ls->current;
	lua_assert(lisdigit(ls->current));
	save_and_next(ls);
	if (lead == '0' && check_next2(ls, "xX")) /* hexadecimal? */
		expmarks = "Pp";
	for (;;)
	{
		if (check_next2(ls, expmarks)) /* exponent part? */
			check_next2(ls, "-+"); /* optional exponent sign */
		if (!lisxdigit(ls->current) && ls->current != '.')
			break; /* neither digit nor dot: numeral ends here */
		save_and_next(ls);
	}
	save(ls, '\0');
	if (luaO_str2num(luaZ_buffer(ls->buff), &value) == 0) /* format error? */
		lexerror(ls, "malformed number", TK_FLT);
	if (!ttisinteger(&value))
	{
		lua_assert(ttisfloat(&value));
		seminfo->r = fltvalue(&value);
		return TK_FLT;
	}
	seminfo->i = ivalue(&value);
	return TK_INT;
}


/*
** Skip a sequence '[=*[' or ']=*]', saving the bracket and the '='s in
** the buffer; the closing bracket itself is left as the current
** character. If the sequence is well formed, return its number of '='s;
** otherwise return a negative number (-1 iff there are no '='s after the
** initial bracket).
*/
static int skip_sep(LexState* ls)
{
	int bracket = ls->current;
	int level;
	lua_assert(bracket == '[' || bracket == ']');
	save_and_next(ls);
	for (level = 0; ls->current == '='; level++)
		save_and_next(ls);
	if (ls->current == bracket)
		return level;
	return (-level) - 1;
}


/*
** Read the body of a long string or long comment whose delimiters carry
** 'sep' '=' characters. On entry the current character is the second '['
** of the opening delimiter. A newline right after the opening delimiter
** is skipped. When 'seminfo' is NULL the text is a comment: characters
** are not accumulated and the buffer is reset at every line break.
** Otherwise the contents (without delimiters) are stored in seminfo->ts.
** Reaching end of input raises an "unfinished long ..." error.
*/
static void read_long_string(LexState* ls, SemInfo* seminfo, int sep)
{
	int startline = ls->linenumber; /* initial line (for error message) */
	int closed = 0; /* set once the matching closing delimiter is read */
	save_and_next(ls); /* skip 2nd '[' */
	if (currIsNewline(ls)) /* string starts with a newline? */
		inclinenumber(ls); /* skip it */
	while (!closed)
	{
		if (ls->current == EOZ)
		{
			/* error */
			const char* what = (seminfo ? "string" : "comment");
			const char* msg = luaO_pushfstring(ls->L,
			                                   "unfinished long %s (starting at line %d)", what, startline);
			lexerror(ls, msg, TK_EOS);
		}
		else if (ls->current == ']')
		{
			if (skip_sep(ls) == sep)
			{
				save_and_next(ls); /* skip 2nd ']' */
				closed = 1;
			}
		}
		else if (currIsNewline(ls))
		{
			save(ls, '\n'); /* any newline sequence is stored as '\n' */
			inclinenumber(ls);
			if (!seminfo)
				luaZ_resetbuffer(ls->buff); /* avoid wasting space */
		}
		else if (seminfo)
			save_and_next(ls);
		else
			next(ls);
	}
	if (seminfo)
		seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
		                             luaZ_bufflen(ls->buff) - 2 * (2 + sep));
}


/*
** Check a condition while reading an escape sequence. When 'c' is zero,
** raise a lexical error with message 'msg' near the string being read;
** the offending character (unless it is end of input) is first added to
** the buffer so that it shows up in the message. When 'c' is non-zero
** nothing happens.
*/
static void esccheck(LexState* ls, int c, const char* msg)
{
	if (c)
		return;
	if (ls->current != EOZ)
		save_and_next(ls); /* add current to buffer for error message */
	lexerror(ls, msg, TK_STRING);
}


/*
** Save the current character, advance, and require the new current
** character to be a hexadecimal digit ("hexadecimal digit expected"
** otherwise). Returns that digit's value; the digit is left as the
** current character.
*/
static int gethexa(LexState* ls)
{
	int ishex;
	save_and_next(ls);
	ishex = lisxdigit(ls->current);
	esccheck(ls, ishex, "hexadecimal digit expected");
	return luaO_hexavalue(ls->current);
}


/*
** Read the two hexadecimal digits of a '\xXX' escape and return the byte
** they denote. The two characters saved while reading are removed from
** the buffer again.
*/
static int readhexaesc(LexState* ls)
{
	int high = gethexa(ls);
	int low = gethexa(ls);
	luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */
	return (high << 4) + low;
}


/*
** Read a '\u{XXX}' escape (current character is the 'u') and return the
** code point it denotes. Requires '{', at least one hexadecimal digit and
** a closing '}'; values above 0x10FFFF raise "UTF-8 value too large".
** Every character saved while reading is removed from the buffer before
** returning.
*/
static unsigned long readutf8esc(LexState* ls)
{
	unsigned long code;
	int saved = 4; /* chars to be removed: '\', 'u', '{', and first digit */
	save_and_next(ls); /* skip 'u' */
	esccheck(ls, ls->current == '{', "missing '{'");
	code = gethexa(ls); /* must have at least one digit */
	for (;;)
	{
		save_and_next(ls); /* keep the digit just consumed */
		if (!lisxdigit(ls->current))
			break;
		saved++;
		code = (code << 4) + luaO_hexavalue(ls->current);
		esccheck(ls, code <= 0x10FFFF, "UTF-8 value too large");
	}
	esccheck(ls, ls->current == '}', "missing '}'");
	next(ls); /* skip '}' */
	luaZ_buffremove(ls->buff, saved); /* remove saved chars from buffer */
	return code;
}


/*
** Read a '\u{XXX}' escape and append its encoding, as produced by
** 'luaO_utf8esc', to the token buffer. 'luaO_utf8esc' returns the number
** of bytes it wrote; they are taken from the last positions of 'buff'.
*/
static void utf8esc(LexState* ls)
{
	char buff[UTF8BUFFSZ];
	int nbytes = luaO_utf8esc(buff, readutf8esc(ls));
	int pos = UTF8BUFFSZ - nbytes; /* index of first encoded byte */
	while (pos < UTF8BUFFSZ)
	{
		/* add 'buff' to string */
		save(ls, buff[pos]);
		pos++;
	}
}


/*
** Read a decimal escape '\ddd' (up to three digits, starting at the
** current character) and return its value. Values above UCHAR_MAX raise
** "decimal escape too large". The digits saved while reading are removed
** from the buffer again.
*/
static int readdecesc(LexState* ls)
{
	int value = 0; /* result accumulator */
	int ndigits = 0;
	while (ndigits < 3 && lisdigit(ls->current))
	{
		/* read up to 3 digits */
		value = 10 * value + ls->current - '0';
		save_and_next(ls);
		ndigits++;
	}
	esccheck(ls, value <= UCHAR_MAX, "decimal escape too large");
	luaZ_buffremove(ls->buff, ndigits); /* remove read digits from buffer */
	return value;
}


/*
** Read a short string literal delimited by 'del' (the current character
** on entry) and store its contents, without the delimiters, in
** seminfo->ts.
** The opening delimiter and each backslash are kept in the buffer while
** scanning so that error messages can show the text read so far; each
** escape removes what it saved before the decoded character is stored.
** End of input or an unescaped line break inside the literal raises
** "unfinished string"; malformed escapes raise their own errors.
**
** Labels used by the escape handling:
**   read_save: skip the escape letter, then continue at only_save;
**   only_save: drop the saved '\\' and store the decoded character 'c';
**   no_save:   nothing (more) to store for this escape.
*/
static void read_string(LexState* ls, int del, SemInfo* seminfo)
{
	save_and_next(ls); /* keep delimiter (for error messages) */
	while (ls->current != del)
	{
		switch (ls->current)
		{
		case EOZ:
			lexerror(ls, "unfinished string", TK_EOS);
			break; /* to avoid warnings */
		case '\n':
		case '\r':
			lexerror(ls, "unfinished string", TK_STRING);
			break; /* to avoid warnings */
		case '\\':
			{
				/* escape sequences */
				int c; /* final character to be saved */
				save_and_next(ls); /* keep '\\' for error messages */
				switch (ls->current)
				{
				case 'a': c = '\a';
					goto read_save;
				case 'b': c = '\b';
					goto read_save;
				case 'f': c = '\f';
					goto read_save;
				case 'n': c = '\n';
					goto read_save;
				case 'r': c = '\r';
					goto read_save;
				case 't': c = '\t';
					goto read_save;
				case 'v': c = '\v';
					goto read_save;
				case 'x': c = readhexaesc(ls);
					goto read_save;
				case 'u': utf8esc(ls); /* saves the encoded bytes itself */
					goto no_save;
				case '\n':
				case '\r':
					/* backslash followed by a line break: stored as '\n' */
					inclinenumber(ls);
					c = '\n';
					goto only_save;
				case '\\':
				case '\"':
				case '\'':
					c = ls->current;
					goto read_save;
				case EOZ: goto no_save; /* will raise an error next loop */
				case 'z':
					{
						/* zap following span of spaces */
						luaZ_buffremove(ls->buff, 1); /* remove '\\' */
						next(ls); /* skip the 'z' */
						while (lisspace(ls->current))
						{
							if (currIsNewline(ls)) inclinenumber(ls);
							else
								next(ls);
						}
						goto no_save;
					}
				default:
					{
						esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
						c = readdecesc(ls); /* digital escape '\ddd' */
						goto only_save;
					}
				}
			read_save:
				next(ls);
				/* go through */
			only_save:
				luaZ_buffremove(ls->buff, 1); /* remove '\\' */
				save(ls, c);
				/* go through */
			no_save: break;
			}
		default:
			save_and_next(ls);
		}
	}
	save_and_next(ls); /* skip delimiter */
	/* buffer holds both delimiters: skip the first, drop the last */
	seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
	                             luaZ_bufflen(ls->buff) - 2);
}


/*
** Scan and return the next token, starting at the current character.
** Line breaks, spaces and comments are skipped. For names, reserved
** words and strings the string is stored in seminfo->ts; numerals store
** their value through 'read_numeral'. Single-character tokens are
** returned as their own character code, and end of input yields TK_EOS.
** The token buffer is reset on entry.
*/
static int llex(LexState* ls, SemInfo* seminfo)
{
	luaZ_resetbuffer(ls->buff);
	for (;;)
	{
		/* a 'break' out of the switch restarts the scan at the new
		   current character; every real token returns directly */
		switch (ls->current)
		{
		case '\n':
		case '\r':
			{
				/* line breaks */
				inclinenumber(ls);
				break;
			}
		case ' ':
		case '\f':
		case '\t':
		case '\v':
			{
				/* spaces */
				next(ls);
				break;
			}
		case '-':
			{
				/* '-' or '--' (comment) */
				next(ls);
				if (ls->current != '-') return '-';
				/* else is a comment */
				next(ls);
				if (ls->current == '[')
				{
					/* long comment? */
					int sep = skip_sep(ls);
					luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */
					if (sep >= 0)
					{
						read_long_string(ls, NULL, sep); /* skip long comment */
						luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
						break;
					}
				}
				/* else short comment */
				while (!currIsNewline(ls) && ls->current != EOZ)
					next(ls); /* skip until end of line (or end of file) */
				break;
			}
		case '[':
			{
				/* long string or simply '[' */
				int sep = skip_sep(ls);
				if (sep >= 0)
				{
					read_long_string(ls, seminfo, sep);
					return TK_STRING;
				}
				else if (sep != -1) /* '[=...' missing second bracket */
					lexerror(ls, "invalid long string delimiter", TK_STRING);
				return '[';
			}
		case '=':
			{
				/* '=' or '==' */
				next(ls);
				if (check_next1(ls, '=')) return TK_EQ;
				else return '=';
			}
		case '<':
			{
				/* '<', '<=' or '<<' */
				next(ls);
				if (check_next1(ls, '=')) return TK_LE;
				else if (check_next1(ls, '<')) return TK_SHL;
				else return '<';
			}
		case '>':
			{
				/* '>', '>=' or '>>' */
				next(ls);
				if (check_next1(ls, '=')) return TK_GE;
				else if (check_next1(ls, '>')) return TK_SHR;
				else return '>';
			}
		case '/':
			{
				/* '/' or '//' */
				next(ls);
				if (check_next1(ls, '/')) return TK_IDIV;
				else return '/';
			}
		case '~':
			{
				/* '~' or '~=' */
				next(ls);
				if (check_next1(ls, '=')) return TK_NE;
				else return '~';
			}
		case ':':
			{
				/* ':' or '::' */
				next(ls);
				if (check_next1(ls, ':')) return TK_DBCOLON;
				else return ':';
			}
		case '"':
		case '\'':
			{
				/* short literal strings */
				read_string(ls, ls->current, seminfo);
				return TK_STRING;
			}
		case '.':
			{
				/* '.', '..', '...', or number */
				/* the '.' is saved because it may start a numeral */
				save_and_next(ls);
				if (check_next1(ls, '.'))
				{
					if (check_next1(ls, '.'))
						return TK_DOTS; /* '...' */
					else return TK_CONCAT; /* '..' */
				}
				else if (!lisdigit(ls->current)) return '.';
				else return read_numeral(ls, seminfo);
			}
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			{
				return read_numeral(ls, seminfo);
			}
		case EOZ:
			{
				return TK_EOS;
			}
		default:
			{
				if (lislalpha(ls->current))
				{
					/* identifier or reserved word? */
					TString* ts;
					do
					{
						save_and_next(ls);
					}
					while (lislalnum(ls->current));
					ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
					                    luaZ_bufflen(ls->buff));
					seminfo->ts = ts;
					/* 'extra' holds the 1-based index set by 'luaX_init' */
					if (isreserved(ts)) /* reserved word? */
						return ts->extra - 1 + FIRST_RESERVED;
					else
					{
						return TK_NAME;
					}
				}
				else
				{
					/* single-char tokens (+ - / ...) */
					int c = ls->current;
					next(ls);
					return c;
				}
			}
		}
	}
}


/*
** Advance to the next token, leaving it in 'ls->t'. The line of the
** token being left is remembered in 'ls->lastline'. A pending look-ahead
** token (anything other than TK_EOS in 'ls->lookahead') is consumed
** first; otherwise a new token is scanned with 'llex'.
*/
void luaX_next(LexState* ls)
{
	ls->lastline = ls->linenumber;
	if (ls->lookahead.token == TK_EOS)
	{
		/* no look-ahead token pending: read next token */
		ls->t.token = llex(ls, &ls->t.seminfo);
		return;
	}
	ls->t = ls->lookahead; /* use the look-ahead token */
	ls->lookahead.token = TK_EOS; /* and discharge it */
}


/*
** Scan one token ahead without consuming the current one: the token is
** stored in 'ls->lookahead' and its code is returned. There must be no
** look-ahead token already pending.
*/
int luaX_lookahead(LexState* ls)
{
	int ahead;
	lua_assert(ls->lookahead.token == TK_EOS);
	ahead = llex(ls, &ls->lookahead.seminfo);
	ls->lookahead.token = ahead;
	return ahead;
}