-- dobrograd-13-06-2022/garrysmod/crypt/llex.lua

--[[--------------------------------------------------------------------
llex.lua: Lua 5.1 lexical analyzer in Lua
This file is part of LuaSrcDiet, based on Yueliang material.
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- NOTES:
-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
--   with significant modifications to handle LuaSrcDiet's needs:
--   (1) llex.error is an optional error function handler
--   (2) seminfo for strings include their delimiters and no
--       translation operations are performed on them
-- * ADDED: shbang handling to support executable scripts
-- * NO localized decimal point replacement magic
-- * NO limit to number of lines
-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
-- * Please read technotes.txt for more technical details.
----------------------------------------------------------------------]]
local base = _G
local string = require "string"
module "llex"
local find = string.find
local match = string.match
local sub = string.sub
----------------------------------------------------------------------
-- initialize keyword list, variables
----------------------------------------------------------------------
local kw = {}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while continue]], "%S+") do
  kw[v] = true
end
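-- NOTE: "continue" is not a keyword in stock Lua 5.1; it is listed
-- here because Garry's Mod's Lua dialect (GLua) reserves it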
-- NOTE: see init() for module variables (externally visible):
-- tok, seminfo, tokln
local z,                -- source stream
      sourceid,         -- name of source
      I,                -- position of lexer
      buff,             -- buffer for strings
      ln                -- line number
----------------------------------------------------------------------
-- add information to token listing
----------------------------------------------------------------------
local function addtoken(token, info)
  local i = #tok + 1
  tok[i] = token
  seminfo[i] = info
  tokln[i] = ln
end
----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
----------------------------------------------------------------------
local function inclinenumber(i, is_tok)
  local sub = sub
  local old = sub(z, i, i)
  i = i + 1  -- skip '\n' or '\r'
  local c = sub(z, i, i)
  if (c == "\n" or c == "\r") and (c ~= old) then
    i = i + 1  -- skip '\n\r' or '\r\n'
    old = old..c
  end
  if is_tok then addtoken("TK_EOL", old) end
  ln = ln + 1
  I = i
  return i
end
----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
----------------------------------------------------------------------
function init(_z, _sourceid)
  z = _z                        -- source
  sourceid = _sourceid          -- name of source
  I = 1                         -- lexer's position in source
  ln = 1                        -- line number
  tok = {}                      -- lexed token list*
  seminfo = {}                  -- lexed semantic information list*
  tokln = {}                    -- line numbers for messages*
                                -- (*) externally visible thru' module
  --------------------------------------------------------------------
  -- initial processing (shbang handling)
  --------------------------------------------------------------------
  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
  if p then                             -- skip first line
    I = I + #q
    addtoken("TK_COMMENT", q)
    if #r > 0 then inclinenumber(I, true) end
  end
end
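----------------------------------------------------------------------
-- illustrative usage sketch (not part of the lexer): one way a caller
-- might drive init()/llex() and walk the exposed tok/seminfo/tokln
-- lists; the require path and the demo source are assumptions
----------------------------------------------------------------------
--[[
local llex = require "llex"   -- assumes this file is on package.path
llex.init("local x = 1 -- demo\n", "@demo.lua")
llex.llex()
for i = 1, #llex.tok do
  print(llex.tokln[i], llex.tok[i], llex.seminfo[i])
end
--]]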
----------------------------------------------------------------------
-- returns a chunk name or id, no truncation for long names
----------------------------------------------------------------------
function chunkid()
  if sourceid and match(sourceid, "^[=@]") then
    return sub(sourceid, 2)  -- remove first char
  end
  return "[string]"
end
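-- e.g. a sourceid of "@demo.lua" yields "demo.lua", while a plain
-- string source (no leading '=' or '@') yields the generic "[string]"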
----------------------------------------------------------------------
-- formats error message and throws error
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------
function errorline(s, line)
  local e = error or base.error
  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
end
local errorline = errorline
------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
------------------------------------------------------------------------
local function skip_sep(i)
  local sub = sub
  local s = sub(z, i, i)
  i = i + 1
  local count = #match(z, "=*", i)  -- note, take the length
  i = i + count
  I = i
  return (sub(z, i, i) == s) and count or (-count) - 1
end
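-- e.g. called at the first '[' of "[==[", this returns 2 and leaves I
-- on the second '['; for a broken opener such as "[=x" the result is
-- (-count) - 1, here -2, which llex() rejects as invalid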
----------------------------------------------------------------------
-- reads a long string or long comment
----------------------------------------------------------------------
local function read_long_string(is_str, sep)
  local i = I + 1  -- skip 2nd '['
  local sub = sub
  local c = sub(z, i, i)
  if c == "\r" or c == "\n" then  -- string starts with a newline?
    i = inclinenumber(i)  -- skip it
  end
  local j = i
  while true do
    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
    if not p then
      errorline(is_str and "unfinished long string" or
                "unfinished long comment")
    end
    i = p
    if r == "]" then                    -- delimiter test
      if skip_sep(i) == sep then
        buff = sub(z, buff, I)
        I = I + 1  -- skip 2nd ']'
        return buff
      end
      i = I
    else                                -- newline
      buff = buff.."\n"
      i = inclinenumber(i)
    end
  end--while
end
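-- NOTE: buff holds the token's start position (set by llex() before
-- dispatching here) and is only turned into the token text, opening
-- and closing delimiters included, once the matching ']' is found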
----------------------------------------------------------------------
-- reads a string
----------------------------------------------------------------------
local function read_string(del)
  local i = I
  local find = find
  local sub = sub
  while true do
    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
    if p then
      if r == "\n" or r == "\r" then
        errorline("unfinished string")
      end
      i = p
      if r == "\\" then                         -- handle escapes
        i = i + 1
        r = sub(z, i, i)
        if r == "" then break end -- (EOZ error)
        p = find("abfnrtv\n\r", r, 1, true)
        ------------------------------------------------------
        if p then                               -- special escapes
          if p > 7 then
            i = inclinenumber(i)
          else
            i = i + 1
          end
        ------------------------------------------------------
        elseif find(r, "%D") then               -- other non-digits
          i = i + 1
        ------------------------------------------------------
        else                                    -- \xxx sequence
          local p, q, s = find(z, "^(%d%d?%d?)", i)
          i = q + 1
          if s + 1 > 256 then  -- UCHAR_MAX
            errorline("escape sequence too large")
          end
        ------------------------------------------------------
        end--if p
      else
        i = i + 1
        if r == del then                        -- ending delimiter
          I = i
          return sub(z, buff, i - 1)            -- return string
        end
      end--if r
    else
      break -- (error)
    end--if p
  end--while
  errorline("unfinished string")
end
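-- NOTE: the returned token keeps the surrounding quotes and the raw
-- escape sequences; e.g. the two source characters '\' and 'n' inside
-- "a\nb" come back verbatim, not as a real newline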
------------------------------------------------------------------------
-- main lexer function
------------------------------------------------------------------------
function llex()
  local find = find
  local match = match
  while true do--outer
    local i = I
    -- inner loop allows break to be used to nicely section tests
    while true do--inner
      ----------------------------------------------------------------
      local p, _, r = find(z, "^([_%a][_%w]*)", i)
      if p then
        I = i + #r
        if kw[r] then
          addtoken("TK_KEYWORD", r)             -- reserved word (keyword)
        else
          addtoken("TK_NAME", r)                -- identifier
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local p, _, r = find(z, "^(%.?)%d", i)
      if p then                                 -- numeral
        if r == "." then i = i + 1 end
        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
        i = q + 1
        if #r == 1 then                         -- optional exponent
          if match(z, "^[%+%-]", i) then        -- optional sign
            i = i + 1
          end
        end
        local _, q = find(z, "^[_%w]*", i)
        I = q + 1
        local v = sub(z, p, q)                  -- string equivalent
        if not base.tonumber(v) then            -- handles hex test also
          errorline("malformed number")
        end
        addtoken("TK_NUMBER", v)
        break -- (continue)
      end
      ----------------------------------------------------------------
      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
      if p then
        if t == "\n" or t == "\r" then          -- newline
          inclinenumber(i, true)
        else
          I = q + 1                             -- whitespace
          addtoken("TK_SPACE", r)
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = match(z, "^%p", i)
      if r then
        buff = i
        local p = find("-[\"\'.=<>~", r, 1, true)
        if p then
          -- two-level if block for punctuation/symbols
          --------------------------------------------------------
          if p <= 2 then
            if p == 1 then                      -- minus
              local c = match(z, "^%-%-(%[?)", i)
              if c then
                i = i + 2
                local sep = -1
                if c == "[" then
                  sep = skip_sep(i)
                end
                if sep >= 0 then                -- long comment
                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
                else                            -- short comment
                  I = find(z, "[\n\r]", i) or (#z + 1)
                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
                end
                break -- (continue)
              end
              -- (fall through for "-")
            else                                -- [ or long string
              local sep = skip_sep(i)
              if sep >= 0 then
                addtoken("TK_LSTRING", read_long_string(true, sep))
              elseif sep == -1 then
                addtoken("TK_OP", "[")
              else
                errorline("invalid long string delimiter")
              end
              break -- (continue)
            end
          --------------------------------------------------------
          elseif p <= 5 then
            if p < 5 then                       -- strings
              I = i + 1
              addtoken("TK_STRING", read_string(r))
              break -- (continue)
            end
            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
            -- (fall through)
          --------------------------------------------------------
          else                                  -- relational
            r = match(z, "^%p=?", i)
            -- (fall through)
          end
        end
        I = i + #r
        addtoken("TK_OP", r)  -- for other symbols, fall through
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = sub(z, i, i)
      if r ~= "" then
        I = i + 1
        addtoken("TK_OP", r)                    -- other single-char tokens
        break
      end
      addtoken("TK_EOS", "")                    -- end of stream,
      return                                    -- exit here
      ----------------------------------------------------------------
    end--while inner
  end--while outer
end
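----------------------------------------------------------------------
-- for reference, the token stream this lexer would produce (a worked
-- example, not captured output) for the source: local s = "hi"
--   TK_KEYWORD "local", TK_SPACE " ", TK_NAME "s", TK_SPACE " ",
--   TK_OP "=", TK_SPACE " ", TK_STRING '"hi"', TK_EOS ""
----------------------------------------------------------------------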
return base.getfenv()