Added lpeg for narrator
This commit is contained in:
109
src/lua/lpeg-1.1.0/HISTORY
Normal file
109
src/lua/lpeg-1.1.0/HISTORY
Normal file
@@ -0,0 +1,109 @@
|
||||
HISTORY for LPeg 1.1.0
|
||||
|
||||
* Changes from version 1.0.2 to 1.1.0
|
||||
---------------------------------
|
||||
+ accumulator capture
|
||||
+ UTF-8 ranges
|
||||
+ Larger limit for number of rules in a grammar
|
||||
+ Larger limit for number of captures in a match
|
||||
+ bug fixes
|
||||
+ other small improvements
|
||||
|
||||
* Changes from version 1.0.1 to 1.0.2
|
||||
---------------------------------
|
||||
+ some bugs fixed
|
||||
|
||||
* Changes from version 0.12 to 1.0.1
|
||||
---------------------------------
|
||||
+ group "names" can be any Lua value
|
||||
+ some bugs fixed
|
||||
+ other small improvements
|
||||
|
||||
* Changes from version 0.11 to 0.12
|
||||
---------------------------------
|
||||
+ no "unsigned short" limit for pattern sizes
|
||||
+ mathtime captures considered nullable
|
||||
+ some bugs fixed
|
||||
|
||||
* Changes from version 0.10 to 0.11
|
||||
-------------------------------
|
||||
+ complete reimplementation of the code generator
|
||||
+ new syntax for table captures
|
||||
+ new functions in module 're'
|
||||
+ other small improvements
|
||||
|
||||
* Changes from version 0.9 to 0.10
|
||||
-------------------------------
|
||||
+ backtrack stack has configurable size
|
||||
+ better error messages
|
||||
+ Notation for non-terminals in 're' back to A instead o <A>
|
||||
+ experimental look-behind pattern
|
||||
+ support for external extensions
|
||||
+ works with Lua 5.2
|
||||
+ consumes less C stack
|
||||
|
||||
- "and" predicates do not keep captures
|
||||
|
||||
* Changes from version 0.8 to 0.9
|
||||
-------------------------------
|
||||
+ The accumulator capture was replaced by a fold capture;
|
||||
programs that used the old 'lpeg.Ca' will need small changes.
|
||||
+ Some support for character classes from old C locales.
|
||||
+ A new named-group capture.
|
||||
|
||||
* Changes from version 0.7 to 0.8
|
||||
-------------------------------
|
||||
+ New "match-time" capture.
|
||||
+ New "argument capture" that allows passing arguments into the pattern.
|
||||
+ Better documentation for 're'.
|
||||
+ Several small improvements for 're'.
|
||||
+ The 're' module has an incompatibility with previous versions:
|
||||
now, any use of a non-terminal must be enclosed in angle brackets
|
||||
(like <B>).
|
||||
|
||||
* Changes from version 0.6 to 0.7
|
||||
-------------------------------
|
||||
+ Several improvements in module 're':
|
||||
- better documentation;
|
||||
- support for most captures (all but accumulator);
|
||||
- limited repetitions p{n,m}.
|
||||
+ Small improvements in efficiency.
|
||||
+ Several small bugs corrected (special thanks to Hans Hagen
|
||||
and Taco Hoekwater).
|
||||
|
||||
* Changes from version 0.5 to 0.6
|
||||
-------------------------------
|
||||
+ Support for non-numeric indices in grammars.
|
||||
+ Some bug fixes (thanks to the luatex team).
|
||||
+ Some new optimizations; (thanks to Mike Pall).
|
||||
+ A new page layout (thanks to Andre Carregal).
|
||||
+ Minimal documentation for module 're'.
|
||||
|
||||
* Changes from version 0.4 to 0.5
|
||||
-------------------------------
|
||||
+ Several optimizations.
|
||||
+ lpeg.P now accepts booleans.
|
||||
+ Some new examples.
|
||||
+ A proper license.
|
||||
+ Several small improvements.
|
||||
|
||||
* Changes from version 0.3 to 0.4
|
||||
-------------------------------
|
||||
+ Static check for loops in repetitions and grammars.
|
||||
+ Removed label option in captures.
|
||||
+ The implementation of captures uses less memory.
|
||||
|
||||
* Changes from version 0.2 to 0.3
|
||||
-------------------------------
|
||||
+ User-defined patterns in Lua.
|
||||
+ Several new captures.
|
||||
|
||||
* Changes from version 0.1 to 0.2
|
||||
-------------------------------
|
||||
+ Several small corrections.
|
||||
+ Handles embedded zeros like any other character.
|
||||
+ Capture "name" can be any Lua value.
|
||||
+ Unlimited number of captures.
|
||||
+ Match gets an optional initial position.
|
||||
|
||||
(end of HISTORY)
|
||||
4
src/lua/lpeg-1.1.0/README.md
Normal file
4
src/lua/lpeg-1.1.0/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# LPeg - Parsing Expression Grammars For Lua
|
||||
|
||||
For more information,
|
||||
see [Lpeg](//www.inf.puc-rio.br/~roberto/lpeg/).
|
||||
612
src/lua/lpeg-1.1.0/lpcap.c
Normal file
612
src/lua/lpeg-1.1.0/lpcap.c
Normal file
@@ -0,0 +1,612 @@
|
||||
|
||||
#include "lua.h"
|
||||
#include "lauxlib.h"
|
||||
|
||||
#include "lpcap.h"
|
||||
#include "lpprint.h"
|
||||
#include "lptypes.h"
|
||||
|
||||
|
||||
#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
|
||||
|
||||
#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
|
||||
|
||||
|
||||
|
||||
#define skipclose(cs,head) \
|
||||
if (isopencap(head)) { assert(isclosecap(cs->cap)); cs->cap++; }
|
||||
|
||||
|
||||
/*
|
||||
** Return the size of capture 'cap'. If it is an open capture, 'close'
|
||||
** must be its corresponding close.
|
||||
*/
|
||||
static Index_t capsize (Capture *cap, Capture *close) {
|
||||
if (isopencap(cap)) {
|
||||
assert(isclosecap(close));
|
||||
return close->index - cap->index;
|
||||
}
|
||||
else
|
||||
return cap->siz - 1;
|
||||
}
|
||||
|
||||
|
||||
static Index_t closesize (CapState *cs, Capture *head) {
|
||||
return capsize(head, cs->cap);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Put at the cache for Lua values the value indexed by 'v' in ktable
|
||||
** of the running pattern (if it is not there yet); returns its index.
|
||||
*/
|
||||
static int updatecache (CapState *cs, int v) {
|
||||
int idx = cs->ptop + 1; /* stack index of cache for Lua values */
|
||||
if (v != cs->valuecached) { /* not there? */
|
||||
getfromktable(cs, v); /* get value from 'ktable' */
|
||||
lua_replace(cs->L, idx); /* put it at reserved stack position */
|
||||
cs->valuecached = v; /* keep track of what is there */
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
||||
static int pushcapture (CapState *cs);
|
||||
|
||||
|
||||
/*
|
||||
** Goes back in a list of captures looking for an open capture
|
||||
** corresponding to a close one.
|
||||
*/
|
||||
static Capture *findopen (Capture *cap) {
|
||||
int n = 0; /* number of closes waiting an open */
|
||||
for (;;) {
|
||||
cap--;
|
||||
if (isclosecap(cap)) n++; /* one more open to skip */
|
||||
else if (isopencap(cap))
|
||||
if (n-- == 0) return cap;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Go to the next capture at the same level.
|
||||
*/
|
||||
static void nextcap (CapState *cs) {
|
||||
Capture *cap = cs->cap;
|
||||
if (isopencap(cap)) { /* must look for a close? */
|
||||
int n = 0; /* number of opens waiting a close */
|
||||
for (;;) { /* look for corresponding close */
|
||||
cap++;
|
||||
if (isopencap(cap)) n++;
|
||||
else if (isclosecap(cap))
|
||||
if (n-- == 0) break;
|
||||
}
|
||||
cs->cap = cap + 1; /* + 1 to skip last close */
|
||||
}
|
||||
else {
|
||||
Capture *next;
|
||||
for (next = cap + 1; capinside(cap, next); next++)
|
||||
; /* skip captures inside current one */
|
||||
cs->cap = next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Push on the Lua stack all values generated by nested captures inside
|
||||
** the current capture. Returns number of values pushed. 'addextra'
|
||||
** makes it push the entire match after all captured values. The
|
||||
** entire match is pushed also if there are no other nested values,
|
||||
** so the function never returns zero.
|
||||
*/
|
||||
static int pushnestedvalues (CapState *cs, int addextra) {
|
||||
Capture *head = cs->cap++; /* original capture */
|
||||
int n = 0; /* number of pushed subvalues */
|
||||
/* repeat for all nested patterns */
|
||||
while (capinside(head, cs->cap))
|
||||
n += pushcapture(cs);
|
||||
if (addextra || n == 0) { /* need extra? */
|
||||
lua_pushlstring(cs->L, cs->s + head->index, closesize(cs, head));
|
||||
n++;
|
||||
}
|
||||
skipclose(cs, head);
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Push only the first value generated by nested captures
|
||||
*/
|
||||
static void pushonenestedvalue (CapState *cs) {
|
||||
int n = pushnestedvalues(cs, 0);
|
||||
if (n > 1)
|
||||
lua_pop(cs->L, n - 1); /* pop extra values */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
|
||||
** not nested inside a full capture that does not contain 'ref'. (We
|
||||
** only need to care for full captures because the search at 'findback'
|
||||
** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
|
||||
** 'ref' is also inside it.) To check this, we search backward for the
|
||||
** inner full capture enclosing 'grp'. A full capture cannot contain
|
||||
** non-full captures, so a close capture means we cannot be inside a
|
||||
** full capture anymore.
|
||||
*/
|
||||
static int capvisible (CapState *cs, Capture *grp, Capture *ref) {
|
||||
Capture *cap = grp;
|
||||
int i = MAXLOP; /* maximum distance for an 'open' */
|
||||
while (i-- > 0 && cap-- > cs->ocap) {
|
||||
if (isclosecap(cap))
|
||||
return 1; /* can stop the search */
|
||||
else if (grp->index - cap->index >= UCHAR_MAX)
|
||||
return 1; /* can stop the search */
|
||||
else if (capinside(cap, grp)) /* is 'grp' inside cap? */
|
||||
return capinside(cap, ref); /* ok iff cap also contains 'ref' */
|
||||
}
|
||||
return 1; /* 'grp' is not inside any capture */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Try to find a named group capture with the name given at the top of
|
||||
** the stack; goes backward from 'ref'.
|
||||
*/
|
||||
static Capture *findback (CapState *cs, Capture *ref) {
|
||||
lua_State *L = cs->L;
|
||||
Capture *cap = ref;
|
||||
while (cap-- > cs->ocap) { /* repeat until end of list */
|
||||
if (isclosecap(cap))
|
||||
cap = findopen(cap); /* skip nested captures */
|
||||
else if (capinside(cap, ref))
|
||||
continue; /* enclosing captures are not visible to 'ref' */
|
||||
if (captype(cap) == Cgroup && capvisible(cs, cap, ref)) {
|
||||
getfromktable(cs, cap->idx); /* get group name */
|
||||
if (lp_equal(L, -2, -1)) { /* right group? */
|
||||
lua_pop(L, 2); /* remove reference name and group name */
|
||||
return cap;
|
||||
}
|
||||
else lua_pop(L, 1); /* remove group name */
|
||||
}
|
||||
}
|
||||
luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
|
||||
return NULL; /* to avoid warnings */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Back-reference capture. Return number of values pushed.
|
||||
*/
|
||||
static int backrefcap (CapState *cs) {
|
||||
int n;
|
||||
Capture *curr = cs->cap;
|
||||
pushluaval(cs); /* reference name */
|
||||
cs->cap = findback(cs, curr); /* find corresponding group */
|
||||
n = pushnestedvalues(cs, 0); /* push group's values */
|
||||
cs->cap = curr + 1;
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Table capture: creates a new table and populates it with nested
|
||||
** captures.
|
||||
*/
|
||||
static int tablecap (CapState *cs) {
|
||||
lua_State *L = cs->L;
|
||||
Capture *head = cs->cap++;
|
||||
int n = 0;
|
||||
lua_newtable(L);
|
||||
while (capinside(head, cs->cap)) {
|
||||
if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
|
||||
pushluaval(cs); /* push group name */
|
||||
pushonenestedvalue(cs);
|
||||
lua_settable(L, -3);
|
||||
}
|
||||
else { /* not a named group */
|
||||
int i;
|
||||
int k = pushcapture(cs);
|
||||
for (i = k; i > 0; i--) /* store all values into table */
|
||||
lua_rawseti(L, -(i + 1), n + i);
|
||||
n += k;
|
||||
}
|
||||
}
|
||||
skipclose(cs, head);
|
||||
return 1; /* number of values pushed (only the table) */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Table-query capture
|
||||
*/
|
||||
static int querycap (CapState *cs) {
|
||||
int idx = cs->cap->idx;
|
||||
pushonenestedvalue(cs); /* get nested capture */
|
||||
lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
|
||||
if (!lua_isnil(cs->L, -1))
|
||||
return 1;
|
||||
else { /* no value */
|
||||
lua_pop(cs->L, 1); /* remove nil */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Fold capture
|
||||
*/
|
||||
static int foldcap (CapState *cs) {
|
||||
int n;
|
||||
lua_State *L = cs->L;
|
||||
Capture *head = cs->cap++;
|
||||
int idx = head->idx;
|
||||
if (isclosecap(cs->cap) || /* no nested captures (large subject)? */
|
||||
(n = pushcapture(cs)) == 0) /* nested captures with no values? */
|
||||
return luaL_error(L, "no initial value for fold capture");
|
||||
if (n > 1)
|
||||
lua_pop(L, n - 1); /* leave only one result for accumulator */
|
||||
while (capinside(head, cs->cap)) {
|
||||
lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
|
||||
lua_insert(L, -2); /* put it before accumulator */
|
||||
n = pushcapture(cs); /* get next capture's values */
|
||||
lua_call(L, n + 1, 1); /* call folding function */
|
||||
}
|
||||
skipclose(cs, head);
|
||||
return 1; /* only accumulator left on the stack */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Function capture
|
||||
*/
|
||||
static int functioncap (CapState *cs) {
|
||||
int n;
|
||||
int top = lua_gettop(cs->L);
|
||||
pushluaval(cs); /* push function */
|
||||
n = pushnestedvalues(cs, 0); /* push nested captures */
|
||||
lua_call(cs->L, n, LUA_MULTRET); /* call function */
|
||||
return lua_gettop(cs->L) - top; /* return function's results */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Accumulator capture
|
||||
*/
|
||||
static int accumulatorcap (CapState *cs) {
|
||||
lua_State *L = cs->L;
|
||||
int n;
|
||||
if (lua_gettop(L) < cs->firstcap)
|
||||
luaL_error(L, "no previous value for accumulator capture");
|
||||
pushluaval(cs); /* push function */
|
||||
lua_insert(L, -2); /* previous value becomes first argument */
|
||||
n = pushnestedvalues(cs, 0); /* push nested captures */
|
||||
lua_call(L, n + 1, 1); /* call function */
|
||||
return 0; /* did not add any extra value */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Select capture
|
||||
*/
|
||||
static int numcap (CapState *cs) {
|
||||
int idx = cs->cap->idx; /* value to select */
|
||||
if (idx == 0) { /* no values? */
|
||||
nextcap(cs); /* skip entire capture */
|
||||
return 0; /* no value produced */
|
||||
}
|
||||
else {
|
||||
int n = pushnestedvalues(cs, 0);
|
||||
if (n < idx) /* invalid index? */
|
||||
return luaL_error(cs->L, "no capture '%d'", idx);
|
||||
else {
|
||||
lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
|
||||
lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
|
||||
lua_pop(cs->L, n - 1); /* remove other captures */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Return the stack index of the first runtime capture in the given
|
||||
** list of captures (or zero if no runtime captures)
|
||||
*/
|
||||
int finddyncap (Capture *cap, Capture *last) {
|
||||
for (; cap < last; cap++) {
|
||||
if (cap->kind == Cruntime)
|
||||
return cap->idx; /* stack position of first capture */
|
||||
}
|
||||
return 0; /* no dynamic captures in this segment */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Calls a runtime capture. Returns number of captures "removed" by the
|
||||
** call, that is, those inside the group capture. Captures to be added
|
||||
** are on the Lua stack.
|
||||
*/
|
||||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
|
||||
int n, id;
|
||||
lua_State *L = cs->L;
|
||||
int otop = lua_gettop(L);
|
||||
Capture *open = findopen(close); /* get open group capture */
|
||||
assert(captype(open) == Cgroup);
|
||||
id = finddyncap(open, close); /* get first dynamic capture argument */
|
||||
close->kind = Cclose; /* closes the group */
|
||||
close->index = s - cs->s;
|
||||
cs->cap = open; cs->valuecached = 0; /* prepare capture state */
|
||||
luaL_checkstack(L, 4, "too many runtime captures");
|
||||
pushluaval(cs); /* push function to be called */
|
||||
lua_pushvalue(L, SUBJIDX); /* push original subject */
|
||||
lua_pushinteger(L, s - cs->s + 1); /* push current position */
|
||||
n = pushnestedvalues(cs, 0); /* push nested captures */
|
||||
lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
|
||||
if (id > 0) { /* are there old dynamic captures to be removed? */
|
||||
int i;
|
||||
for (i = id; i <= otop; i++)
|
||||
lua_remove(L, id); /* remove old dynamic captures */
|
||||
*rem = otop - id + 1; /* total number of dynamic captures removed */
|
||||
}
|
||||
else
|
||||
*rem = 0; /* no dynamic captures removed */
|
||||
return close - open - 1; /* number of captures to be removed */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Auxiliary structure for substitution and string captures: keep
|
||||
** information about nested captures for future use, avoiding to push
|
||||
** string results into Lua
|
||||
*/
|
||||
typedef struct StrAux {
|
||||
int isstring; /* whether capture is a string */
|
||||
union {
|
||||
Capture *cp; /* if not a string, respective capture */
|
||||
struct { /* if it is a string... */
|
||||
Index_t idx; /* starts here */
|
||||
Index_t siz; /* with this size */
|
||||
} s;
|
||||
} u;
|
||||
} StrAux;
|
||||
|
||||
#define MAXSTRCAPS 10
|
||||
|
||||
/*
|
||||
** Collect values from current capture into array 'cps'. Current
|
||||
** capture must be Cstring (first call) or Csimple (recursive calls).
|
||||
** (In first call, fills %0 with whole match for Cstring.)
|
||||
** Returns number of elements in the array that were filled.
|
||||
*/
|
||||
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
|
||||
int k = n++;
|
||||
Capture *head = cs->cap++;
|
||||
cps[k].isstring = 1; /* get string value */
|
||||
cps[k].u.s.idx = head->index; /* starts here */
|
||||
while (capinside(head, cs->cap)) {
|
||||
if (n >= MAXSTRCAPS) /* too many captures? */
|
||||
nextcap(cs); /* skip extra captures (will not need them) */
|
||||
else if (captype(cs->cap) == Csimple) /* string? */
|
||||
n = getstrcaps(cs, cps, n); /* put info. into array */
|
||||
else {
|
||||
cps[n].isstring = 0; /* not a string */
|
||||
cps[n].u.cp = cs->cap; /* keep original capture */
|
||||
nextcap(cs);
|
||||
n++;
|
||||
}
|
||||
}
|
||||
cps[k].u.s.siz = closesize(cs, head);
|
||||
skipclose(cs, head);
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** add next capture value (which should be a string) to buffer 'b'
|
||||
*/
|
||||
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
|
||||
|
||||
|
||||
/*
|
||||
** String capture: add result to buffer 'b' (instead of pushing
|
||||
** it into the stack)
|
||||
*/
|
||||
static void stringcap (luaL_Buffer *b, CapState *cs) {
|
||||
StrAux cps[MAXSTRCAPS];
|
||||
int n;
|
||||
size_t len, i;
|
||||
const char *fmt; /* format string */
|
||||
fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
|
||||
n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
|
||||
for (i = 0; i < len; i++) { /* traverse format string */
|
||||
if (fmt[i] != '%') /* not an escape? */
|
||||
luaL_addchar(b, fmt[i]); /* add it to buffer */
|
||||
else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
|
||||
luaL_addchar(b, fmt[i]); /* add to buffer */
|
||||
else {
|
||||
int l = fmt[i] - '0'; /* capture index */
|
||||
if (l > n)
|
||||
luaL_error(cs->L, "invalid capture index (%d)", l);
|
||||
else if (cps[l].isstring)
|
||||
luaL_addlstring(b, cs->s + cps[l].u.s.idx, cps[l].u.s.siz);
|
||||
else {
|
||||
Capture *curr = cs->cap;
|
||||
cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
|
||||
if (!addonestring(b, cs, "capture"))
|
||||
luaL_error(cs->L, "no values in capture index %d", l);
|
||||
cs->cap = curr; /* continue from where it stopped */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Substitution capture: add result to buffer 'b'
|
||||
*/
|
||||
static void substcap (luaL_Buffer *b, CapState *cs) {
|
||||
const char *curr = cs->s + cs->cap->index;
|
||||
Capture *head = cs->cap++;
|
||||
while (capinside(head, cs->cap)) {
|
||||
Capture *cap = cs->cap;
|
||||
const char *caps = cs->s + cap->index;
|
||||
luaL_addlstring(b, curr, caps - curr); /* add text up to capture */
|
||||
if (addonestring(b, cs, "replacement"))
|
||||
curr = caps + capsize(cap, cs->cap - 1); /* continue after match */
|
||||
else /* no capture value */
|
||||
curr = caps; /* keep original text in final result */
|
||||
}
|
||||
/* add last piece of text */
|
||||
luaL_addlstring(b, curr, cs->s + head->index + closesize(cs, head) - curr);
|
||||
skipclose(cs, head);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Evaluates a capture and adds its first value to buffer 'b'; returns
|
||||
** whether there was a value
|
||||
*/
|
||||
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
|
||||
switch (captype(cs->cap)) {
|
||||
case Cstring:
|
||||
stringcap(b, cs); /* add capture directly to buffer */
|
||||
return 1;
|
||||
case Csubst:
|
||||
substcap(b, cs); /* add capture directly to buffer */
|
||||
return 1;
|
||||
case Cacc: /* accumulator capture? */
|
||||
return luaL_error(cs->L, "invalid context for an accumulator capture");
|
||||
default: {
|
||||
lua_State *L = cs->L;
|
||||
int n = pushcapture(cs);
|
||||
if (n > 0) {
|
||||
if (n > 1) lua_pop(L, n - 1); /* only one result */
|
||||
if (!lua_isstring(L, -1))
|
||||
return luaL_error(L, "invalid %s value (a %s)",
|
||||
what, luaL_typename(L, -1));
|
||||
luaL_addvalue(b);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if !defined(MAXRECLEVEL)
|
||||
#define MAXRECLEVEL 200
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
** Push all values of the current capture into the stack; returns
|
||||
** number of values pushed
|
||||
*/
|
||||
static int pushcapture (CapState *cs) {
|
||||
lua_State *L = cs->L;
|
||||
int res;
|
||||
luaL_checkstack(L, 4, "too many captures");
|
||||
if (cs->reclevel++ > MAXRECLEVEL)
|
||||
return luaL_error(L, "subcapture nesting too deep");
|
||||
switch (captype(cs->cap)) {
|
||||
case Cposition: {
|
||||
lua_pushinteger(L, cs->cap->index + 1);
|
||||
cs->cap++;
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
case Cconst: {
|
||||
pushluaval(cs);
|
||||
cs->cap++;
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
case Carg: {
|
||||
int arg = (cs->cap++)->idx;
|
||||
if (arg + FIXEDARGS > cs->ptop)
|
||||
return luaL_error(L, "reference to absent extra argument #%d", arg);
|
||||
lua_pushvalue(L, arg + FIXEDARGS);
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
case Csimple: {
|
||||
int k = pushnestedvalues(cs, 1);
|
||||
lua_insert(L, -k); /* make whole match be first result */
|
||||
res = k;
|
||||
break;
|
||||
}
|
||||
case Cruntime: {
|
||||
lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
case Cstring: {
|
||||
luaL_Buffer b;
|
||||
luaL_buffinit(L, &b);
|
||||
stringcap(&b, cs);
|
||||
luaL_pushresult(&b);
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
case Csubst: {
|
||||
luaL_Buffer b;
|
||||
luaL_buffinit(L, &b);
|
||||
substcap(&b, cs);
|
||||
luaL_pushresult(&b);
|
||||
res = 1;
|
||||
break;
|
||||
}
|
||||
case Cgroup: {
|
||||
if (cs->cap->idx == 0) /* anonymous group? */
|
||||
res = pushnestedvalues(cs, 0); /* add all nested values */
|
||||
else { /* named group: add no values */
|
||||
nextcap(cs); /* skip capture */
|
||||
res = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Cbackref: res = backrefcap(cs); break;
|
||||
case Ctable: res = tablecap(cs); break;
|
||||
case Cfunction: res = functioncap(cs); break;
|
||||
case Cacc: res = accumulatorcap(cs); break;
|
||||
case Cnum: res = numcap(cs); break;
|
||||
case Cquery: res = querycap(cs); break;
|
||||
case Cfold: res = foldcap(cs); break;
|
||||
default: assert(0); res = 0;
|
||||
}
|
||||
cs->reclevel--;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Prepare a CapState structure and traverse the entire list of
|
||||
** captures in the stack pushing its results. 's' is the subject
|
||||
** string, 'r' is the final position of the match, and 'ptop'
|
||||
** the index in the stack where some useful values were pushed.
|
||||
** Returns the number of results pushed. (If the list produces no
|
||||
** results, push the final position of the match.)
|
||||
*/
|
||||
int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
|
||||
Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
|
||||
int n = 0;
|
||||
/* printcaplist(capture); */
|
||||
if (!isclosecap(capture)) { /* is there any capture? */
|
||||
CapState cs;
|
||||
cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
|
||||
cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
|
||||
cs.firstcap = lua_gettop(L) + 1; /* where first value (if any) will go */
|
||||
do { /* collect their values */
|
||||
n += pushcapture(&cs);
|
||||
} while (!isclosecap(cs.cap));
|
||||
assert(lua_gettop(L) - cs.firstcap == n - 1);
|
||||
}
|
||||
if (n == 0) { /* no capture values? */
|
||||
lua_pushinteger(L, r - s + 1); /* return only end position */
|
||||
n = 1;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
86
src/lua/lpeg-1.1.0/lpcap.h
Normal file
86
src/lua/lpeg-1.1.0/lpcap.h
Normal file
@@ -0,0 +1,86 @@
|
||||
|
||||
#if !defined(lpcap_h)
|
||||
#define lpcap_h
|
||||
|
||||
|
||||
#include "lptypes.h"
|
||||
|
||||
|
||||
/* kinds of captures */
|
||||
typedef enum CapKind {
|
||||
Cclose, /* not used in trees */
|
||||
Cposition,
|
||||
Cconst, /* ktable[key] is Lua constant */
|
||||
Cbackref, /* ktable[key] is "name" of group to get capture */
|
||||
Carg, /* 'key' is arg's number */
|
||||
Csimple, /* next node is pattern */
|
||||
Ctable, /* next node is pattern */
|
||||
Cfunction, /* ktable[key] is function; next node is pattern */
|
||||
Cacc, /* ktable[key] is function; next node is pattern */
|
||||
Cquery, /* ktable[key] is table; next node is pattern */
|
||||
Cstring, /* ktable[key] is string; next node is pattern */
|
||||
Cnum, /* numbered capture; 'key' is number of value to return */
|
||||
Csubst, /* substitution capture; next node is pattern */
|
||||
Cfold, /* ktable[key] is function; next node is pattern */
|
||||
Cruntime, /* not used in trees (is uses another type for tree) */
|
||||
Cgroup /* ktable[key] is group's "name" */
|
||||
} CapKind;
|
||||
|
||||
|
||||
/*
|
||||
** An unsigned integer large enough to index any subject entirely.
|
||||
** It can be size_t, but that will double the size of the array
|
||||
** of captures in a 64-bit machine.
|
||||
*/
|
||||
#if !defined(Index_t)
|
||||
typedef uint Index_t;
|
||||
#endif
|
||||
|
||||
#define MAXINDT (~(Index_t)0)
|
||||
|
||||
|
||||
typedef struct Capture {
|
||||
Index_t index; /* subject position */
|
||||
unsigned short idx; /* extra info (group name, arg index, etc.) */
|
||||
byte kind; /* kind of capture */
|
||||
byte siz; /* size of full capture + 1 (0 = not a full capture) */
|
||||
} Capture;
|
||||
|
||||
|
||||
typedef struct CapState {
|
||||
Capture *cap; /* current capture */
|
||||
Capture *ocap; /* (original) capture list */
|
||||
lua_State *L;
|
||||
int ptop; /* stack index of last argument to 'match' */
|
||||
int firstcap; /* stack index of first capture pushed in the stack */
|
||||
const char *s; /* original string */
|
||||
int valuecached; /* value stored in cache slot */
|
||||
int reclevel; /* recursion level */
|
||||
} CapState;
|
||||
|
||||
|
||||
#define captype(cap) ((cap)->kind)
|
||||
|
||||
#define isclosecap(cap) (captype(cap) == Cclose)
|
||||
#define isopencap(cap) ((cap)->siz == 0)
|
||||
|
||||
/* true if c2 is (any number of levels) inside c1 */
|
||||
#define capinside(c1,c2) \
|
||||
(isopencap(c1) ? !isclosecap(c2) \
|
||||
: (c2)->index < (c1)->index + (c1)->siz - 1)
|
||||
|
||||
|
||||
/**
|
||||
** Maximum number of captures to visit when looking for an 'open'.
|
||||
*/
|
||||
#define MAXLOP 20
|
||||
|
||||
|
||||
|
||||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
|
||||
int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
|
||||
int finddyncap (Capture *cap, Capture *last);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
1051
src/lua/lpeg-1.1.0/lpcode.c
Normal file
1051
src/lua/lpeg-1.1.0/lpcode.c
Normal file
File diff suppressed because it is too large
Load Diff
36
src/lua/lpeg-1.1.0/lpcode.h
Normal file
36
src/lua/lpeg-1.1.0/lpcode.h
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
#if !defined(lpcode_h)
|
||||
#define lpcode_h
|
||||
|
||||
#include "lua.h"
|
||||
|
||||
#include "lptypes.h"
|
||||
#include "lptree.h"
|
||||
#include "lpvm.h"
|
||||
|
||||
int checkaux (TTree *tree, int pred);
|
||||
int fixedlen (TTree *tree);
|
||||
int hascaptures (TTree *tree);
|
||||
int lp_gc (lua_State *L);
|
||||
Instruction *compile (lua_State *L, Pattern *p, uint size);
|
||||
void freecode (lua_State *L, Pattern *p);
|
||||
int sizei (const Instruction *i);
|
||||
|
||||
|
||||
#define PEnullable 0
|
||||
#define PEnofail 1
|
||||
|
||||
/*
|
||||
** nofail(t) implies that 't' cannot fail with any input
|
||||
*/
|
||||
#define nofail(t) checkaux(t, PEnofail)
|
||||
|
||||
/*
|
||||
** (not nullable(t)) implies 't' cannot match without consuming
|
||||
** something
|
||||
*/
|
||||
#define nullable(t) checkaux(t, PEnullable)
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
110
src/lua/lpeg-1.1.0/lpcset.c
Normal file
110
src/lua/lpeg-1.1.0/lpcset.c
Normal file
@@ -0,0 +1,110 @@
|
||||
|
||||
#include "lptypes.h"
|
||||
#include "lpcset.h"
|
||||
|
||||
|
||||
/*
|
||||
** Add to 'c' the index of the (only) bit set in byte 'b'
|
||||
*/
|
||||
static int onlybit (int c, int b) {
|
||||
if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
|
||||
if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
|
||||
if ((b & 0x02) != 0) { c += 1; }
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Check whether a charset is empty (returns IFail), singleton (IChar),
|
||||
** full (IAny), or none of those (ISet). When singleton, 'info.offset'
|
||||
** returns which character it is. When generic set, 'info' returns
|
||||
** information about its range.
|
||||
*/
|
||||
Opcode charsettype (const byte *cs, charsetinfo *info) {
|
||||
int low0, low1, high0, high1;
|
||||
for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
|
||||
/* find lowest byte with a 1-bit */;
|
||||
if (low1 == CHARSETSIZE)
|
||||
return IFail; /* no characters in set */
|
||||
for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
|
||||
/* find highest byte with a 1-bit; low1 is a sentinel */;
|
||||
if (low1 == high1) { /* only one byte with 1-bits? */
|
||||
int b = cs[low1];
|
||||
if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
|
||||
info->offset = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
|
||||
return IChar; /* single character */
|
||||
}
|
||||
}
|
||||
for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
|
||||
/* find lowest byte with a 0-bit */;
|
||||
if (low0 == CHARSETSIZE)
|
||||
return IAny; /* set has all bits set */
|
||||
for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
|
||||
/* find highest byte with a 0-bit; low0 is a sentinel */;
|
||||
if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
|
||||
info->offset = low1;
|
||||
info->size = high1 - low1 + 1;
|
||||
info->deflt = 0; /* all discharged bits were 0 */
|
||||
}
|
||||
else {
|
||||
info->offset = low0;
|
||||
info->size = high0 - low0 + 1;
|
||||
info->deflt = 0xFF; /* all discharged bits were 1 */
|
||||
}
|
||||
info->cs = cs + info->offset;
|
||||
return ISet;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Get a byte from a compact charset. If index is inside the charset
|
||||
** range, get the byte from the supporting charset (correcting it
|
||||
** by the offset). Otherwise, return the default for the set.
|
||||
*/
|
||||
byte getbytefromcharset (const charsetinfo *info, int index) {
|
||||
if (index < info->size)
|
||||
return info->cs[index];
|
||||
else return info->deflt;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** If 'tree' is a 'char' pattern (TSet, TChar, TAny, TFalse), convert it
|
||||
** into a charset and return 1; else return 0.
|
||||
*/
|
||||
int tocharset (TTree *tree, Charset *cs) {
|
||||
switch (tree->tag) {
|
||||
case TChar: { /* only one char */
|
||||
assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
|
||||
clearset(cs->cs); /* erase all chars */
|
||||
setchar(cs->cs, tree->u.n); /* add that one */
|
||||
return 1;
|
||||
}
|
||||
case TAny: {
|
||||
fillset(cs->cs, 0xFF); /* add all characters to the set */
|
||||
return 1;
|
||||
}
|
||||
case TFalse: {
|
||||
clearset(cs->cs); /* empty set */
|
||||
return 1;
|
||||
}
|
||||
case TSet: { /* fill set */
|
||||
int i;
|
||||
fillset(cs->cs, tree->u.set.deflt);
|
||||
for (i = 0; i < tree->u.set.size; i++)
|
||||
cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i];
|
||||
return 1;
|
||||
}
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void tree2cset (TTree *tree, charsetinfo *info) {
|
||||
assert(tree->tag == TSet);
|
||||
info->offset = tree->u.set.offset;
|
||||
info->size = tree->u.set.size;
|
||||
info->deflt = tree->u.set.deflt;
|
||||
info->cs = treebuffer(tree);
|
||||
}
|
||||
|
||||
30
src/lua/lpeg-1.1.0/lpcset.h
Normal file
30
src/lua/lpeg-1.1.0/lpcset.h
Normal file
@@ -0,0 +1,30 @@
|
||||
|
||||
#if !defined(lpset_h)
|
||||
#define lpset_h
|
||||
|
||||
#include "lpcset.h"
|
||||
#include "lpcode.h"
|
||||
#include "lptree.h"
|
||||
|
||||
|
||||
/*
|
||||
** Extra information for the result of 'charsettype'. When result is
|
||||
** IChar, 'offset' is the character. When result is ISet, 'cs' is the
|
||||
** supporting bit array (with offset included), 'offset' is the offset
|
||||
** (in bytes), 'size' is the size (in bytes), and 'delt' is the default
|
||||
** value for bytes outside the set.
|
||||
*/
|
||||
typedef struct {
|
||||
const byte *cs;
|
||||
int offset;
|
||||
int size;
|
||||
int deflt;
|
||||
} charsetinfo;
|
||||
|
||||
|
||||
int tocharset (TTree *tree, Charset *cs);
|
||||
Opcode charsettype (const byte *cs, charsetinfo *info);
|
||||
byte getbytefromcharset (const charsetinfo *info, int index);
|
||||
void tree2cset (TTree *tree, charsetinfo *info);
|
||||
|
||||
#endif
|
||||
BIN
src/lua/lpeg-1.1.0/lpeg-128.gif
Normal file
BIN
src/lua/lpeg-1.1.0/lpeg-128.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.8 KiB |
1430
src/lua/lpeg-1.1.0/lpeg.html
Normal file
1430
src/lua/lpeg-1.1.0/lpeg.html
Normal file
File diff suppressed because it is too large
Load Diff
298
src/lua/lpeg-1.1.0/lpprint.c
Normal file
298
src/lua/lpeg-1.1.0/lpprint.c
Normal file
@@ -0,0 +1,298 @@
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#include "lptypes.h"
|
||||
#include "lpprint.h"
|
||||
#include "lpcode.h"
|
||||
|
||||
|
||||
#if defined(LPEG_DEBUG)
|
||||
|
||||
/*
|
||||
** {======================================================
|
||||
** Printing patterns (for debugging)
|
||||
** =======================================================
|
||||
*/
|
||||
|
||||
|
||||
void printcharset (const byte *st) {
|
||||
int i;
|
||||
printf("[");
|
||||
for (i = 0; i <= UCHAR_MAX; i++) {
|
||||
int first = i;
|
||||
while (i <= UCHAR_MAX && testchar(st, i)) i++;
|
||||
if (i - 1 == first) /* unary range? */
|
||||
printf("(%02x)", first);
|
||||
else if (i - 1 > first) /* non-empty range? */
|
||||
printf("(%02x-%02x)", first, i - 1);
|
||||
}
|
||||
printf("]");
|
||||
}
|
||||
|
||||
|
||||
static void printIcharset (const Instruction *inst, const byte *buff) {
|
||||
byte cs[CHARSETSIZE];
|
||||
int i;
|
||||
printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size);
|
||||
clearset(cs);
|
||||
for (i = 0; i < CHARSETSIZE * 8; i++) {
|
||||
if (charinset(inst, buff, i))
|
||||
setchar(cs, i);
|
||||
}
|
||||
printcharset(cs);
|
||||
}
|
||||
|
||||
|
||||
static void printTcharset (TTree *tree) {
|
||||
byte cs[CHARSETSIZE];
|
||||
int i;
|
||||
printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size);
|
||||
fillset(cs, tree->u.set.deflt);
|
||||
for (i = 0; i < tree->u.set.size; i++)
|
||||
cs[tree->u.set.offset + i] = treebuffer(tree)[i];
|
||||
printcharset(cs);
|
||||
}
|
||||
|
||||
|
||||
static const char *capkind (int kind) {
|
||||
const char *const modes[] = {
|
||||
"close", "position", "constant", "backref",
|
||||
"argument", "simple", "table", "function", "accumulator",
|
||||
"query", "string", "num", "substitution", "fold",
|
||||
"runtime", "group"};
|
||||
return modes[kind];
|
||||
}
|
||||
|
||||
|
||||
static void printjmp (const Instruction *op, const Instruction *p) {
|
||||
printf("-> %d", (int)(p + (p + 1)->offset - op));
|
||||
}
|
||||
|
||||
|
||||
void printinst (const Instruction *op, const Instruction *p) {
|
||||
const char *const names[] = {
|
||||
"any", "char", "set",
|
||||
"testany", "testchar", "testset",
|
||||
"span", "utf-range", "behind",
|
||||
"ret", "end",
|
||||
"choice", "jmp", "call", "open_call",
|
||||
"commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
|
||||
"fullcapture", "opencapture", "closecapture", "closeruntime",
|
||||
"--"
|
||||
};
|
||||
printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
|
||||
switch ((Opcode)p->i.code) {
|
||||
case IChar: {
|
||||
printf("'%c' (%02x)", p->i.aux1, p->i.aux1);
|
||||
break;
|
||||
}
|
||||
case ITestChar: {
|
||||
printf("'%c' (%02x)", p->i.aux1, p->i.aux1); printjmp(op, p);
|
||||
break;
|
||||
}
|
||||
case IUTFR: {
|
||||
printf("%d - %d", p[1].offset, utf_to(p));
|
||||
break;
|
||||
}
|
||||
case IFullCapture: {
|
||||
printf("%s (size = %d) (idx = %d)",
|
||||
capkind(getkind(p)), getoff(p), p->i.aux2.key);
|
||||
break;
|
||||
}
|
||||
case IOpenCapture: {
|
||||
printf("%s (idx = %d)", capkind(getkind(p)), p->i.aux2.key);
|
||||
break;
|
||||
}
|
||||
case ISet: {
|
||||
printIcharset(p, (p+1)->buff);
|
||||
break;
|
||||
}
|
||||
case ITestSet: {
|
||||
printIcharset(p, (p+2)->buff); printjmp(op, p);
|
||||
break;
|
||||
}
|
||||
case ISpan: {
|
||||
printIcharset(p, (p+1)->buff);
|
||||
break;
|
||||
}
|
||||
case IOpenCall: {
|
||||
printf("-> %d", (p + 1)->offset);
|
||||
break;
|
||||
}
|
||||
case IBehind: {
|
||||
printf("%d", p->i.aux1);
|
||||
break;
|
||||
}
|
||||
case IJmp: case ICall: case ICommit: case IChoice:
|
||||
case IPartialCommit: case IBackCommit: case ITestAny: {
|
||||
printjmp(op, p);
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
void printpatt (Instruction *p) {
|
||||
Instruction *op = p;
|
||||
uint n = op[-1].codesize - 1;
|
||||
while (p < op + n) {
|
||||
printinst(op, p);
|
||||
p += sizei(p);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void printcap (Capture *cap, int ident) {
|
||||
while (ident--) printf(" ");
|
||||
printf("%s (idx: %d - size: %d) -> %lu (%p)\n",
|
||||
capkind(cap->kind), cap->idx, cap->siz, (long)cap->index, (void*)cap);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Print a capture and its nested captures
|
||||
*/
|
||||
static Capture *printcap2close (Capture *cap, int ident) {
|
||||
Capture *head = cap++;
|
||||
printcap(head, ident); /* print head capture */
|
||||
while (capinside(head, cap))
|
||||
cap = printcap2close(cap, ident + 2); /* print nested captures */
|
||||
if (isopencap(head)) {
|
||||
assert(isclosecap(cap));
|
||||
printcap(cap++, ident); /* print and skip close capture */
|
||||
}
|
||||
return cap;
|
||||
}
|
||||
|
||||
|
||||
void printcaplist (Capture *cap) {
|
||||
{ /* for debugging, print first a raw list of captures */
|
||||
Capture *c = cap;
|
||||
while (c->index != MAXINDT) { printcap(c, 0); c++; }
|
||||
}
|
||||
printf(">======\n");
|
||||
while (!isclosecap(cap))
|
||||
cap = printcap2close(cap, 0);
|
||||
printf("=======\n");
|
||||
}
|
||||
|
||||
/* }====================================================== */
|
||||
|
||||
|
||||
/*
|
||||
** {======================================================
|
||||
** Printing trees (for debugging)
|
||||
** =======================================================
|
||||
*/
|
||||
|
||||
static const char *tagnames[] = {
|
||||
"char", "set", "any",
|
||||
"true", "false", "utf8.range",
|
||||
"rep",
|
||||
"seq", "choice",
|
||||
"not", "and",
|
||||
"call", "opencall", "rule", "xinfo", "grammar",
|
||||
"behind",
|
||||
"capture", "run-time"
|
||||
};
|
||||
|
||||
|
||||
void printtree (TTree *tree, int ident) {
|
||||
int i;
|
||||
int sibs = numsiblings[tree->tag];
|
||||
for (i = 0; i < ident; i++) printf(" ");
|
||||
printf("%s", tagnames[tree->tag]);
|
||||
switch (tree->tag) {
|
||||
case TChar: {
|
||||
int c = tree->u.n;
|
||||
if (isprint(c))
|
||||
printf(" '%c'\n", c);
|
||||
else
|
||||
printf(" (%02X)\n", c);
|
||||
break;
|
||||
}
|
||||
case TSet: {
|
||||
printTcharset(tree);
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
case TUTFR: {
|
||||
assert(sib1(tree)->tag == TXInfo);
|
||||
printf(" %d (%02x %d) - %d (%02x %d) \n",
|
||||
tree->u.n, tree->key, tree->cap,
|
||||
sib1(tree)->u.n, sib1(tree)->key, sib1(tree)->cap);
|
||||
break;
|
||||
}
|
||||
case TOpenCall: case TCall: {
|
||||
assert(sib1(sib2(tree))->tag == TXInfo);
|
||||
printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n);
|
||||
break;
|
||||
}
|
||||
case TBehind: {
|
||||
printf(" %d\n", tree->u.n);
|
||||
break;
|
||||
}
|
||||
case TCapture: {
|
||||
printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
|
||||
break;
|
||||
}
|
||||
case TRule: {
|
||||
printf(" key: %d\n", tree->key);
|
||||
sibs = 1; /* do not print 'sib2' (next rule) as a sibling */
|
||||
break;
|
||||
}
|
||||
case TXInfo: {
|
||||
printf(" n: %d\n", tree->u.n);
|
||||
break;
|
||||
}
|
||||
case TGrammar: {
|
||||
TTree *rule = sib1(tree);
|
||||
printf(" %d\n", tree->u.n); /* number of rules */
|
||||
for (i = 0; i < tree->u.n; i++) {
|
||||
printtree(rule, ident + 2);
|
||||
rule = sib2(rule);
|
||||
}
|
||||
assert(rule->tag == TTrue); /* sentinel */
|
||||
sibs = 0; /* siblings already handled */
|
||||
break;
|
||||
}
|
||||
default:
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
if (sibs >= 1) {
|
||||
printtree(sib1(tree), ident + 2);
|
||||
if (sibs >= 2)
|
||||
printtree(sib2(tree), ident + 2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void printktable (lua_State *L, int idx) {
|
||||
int n, i;
|
||||
lua_getuservalue(L, idx);
|
||||
if (lua_isnil(L, -1)) /* no ktable? */
|
||||
return;
|
||||
n = lua_rawlen(L, -1);
|
||||
printf("[");
|
||||
for (i = 1; i <= n; i++) {
|
||||
printf("%d = ", i);
|
||||
lua_rawgeti(L, -1, i);
|
||||
if (lua_isstring(L, -1))
|
||||
printf("%s ", lua_tostring(L, -1));
|
||||
else
|
||||
printf("%s ", lua_typename(L, lua_type(L, -1)));
|
||||
lua_pop(L, 1);
|
||||
}
|
||||
printf("]\n");
|
||||
/* leave ktable at the stack */
|
||||
}
|
||||
|
||||
/* }====================================================== */
|
||||
|
||||
#endif
|
||||
32
src/lua/lpeg-1.1.0/lpprint.h
Normal file
32
src/lua/lpeg-1.1.0/lpprint.h
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
#if !defined(lpprint_h)
|
||||
#define lpprint_h
|
||||
|
||||
|
||||
#include "lptree.h"
|
||||
#include "lpvm.h"
|
||||
|
||||
|
||||
#if defined(LPEG_DEBUG)
|
||||
|
||||
void printpatt (Instruction *p);
|
||||
void printtree (TTree *tree, int ident);
|
||||
void printktable (lua_State *L, int idx);
|
||||
void printcharset (const byte *st);
|
||||
void printcaplist (Capture *cap);
|
||||
void printinst (const Instruction *op, const Instruction *p);
|
||||
|
||||
#else
|
||||
|
||||
#define printktable(L,idx) \
|
||||
luaL_error(L, "function only implemented in debug mode")
|
||||
#define printtree(tree,i) \
|
||||
luaL_error(L, "function only implemented in debug mode")
|
||||
#define printpatt(p) \
|
||||
luaL_error(L, "function only implemented in debug mode")
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
1399
src/lua/lpeg-1.1.0/lptree.c
Normal file
1399
src/lua/lpeg-1.1.0/lptree.c
Normal file
File diff suppressed because it is too large
Load Diff
92
src/lua/lpeg-1.1.0/lptree.h
Normal file
92
src/lua/lpeg-1.1.0/lptree.h
Normal file
@@ -0,0 +1,92 @@
|
||||
|
||||
#if !defined(lptree_h)
|
||||
#define lptree_h
|
||||
|
||||
|
||||
#include "lptypes.h"
|
||||
|
||||
|
||||
/*
|
||||
** types of trees
|
||||
*/
|
||||
typedef enum TTag {
|
||||
TChar = 0, /* 'n' = char */
|
||||
TSet, /* the set is encoded in 'u.set' and the next 'u.set.size' bytes */
|
||||
TAny,
|
||||
TTrue,
|
||||
TFalse,
|
||||
TUTFR, /* range of UTF-8 codepoints; 'n' has initial codepoint;
|
||||
'cap' has length; 'key' has first byte;
|
||||
extra info is similar for end codepoint */
|
||||
TRep, /* 'sib1'* */
|
||||
TSeq, /* 'sib1' 'sib2' */
|
||||
TChoice, /* 'sib1' / 'sib2' */
|
||||
TNot, /* !'sib1' */
|
||||
TAnd, /* &'sib1' */
|
||||
TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */
|
||||
TOpenCall, /* ktable[key] is rule's key */
|
||||
TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
|
||||
'sib1' is rule's pattern pre-rule; 'sib2' is next rule;
|
||||
extra info 'n' is rule's sequential number */
|
||||
TXInfo, /* extra info */
|
||||
TGrammar, /* 'sib1' is initial (and first) rule */
|
||||
TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
|
||||
TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
|
||||
ktable[key] is Lua value associated with capture;
|
||||
'sib1' is capture body */
|
||||
TRunTime /* run-time capture: 'key' is Lua function;
|
||||
'sib1' is capture body */
|
||||
} TTag;
|
||||
|
||||
|
||||
/*
|
||||
** Tree trees
|
||||
** The first child of a tree (if there is one) is immediately after
|
||||
** the tree. A reference to a second child (ps) is its position
|
||||
** relative to the position of the tree itself.
|
||||
*/
|
||||
typedef struct TTree {
|
||||
byte tag;
|
||||
byte cap; /* kind of capture (if it is a capture) */
|
||||
unsigned short key; /* key in ktable for Lua data (0 if no key) */
|
||||
union {
|
||||
int ps; /* occasional second child */
|
||||
int n; /* occasional counter */
|
||||
struct {
|
||||
byte offset; /* compact set offset (in bytes) */
|
||||
byte size; /* compact set size (in bytes) */
|
||||
byte deflt; /* default value */
|
||||
byte bitmap[1]; /* bitmap (open array) */
|
||||
} set; /* for compact sets */
|
||||
} u;
|
||||
} TTree;
|
||||
|
||||
|
||||
/* access to charset */
|
||||
#define treebuffer(t) ((t)->u.set.bitmap)
|
||||
|
||||
|
||||
/*
|
||||
** A complete pattern has its tree plus, if already compiled,
|
||||
** its corresponding code
|
||||
*/
|
||||
typedef struct Pattern {
|
||||
union Instruction *code;
|
||||
TTree tree[1];
|
||||
} Pattern;
|
||||
|
||||
|
||||
/* number of children for each tree */
|
||||
extern const byte numsiblings[];
|
||||
|
||||
/* access to children */
|
||||
#define sib1(t) ((t) + 1)
|
||||
#define sib2(t) ((t) + (t)->u.ps)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
150
src/lua/lpeg-1.1.0/lptypes.h
Normal file
150
src/lua/lpeg-1.1.0/lptypes.h
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
** LPeg - PEG pattern matching for Lua
|
||||
** Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
||||
** written by Roberto Ierusalimschy
|
||||
*/
|
||||
|
||||
#if !defined(lptypes_h)
|
||||
#define lptypes_h
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "lua.h"
|
||||
|
||||
|
||||
#define VERSION "1.1.0"
|
||||
|
||||
|
||||
#define PATTERN_T "lpeg-pattern"
|
||||
#define MAXSTACKIDX "lpeg-maxstack"
|
||||
|
||||
|
||||
/*
|
||||
** compatibility with Lua 5.1
|
||||
*/
|
||||
#if (LUA_VERSION_NUM == 501)
|
||||
|
||||
#define lp_equal lua_equal
|
||||
|
||||
#define lua_getuservalue lua_getfenv
|
||||
#define lua_setuservalue lua_setfenv
|
||||
|
||||
#define lua_rawlen lua_objlen
|
||||
|
||||
#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f)
|
||||
#define luaL_newlib(L,f) luaL_register(L,"lpeg",f)
|
||||
|
||||
typedef size_t lua_Unsigned;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(lp_equal)
|
||||
#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
|
||||
#endif
|
||||
|
||||
|
||||
/* default maximum size for call/backtrack stack */
|
||||
#if !defined(MAXBACK)
|
||||
#define MAXBACK 400
|
||||
#endif
|
||||
|
||||
|
||||
/* maximum number of rules in a grammar (limited by 'unsigned short') */
|
||||
#if !defined(MAXRULES)
|
||||
#define MAXRULES 1000
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* initial size for capture's list */
|
||||
#define INITCAPSIZE 32
|
||||
|
||||
|
||||
/* index, on Lua stack, for subject */
|
||||
#define SUBJIDX 2
|
||||
|
||||
/* number of fixed arguments to 'match' (before capture arguments) */
|
||||
#define FIXEDARGS 3
|
||||
|
||||
/* index, on Lua stack, for capture list */
|
||||
#define caplistidx(ptop) ((ptop) + 2)
|
||||
|
||||
/* index, on Lua stack, for pattern's ktable */
|
||||
#define ktableidx(ptop) ((ptop) + 3)
|
||||
|
||||
/* index, on Lua stack, for backtracking stack */
|
||||
#define stackidx(ptop) ((ptop) + 4)
|
||||
|
||||
|
||||
|
||||
typedef unsigned char byte;
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
|
||||
#define BITSPERCHAR 8
|
||||
|
||||
#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1)
|
||||
|
||||
|
||||
|
||||
typedef struct Charset {
|
||||
byte cs[CHARSETSIZE];
|
||||
} Charset;
|
||||
|
||||
|
||||
|
||||
#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
|
||||
|
||||
#define fillset(s,c) memset(s,c,CHARSETSIZE)
|
||||
#define clearset(s) fillset(s,0)
|
||||
|
||||
/* number of slots needed for 'n' bytes */
|
||||
#define bytes2slots(n) (((n) - 1u) / (uint)sizeof(TTree) + 1u)
|
||||
|
||||
/* set 'b' bit in charset 'cs' */
|
||||
#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
|
||||
|
||||
|
||||
/*
|
||||
** in capture instructions, 'kind' of capture and its offset are
|
||||
** packed in field 'aux', 4 bits for each
|
||||
*/
|
||||
#define getkind(op) ((op)->i.aux1 & 0xF)
|
||||
#define getoff(op) (((op)->i.aux1 >> 4) & 0xF)
|
||||
#define joinkindoff(k,o) ((k) | ((o) << 4))
|
||||
|
||||
#define MAXOFF 0xF
|
||||
#define MAXAUX 0xFF
|
||||
|
||||
|
||||
/* maximum number of bytes to look behind */
|
||||
#define MAXBEHIND MAXAUX
|
||||
|
||||
|
||||
/* maximum size (in elements) for a pattern */
|
||||
#define MAXPATTSIZE (SHRT_MAX - 10)
|
||||
|
||||
|
||||
/* size (in instructions) for l bytes (l > 0) */
|
||||
#define instsize(l) ((int)(((l) + (uint)sizeof(Instruction) - 1u) \
|
||||
/ (uint)sizeof(Instruction)))
|
||||
|
||||
|
||||
/* size (in elements) for a ISet instruction */
|
||||
#define CHARSETINSTSIZE (1 + instsize(CHARSETSIZE))
|
||||
|
||||
/* size (in elements) for a IFunc instruction */
|
||||
#define funcinstsize(p) ((p)->i.aux + 2)
|
||||
|
||||
|
||||
|
||||
#define testchar(st,c) ((((uint)(st)[((c) >> 3)]) >> ((c) & 7)) & 1)
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
455
src/lua/lpeg-1.1.0/lpvm.c
Normal file
455
src/lua/lpeg-1.1.0/lpvm.c
Normal file
@@ -0,0 +1,455 @@
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#include "lua.h"
|
||||
#include "lauxlib.h"
|
||||
|
||||
#include "lpcap.h"
|
||||
#include "lptypes.h"
|
||||
#include "lpvm.h"
|
||||
#include "lpprint.h"
|
||||
|
||||
|
||||
/* initial size for call/backtrack stack */
|
||||
#if !defined(INITBACK)
|
||||
#define INITBACK MAXBACK
|
||||
#endif
|
||||
|
||||
|
||||
#define getoffset(p) (((p) + 1)->offset)
|
||||
|
||||
static const Instruction giveup = {{IGiveup, 0, {0}}};
|
||||
|
||||
|
||||
int charinset (const Instruction *i, const byte *buff, uint c) {
|
||||
c -= i->i.aux2.set.offset;
|
||||
if (c >= ((uint)i->i.aux2.set.size /* size in instructions... */
|
||||
* (uint)sizeof(Instruction) /* in bytes... */
|
||||
* 8u)) /* in bits */
|
||||
return i->i.aux1; /* out of range; return default value */
|
||||
return testchar(buff, c);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
|
||||
*/
|
||||
static const char *utf8_decode (const char *o, int *val) {
|
||||
static const uint limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu};
|
||||
const unsigned char *s = (const unsigned char *)o;
|
||||
uint c = s[0]; /* first byte */
|
||||
uint res = 0; /* final result */
|
||||
if (c < 0x80) /* ascii? */
|
||||
res = c;
|
||||
else {
|
||||
int count = 0; /* to count number of continuation bytes */
|
||||
while (c & 0x40) { /* still have continuation bytes? */
|
||||
int cc = s[++count]; /* read next byte */
|
||||
if ((cc & 0xC0) != 0x80) /* not a continuation byte? */
|
||||
return NULL; /* invalid byte sequence */
|
||||
res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
|
||||
c <<= 1; /* to test next bit */
|
||||
}
|
||||
res |= (c & 0x7F) << (count * 5); /* add first byte */
|
||||
if (count > 3 || res > 0x10FFFFu || res <= limits[count])
|
||||
return NULL; /* invalid byte sequence */
|
||||
s += count; /* skip continuation bytes read */
|
||||
}
|
||||
*val = res;
|
||||
return (const char *)s + 1; /* +1 to include first byte */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** {======================================================
|
||||
** Virtual Machine
|
||||
** =======================================================
|
||||
*/
|
||||
|
||||
|
||||
typedef struct Stack {
|
||||
const char *s; /* saved position (or NULL for calls) */
|
||||
const Instruction *p; /* next instruction */
|
||||
int caplevel;
|
||||
} Stack;
|
||||
|
||||
|
||||
#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
|
||||
|
||||
|
||||
/*
|
||||
** Ensures the size of array 'capture' (with size '*capsize' and
|
||||
** 'captop' elements being used) is enough to accomodate 'n' extra
|
||||
** elements plus one. (Because several opcodes add stuff to the capture
|
||||
** array, it is simpler to ensure the array always has at least one free
|
||||
** slot upfront and check its size later.)
|
||||
*/
|
||||
|
||||
/* new size in number of elements cannot overflow integers, and new
|
||||
size in bytes cannot overflow size_t. */
|
||||
#define MAXNEWSIZE \
|
||||
(((size_t)INT_MAX) <= (~(size_t)0 / sizeof(Capture)) ? \
|
||||
((size_t)INT_MAX) : (~(size_t)0 / sizeof(Capture)))
|
||||
|
||||
static Capture *growcap (lua_State *L, Capture *capture, int *capsize,
|
||||
int captop, int n, int ptop) {
|
||||
if (*capsize - captop > n)
|
||||
return capture; /* no need to grow array */
|
||||
else { /* must grow */
|
||||
Capture *newc;
|
||||
uint newsize = captop + n + 1; /* minimum size needed */
|
||||
if (newsize < (MAXNEWSIZE / 3) * 2)
|
||||
newsize += newsize / 2; /* 1.5 that size, if not too big */
|
||||
else if (newsize < (MAXNEWSIZE / 9) * 8)
|
||||
newsize += newsize / 8; /* else, try 9/8 that size */
|
||||
else
|
||||
luaL_error(L, "too many captures");
|
||||
newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture));
|
||||
memcpy(newc, capture, captop * sizeof(Capture));
|
||||
*capsize = newsize;
|
||||
lua_replace(L, caplistidx(ptop));
|
||||
return newc;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Double the size of the stack
|
||||
*/
|
||||
static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
|
||||
Stack *stack = getstackbase(L, ptop);
|
||||
Stack *newstack;
|
||||
int n = *stacklimit - stack; /* current stack size */
|
||||
int max, newn;
|
||||
lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
|
||||
max = lua_tointeger(L, -1); /* maximum allowed size */
|
||||
lua_pop(L, 1);
|
||||
if (n >= max) /* already at maximum size? */
|
||||
luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
|
||||
newn = 2 * n; /* new size */
|
||||
if (newn > max) newn = max;
|
||||
newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
|
||||
memcpy(newstack, stack, n * sizeof(Stack));
|
||||
lua_replace(L, stackidx(ptop));
|
||||
*stacklimit = newstack + newn;
|
||||
return newstack + n; /* return next position */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Interpret the result of a dynamic capture: false -> fail;
|
||||
** true -> keep current position; number -> next position.
|
||||
** Return new subject position. 'fr' is stack index where
|
||||
** is the result; 'curr' is current subject position; 'limit'
|
||||
** is subject's size.
|
||||
*/
|
||||
static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
|
||||
lua_Integer res;
|
||||
if (!lua_toboolean(L, fr)) { /* false value? */
|
||||
lua_settop(L, fr - 1); /* remove results */
|
||||
return -1; /* and fail */
|
||||
}
|
||||
else if (lua_isboolean(L, fr)) /* true? */
|
||||
res = curr; /* keep current position */
|
||||
else {
|
||||
res = lua_tointeger(L, fr) - 1; /* new position */
|
||||
if (res < curr || res > limit)
|
||||
luaL_error(L, "invalid position returned by match-time capture");
|
||||
}
|
||||
lua_remove(L, fr); /* remove first result (offset) */
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Add capture values returned by a dynamic capture to the list
|
||||
** 'capture', nested inside a group. 'fd' indexes the first capture
|
||||
** value, 'n' is the number of values (at least 1). The open group
|
||||
** capture is already in 'capture', before the place for the new entries.
|
||||
*/
|
||||
static void adddyncaptures (Index_t index, Capture *capture, int n, int fd) {
|
||||
int i;
|
||||
assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
|
||||
capture[-1].idx = 0; /* make group capture an anonymous group */
|
||||
for (i = 0; i < n; i++) { /* add runtime captures */
|
||||
capture[i].kind = Cruntime;
|
||||
capture[i].siz = 1; /* mark it as closed */
|
||||
capture[i].idx = fd + i; /* stack index of capture value */
|
||||
capture[i].index = index;
|
||||
}
|
||||
capture[n].kind = Cclose; /* close group */
|
||||
capture[n].siz = 1;
|
||||
capture[n].index = index;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Remove dynamic captures from the Lua stack (called in case of failure)
|
||||
*/
|
||||
static int removedyncap (lua_State *L, Capture *capture,
|
||||
int level, int last) {
|
||||
int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
|
||||
int top = lua_gettop(L);
|
||||
if (id == 0) return 0; /* no dynamic captures? */
|
||||
lua_settop(L, id - 1); /* remove captures */
|
||||
return top - id + 1; /* number of values removed */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Find the corresponding 'open' capture before 'cap', when that capture
|
||||
** can become a full capture. If a full capture c1 is followed by an
|
||||
** empty capture c2, there is no way to know whether c2 is inside
|
||||
** c1. So, full captures can enclose only captures that start *before*
|
||||
** its end.
|
||||
*/
|
||||
static Capture *findopen (Capture *cap, Index_t currindex) {
|
||||
int i;
|
||||
cap--; /* check last capture */
|
||||
/* Must it be inside current one, but starts where current one ends? */
|
||||
if (!isopencap(cap) && cap->index == currindex)
|
||||
return NULL; /* current one cannot be a full capture */
|
||||
/* else, look for an 'open' capture */
|
||||
for (i = 0; i < MAXLOP; i++, cap--) {
|
||||
if (currindex - cap->index >= UCHAR_MAX)
|
||||
return NULL; /* capture too long for a full capture */
|
||||
else if (isopencap(cap)) /* open capture? */
|
||||
return cap; /* that's the one to be closed */
|
||||
else if (cap->kind == Cclose)
|
||||
return NULL; /* a full capture should not nest a non-full one */
|
||||
}
|
||||
return NULL; /* not found within allowed search limit */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Opcode interpreter
|
||||
*/
|
||||
const char *match (lua_State *L, const char *o, const char *s, const char *e,
|
||||
Instruction *op, Capture *capture, int ptop) {
|
||||
Stack stackbase[INITBACK];
|
||||
Stack *stacklimit = stackbase + INITBACK;
|
||||
Stack *stack = stackbase; /* point to first empty slot in stack */
|
||||
int capsize = INITCAPSIZE;
|
||||
int captop = 0; /* point to first empty slot in captures */
|
||||
int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
|
||||
const Instruction *p = op; /* current instruction */
|
||||
stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
|
||||
lua_pushlightuserdata(L, stackbase);
|
||||
for (;;) {
|
||||
#if defined(DEBUG)
|
||||
printf("-------------------------------------\n");
|
||||
printcaplist(capture, capture + captop);
|
||||
printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
|
||||
s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
|
||||
printinst(op, p);
|
||||
#endif
|
||||
assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
|
||||
switch ((Opcode)p->i.code) {
|
||||
case IEnd: {
|
||||
assert(stack == getstackbase(L, ptop) + 1);
|
||||
capture[captop].kind = Cclose;
|
||||
capture[captop].index = MAXINDT;
|
||||
return s;
|
||||
}
|
||||
case IGiveup: {
|
||||
assert(stack == getstackbase(L, ptop));
|
||||
return NULL;
|
||||
}
|
||||
case IRet: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
|
||||
p = (--stack)->p;
|
||||
continue;
|
||||
}
|
||||
case IAny: {
|
||||
if (s < e) { p++; s++; }
|
||||
else goto fail;
|
||||
continue;
|
||||
}
|
||||
case IUTFR: {
|
||||
int codepoint;
|
||||
if (s >= e)
|
||||
goto fail;
|
||||
s = utf8_decode (s, &codepoint);
|
||||
if (s && p[1].offset <= codepoint && codepoint <= utf_to(p))
|
||||
p += 2;
|
||||
else
|
||||
goto fail;
|
||||
continue;
|
||||
}
|
||||
case ITestAny: {
|
||||
if (s < e) p += 2;
|
||||
else p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IChar: {
|
||||
if ((byte)*s == p->i.aux1 && s < e) { p++; s++; }
|
||||
else goto fail;
|
||||
continue;
|
||||
}
|
||||
case ITestChar: {
|
||||
if ((byte)*s == p->i.aux1 && s < e) p += 2;
|
||||
else p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case ISet: {
|
||||
uint c = (byte)*s;
|
||||
if (charinset(p, (p+1)->buff, c) && s < e)
|
||||
{ p += 1 + p->i.aux2.set.size; s++; }
|
||||
else goto fail;
|
||||
continue;
|
||||
}
|
||||
case ITestSet: {
|
||||
uint c = (byte)*s;
|
||||
if (charinset(p, (p + 2)->buff, c) && s < e)
|
||||
p += 2 + p->i.aux2.set.size;
|
||||
else p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IBehind: {
|
||||
int n = p->i.aux1;
|
||||
if (n > s - o) goto fail;
|
||||
s -= n; p++;
|
||||
continue;
|
||||
}
|
||||
case ISpan: {
|
||||
for (; s < e; s++) {
|
||||
uint c = (byte)*s;
|
||||
if (!charinset(p, (p+1)->buff, c)) break;
|
||||
}
|
||||
p += 1 + p->i.aux2.set.size;
|
||||
continue;
|
||||
}
|
||||
case IJmp: {
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IChoice: {
|
||||
if (stack == stacklimit)
|
||||
stack = doublestack(L, &stacklimit, ptop);
|
||||
stack->p = p + getoffset(p);
|
||||
stack->s = s;
|
||||
stack->caplevel = captop;
|
||||
stack++;
|
||||
p += 2;
|
||||
continue;
|
||||
}
|
||||
case ICall: {
|
||||
if (stack == stacklimit)
|
||||
stack = doublestack(L, &stacklimit, ptop);
|
||||
stack->s = NULL;
|
||||
stack->p = p + 2; /* save return address */
|
||||
stack++;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case ICommit: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
|
||||
stack--;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IPartialCommit: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
|
||||
(stack - 1)->s = s;
|
||||
(stack - 1)->caplevel = captop;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IBackCommit: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
|
||||
s = (--stack)->s;
|
||||
if (ndyncap > 0) /* are there matchtime captures? */
|
||||
ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
|
||||
captop = stack->caplevel;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IFailTwice:
|
||||
assert(stack > getstackbase(L, ptop));
|
||||
stack--;
|
||||
/* FALLTHROUGH */
|
||||
case IFail:
|
||||
fail: { /* pattern failed: try to backtrack */
|
||||
do { /* remove pending calls */
|
||||
assert(stack > getstackbase(L, ptop));
|
||||
s = (--stack)->s;
|
||||
} while (s == NULL);
|
||||
if (ndyncap > 0) /* is there matchtime captures? */
|
||||
ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
|
||||
captop = stack->caplevel;
|
||||
p = stack->p;
|
||||
#if defined(DEBUG)
|
||||
printf("**FAIL**\n");
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
case ICloseRunTime: {
|
||||
CapState cs;
|
||||
int rem, res, n;
|
||||
int fr = lua_gettop(L) + 1; /* stack index of first result */
|
||||
cs.reclevel = 0; cs.L = L;
|
||||
cs.s = o; cs.ocap = capture; cs.ptop = ptop;
|
||||
n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
|
||||
captop -= n; /* remove nested captures */
|
||||
ndyncap -= rem; /* update number of dynamic captures */
|
||||
fr -= rem; /* 'rem' items were popped from Lua stack */
|
||||
res = resdyncaptures(L, fr, s - o, e - o); /* get result */
|
||||
if (res == -1) /* fail? */
|
||||
goto fail;
|
||||
s = o + res; /* else update current position */
|
||||
n = lua_gettop(L) - fr + 1; /* number of new captures */
|
||||
ndyncap += n; /* update number of dynamic captures */
|
||||
if (n == 0) /* no new captures? */
|
||||
captop--; /* remove open group */
|
||||
else { /* new captures; keep original open group */
|
||||
if (fr + n >= SHRT_MAX)
|
||||
luaL_error(L, "too many results in match-time capture");
|
||||
/* add new captures + close group to 'capture' list */
|
||||
capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
|
||||
adddyncaptures(s - o, capture + captop, n, fr);
|
||||
captop += n + 1; /* new captures + close group */
|
||||
}
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
case ICloseCapture: {
|
||||
Capture *open = findopen(capture + captop, s - o);
|
||||
assert(captop > 0);
|
||||
if (open) { /* if possible, turn capture into a full capture */
|
||||
open->siz = (s - o) - open->index + 1;
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
else { /* must create a close capture */
|
||||
capture[captop].siz = 1; /* mark entry as closed */
|
||||
capture[captop].index = s - o;
|
||||
goto pushcapture;
|
||||
}
|
||||
}
|
||||
case IOpenCapture:
|
||||
capture[captop].siz = 0; /* mark entry as open */
|
||||
capture[captop].index = s - o;
|
||||
goto pushcapture;
|
||||
case IFullCapture:
|
||||
capture[captop].siz = getoff(p) + 1; /* save capture size */
|
||||
capture[captop].index = s - o - getoff(p);
|
||||
/* goto pushcapture; */
|
||||
pushcapture: {
|
||||
capture[captop].idx = p->i.aux2.key;
|
||||
capture[captop].kind = getkind(p);
|
||||
captop++;
|
||||
capture = growcap(L, capture, &capsize, captop, 0, ptop);
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
default: assert(0); return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* }====================================================== */
|
||||
|
||||
|
||||
79
src/lua/lpeg-1.1.0/lpvm.h
Normal file
79
src/lua/lpeg-1.1.0/lpvm.h
Normal file
@@ -0,0 +1,79 @@
|
||||
|
||||
#if !defined(lpvm_h)
|
||||
#define lpvm_h
|
||||
|
||||
#include "lpcap.h"
|
||||
|
||||
|
||||
/*
|
||||
** About Character sets in instructions: a set is a bit map with an
|
||||
** initial offset, in bits, and a size, in number of instructions.
|
||||
** aux1 has the default value for the bits outsize that range.
|
||||
*/
|
||||
|
||||
|
||||
/* Virtual Machine's instructions */
|
||||
typedef enum Opcode {
|
||||
IAny, /* if no char, fail */
|
||||
IChar, /* if char != aux1, fail */
|
||||
ISet, /* if char not in set, fail */
|
||||
ITestAny, /* in no char, jump to 'offset' */
|
||||
ITestChar, /* if char != aux1, jump to 'offset' */
|
||||
ITestSet, /* if char not in set, jump to 'offset' */
|
||||
ISpan, /* read a span of chars in set */
|
||||
IUTFR, /* if codepoint not in range [offset, utf_to], fail */
|
||||
IBehind, /* walk back 'aux1' characters (fail if not possible) */
|
||||
IRet, /* return from a rule */
|
||||
IEnd, /* end of pattern */
|
||||
IChoice, /* stack a choice; next fail will jump to 'offset' */
|
||||
IJmp, /* jump to 'offset' */
|
||||
ICall, /* call rule at 'offset' */
|
||||
IOpenCall, /* call rule number 'key' (must be closed to a ICall) */
|
||||
ICommit, /* pop choice and jump to 'offset' */
|
||||
IPartialCommit, /* update top choice to current position and jump */
|
||||
IBackCommit, /* backtrack like "fail" but jump to its own 'offset' */
|
||||
IFailTwice, /* pop one choice and then fail */
|
||||
IFail, /* go back to saved state on choice and jump to saved offset */
|
||||
IGiveup, /* internal use */
|
||||
IFullCapture, /* complete capture of last 'off' chars */
|
||||
IOpenCapture, /* start a capture */
|
||||
ICloseCapture,
|
||||
ICloseRunTime,
|
||||
IEmpty /* to fill empty slots left by optimizations */
|
||||
} Opcode;
|
||||
|
||||
|
||||
/*
|
||||
** All array of instructions has a 'codesize' as its first element
|
||||
** and is referred by a pointer to its second element, which is the
|
||||
** first actual opcode.
|
||||
*/
|
||||
typedef union Instruction {
|
||||
struct Inst {
|
||||
byte code;
|
||||
byte aux1;
|
||||
union {
|
||||
short key;
|
||||
struct {
|
||||
byte offset;
|
||||
byte size;
|
||||
} set;
|
||||
} aux2;
|
||||
} i;
|
||||
int offset;
|
||||
uint codesize;
|
||||
byte buff[1];
|
||||
} Instruction;
|
||||
|
||||
|
||||
/* extract 24-bit value from an instruction */
|
||||
#define utf_to(inst) (((inst)->i.aux2.key << 8) | (inst)->i.aux1)
|
||||
|
||||
|
||||
int charinset (const Instruction *i, const byte *buff, uint c);
|
||||
const char *match (lua_State *L, const char *o, const char *s, const char *e,
|
||||
Instruction *op, Capture *capture, int ptop);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
59
src/lua/lpeg-1.1.0/makefile
Normal file
59
src/lua/lpeg-1.1.0/makefile
Normal file
@@ -0,0 +1,59 @@
|
||||
LIBNAME = lpeg
|
||||
LUADIR = ./lua/
|
||||
|
||||
COPT = -O2 -DNDEBUG
|
||||
# COPT = -O0 -DLPEG_DEBUG -g
|
||||
|
||||
CWARNS = -Wall -Wextra -pedantic \
|
||||
-Waggregate-return \
|
||||
-Wcast-align \
|
||||
-Wcast-qual \
|
||||
-Wdisabled-optimization \
|
||||
-Wpointer-arith \
|
||||
-Wshadow \
|
||||
-Wredundant-decls \
|
||||
-Wsign-compare \
|
||||
-Wundef \
|
||||
-Wwrite-strings \
|
||||
-Wbad-function-cast \
|
||||
-Wdeclaration-after-statement \
|
||||
-Wmissing-prototypes \
|
||||
-Wmissing-declarations \
|
||||
-Wnested-externs \
|
||||
-Wstrict-prototypes \
|
||||
-Wc++-compat \
|
||||
# -Wunreachable-code \
|
||||
|
||||
|
||||
CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
|
||||
CC = gcc
|
||||
|
||||
FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o lpcset.o
|
||||
|
||||
# For Linux
|
||||
linux:
|
||||
$(MAKE) lpeg.so "DLLFLAGS = -shared -fPIC"
|
||||
|
||||
# For Mac OS
|
||||
macosx:
|
||||
$(MAKE) lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
|
||||
|
||||
lpeg.so: $(FILES)
|
||||
env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
|
||||
|
||||
$(FILES): makefile
|
||||
|
||||
test: test.lua re.lua lpeg.so
|
||||
./test.lua
|
||||
|
||||
clean:
|
||||
rm -f $(FILES) lpeg.so
|
||||
|
||||
|
||||
lpcap.o: lpcap.c lpcap.h lptypes.h
|
||||
lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h lpcset.h
|
||||
lpcset.o: lpcset.c lptypes.h lpcset.h lpcode.h lptree.h lpvm.h lpcap.h
|
||||
lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h lpcode.h
|
||||
lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h \
|
||||
lpcset.h
|
||||
lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
|
||||
472
src/lua/lpeg-1.1.0/re.html
Normal file
472
src/lua/lpeg-1.1.0/re.html
Normal file
@@ -0,0 +1,472 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"//www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>LPeg.re - Regex syntax for LPEG</title>
|
||||
<link rel="stylesheet"
|
||||
href="//www.inf.puc-rio.br/~roberto/lpeg/doc.css"
|
||||
type="text/css"/>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
|
||||
<div id="container">
|
||||
|
||||
<div id="product">
|
||||
<div id="product_logo">
|
||||
<a href="//www.inf.puc-rio.br/~roberto/lpeg/">
|
||||
<img alt="LPeg logo" src="lpeg-128.gif"/>
|
||||
</a>
|
||||
</div>
|
||||
<div id="product_name"><big><strong>LPeg.re</strong></big></div>
|
||||
<div id="product_description">
|
||||
Regex syntax for LPEG
|
||||
</div>
|
||||
</div> <!-- id="product" -->
|
||||
|
||||
<div id="main">
|
||||
|
||||
<div id="navigation">
|
||||
<h1>re</h1>
|
||||
|
||||
<ul>
|
||||
<li><a href="#basic">Basic Constructions</a></li>
|
||||
<li><a href="#func">Functions</a></li>
|
||||
<li><a href="#ex">Some Examples</a></li>
|
||||
<li><a href="#license">License</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div> <!-- id="navigation" -->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h2><a name="basic"></a>The <code>re</code> Module</h2>
|
||||
|
||||
<p>
|
||||
The <code>re</code> module
|
||||
(provided by file <code>re.lua</code> in the distribution)
|
||||
supports a somewhat conventional regex syntax
|
||||
for pattern usage within <a href="lpeg.html">LPeg</a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The next table summarizes <code>re</code>'s syntax.
|
||||
A <code>p</code> represents an arbitrary pattern;
|
||||
<code>num</code> represents a number (<code>[0-9]+</code>);
|
||||
<code>name</code> represents an identifier
|
||||
(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
|
||||
Constructions are listed in order of decreasing precedence.
|
||||
<table border="1">
|
||||
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
|
||||
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
|
||||
<tr><td><code>& p</code></td> <td>and predicate</td></tr>
|
||||
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
|
||||
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
|
||||
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
|
||||
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
|
||||
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
|
||||
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
|
||||
<tr><td><code>p^num</code></td>
|
||||
<td>exactly <code>num</code> repetitions</td></tr>
|
||||
<tr><td><code>p^+num</code></td>
|
||||
<td>at least <code>num</code> repetitions</td></tr>
|
||||
<tr><td><code>p^-num</code></td>
|
||||
<td>at most <code>num</code> repetitions</td></tr>
|
||||
<tr><td>(<code>name <- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
|
||||
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
|
||||
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
|
||||
<tr><td><code>[class]</code></td> <td>character class</td></tr>
|
||||
<tr><td><code>.</code></td> <td>any character</td></tr>
|
||||
<tr><td><code>%name</code></td>
|
||||
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
|
||||
<tr><td><code>name</code></td><td>non terminal</td></tr>
|
||||
<tr><td><code><name></code></td><td>non terminal</td></tr>
|
||||
|
||||
<tr><td><code>{}</code></td> <td>position capture</td></tr>
|
||||
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
|
||||
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
|
||||
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
|
||||
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
|
||||
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
|
||||
<tr><td><code>=name</code></td> <td>back reference</td></tr>
|
||||
|
||||
<tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr>
|
||||
<tr><td><code>p -> "string"</code></td> <td>string capture</td></tr>
|
||||
<tr><td><code>p -> num</code></td> <td>numbered capture</td></tr>
|
||||
<tr><td><code>p -> name</code></td> <td>function/query/string capture
|
||||
equivalent to <code>p / defs[name]</code></td></tr>
|
||||
<tr><td><code>p => name</code></td> <td>match-time capture
|
||||
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
|
||||
<tr><td><code>p ~> name</code></td> <td>fold capture
|
||||
(deprecated)</td></tr>
|
||||
<tr><td><code>p >> name</code></td> <td>accumulator capture
|
||||
equivalent to <code>(p % defs[name])</code></td></tr>
|
||||
</tbody></table>
|
||||
<p>
|
||||
Any space appearing in a syntax description can be
|
||||
replaced by zero or more space characters and Lua-style short comments
|
||||
(<code>--</code> until end of line).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Character classes define sets of characters.
|
||||
An initial <code>^</code> complements the resulting set.
|
||||
A range <em>x</em><code>-</code><em>y</em> includes in the set
|
||||
all characters with codes between the codes of <em>x</em> and <em>y</em>.
|
||||
A pre-defined class <code>%</code><em>name</em> includes all
|
||||
characters of that class.
|
||||
A simple character includes itself in the set.
|
||||
The only special characters inside a class are <code>^</code>
|
||||
(special only if it is the first character);
|
||||
<code>]</code>
|
||||
(can be included in the set as the first character,
|
||||
after the optional <code>^</code>);
|
||||
<code>%</code> (special only if followed by a letter);
|
||||
and <code>-</code>
|
||||
(can be included in the set as the first or the last character).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Currently the pre-defined classes are similar to those from the
|
||||
Lua's string library
|
||||
(<code>%a</code> for letters,
|
||||
<code>%A</code> for non letters, etc.).
|
||||
There is also a class <code>%nl</code>
|
||||
containing only the newline character,
|
||||
which is particularly handy for grammars written inside long strings,
|
||||
as long strings do not interpret escape sequences like <code>\n</code>.
|
||||
</p>
|
||||
|
||||
|
||||
<h2><a name="func">Functions</a></h2>
|
||||
|
||||
<h3><code>re.compile (string, [, defs])</code></h3>
|
||||
<p>
|
||||
Compiles the given string and
|
||||
returns an equivalent LPeg pattern.
|
||||
The given string may define either an expression or a grammar.
|
||||
The optional <code>defs</code> table provides extra Lua values
|
||||
to be used by the pattern.
|
||||
</p>
|
||||
|
||||
<h3><code>re.find (subject, pattern [, init])</code></h3>
|
||||
<p>
|
||||
Searches the given pattern in the given subject.
|
||||
If it finds a match,
|
||||
returns the index where this occurrence starts and
|
||||
the index where it ends.
|
||||
Otherwise, returns nil.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
An optional numeric argument <code>init</code> makes the search
|
||||
starts at that position in the subject string.
|
||||
As usual in Lua libraries,
|
||||
a negative value counts from the end.
|
||||
</p>
|
||||
|
||||
<h3><code>re.gsub (subject, pattern, replacement)</code></h3>
|
||||
<p>
|
||||
Does a <em>global substitution</em>,
|
||||
replacing all occurrences of <code>pattern</code>
|
||||
in the given <code>subject</code> by <code>replacement</code>.
|
||||
|
||||
<h3><code>re.match (subject, pattern)</code></h3>
|
||||
<p>
|
||||
Matches the given pattern against the given subject,
|
||||
returning all captures.
|
||||
</p>
|
||||
|
||||
<h3><code>re.updatelocale ()</code></h3>
|
||||
<p>
|
||||
Updates the pre-defined character classes to the current locale.
|
||||
</p>
|
||||
|
||||
|
||||
<h2><a name="ex">Some Examples</a></h2>
|
||||
|
||||
<h3>A complete simple program</h3>
|
||||
<p>
|
||||
The next code shows a simple complete Lua program using
|
||||
the <code>re</code> module:
|
||||
</p>
|
||||
<pre class="example">
|
||||
local re = require"re"
|
||||
|
||||
-- find the position of the first numeral in a string
|
||||
print(re.find("the number 423 is odd", "[0-9]+")) --> 12 14
|
||||
|
||||
-- returns all words in a string
|
||||
print(re.match("the number 423 is odd", "({%a+} / .)*"))
|
||||
--> the number is odd
|
||||
|
||||
-- returns the first numeral in a string
|
||||
print(re.match("the number 423 is odd", "s <- {%d+} / . s"))
|
||||
--> 423
|
||||
|
||||
-- substitutes a dot for each vowel in a string
|
||||
print(re.gsub("hello World", "[aeiou]", "."))
|
||||
--> h.ll. W.rld
|
||||
</pre>
|
||||
|
||||
|
||||
<h3>Balanced parentheses</h3>
|
||||
<p>
|
||||
The following call will produce the same pattern produced by the
|
||||
Lua expression in the
|
||||
<a href="lpeg.html#balanced">balanced parentheses</a> example:
|
||||
</p>
|
||||
<pre class="example">
|
||||
b = re.compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
|
||||
</pre>
|
||||
|
||||
<h3>String reversal</h3>
|
||||
<p>
|
||||
The next example reverses a string:
|
||||
</p>
|
||||
<pre class="example">
|
||||
rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
|
||||
print(rev:match"0123456789") --> 9876543210
|
||||
</pre>
|
||||
|
||||
<h3>CSV decoder</h3>
|
||||
<p>
|
||||
The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
|
||||
</p>
|
||||
<pre class="example">
|
||||
record = re.compile[[
|
||||
record <- {| field (',' field)* |} (%nl / !.)
|
||||
field <- escaped / nonescaped
|
||||
nonescaped <- { [^,"%nl]* }
|
||||
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
|
||||
]]
|
||||
</pre>
|
||||
|
||||
<h3>Lua's long strings</h3>
|
||||
<p>
|
||||
The next example matches Lua long strings:
|
||||
</p>
|
||||
<pre class="example">
|
||||
c = re.compile([[
|
||||
longstring <- ('[' {:eq: '='* :} '[' close)
|
||||
close <- ']' =eq ']' / . close
|
||||
]])
|
||||
|
||||
print(c:match'[==[]]===]]]]==]===[]') --> 17
|
||||
</pre>
|
||||
|
||||
<h3>Abstract Syntax Trees</h3>
|
||||
<p>
|
||||
This example shows a simple way to build an
|
||||
abstract syntax tree (AST) for a given grammar.
|
||||
To keep our example simple,
|
||||
let us consider the following grammar
|
||||
for lists of names:
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = re.compile[[
|
||||
listname <- (name s)*
|
||||
name <- [a-z][a-z]*
|
||||
s <- %s*
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
Now, we will add captures to build a corresponding AST.
|
||||
As a first step, the pattern will build a table to
|
||||
represent each non terminal;
|
||||
terminals will be represented by their corresponding strings:
|
||||
</p>
|
||||
<pre class="example">
|
||||
c = re.compile[[
|
||||
listname <- {| (name s)* |}
|
||||
name <- {| {[a-z][a-z]*} |}
|
||||
s <- %s*
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
Now, a match against <code>"hi hello bye"</code>
|
||||
results in the table
|
||||
<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
|
||||
</p>
|
||||
<p>
|
||||
For such a simple grammar,
|
||||
this AST is more than enough;
|
||||
actually, the tables around each single name
|
||||
are already overkilling.
|
||||
More complex grammars,
|
||||
however, may need some more structure.
|
||||
Specifically,
|
||||
it would be useful if each table had
|
||||
a <code>tag</code> field telling what non terminal
|
||||
that table represents.
|
||||
We can add such a tag using
|
||||
<a href="lpeg.html#cap-g">named group captures</a>:
|
||||
</p>
|
||||
<pre class="example">
|
||||
x = re.compile[[
|
||||
listname <- {| {:tag: '' -> 'list':} (name s)* |}
|
||||
name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
|
||||
s <- ' '*
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
With these group captures,
|
||||
a match against <code>"hi hello bye"</code>
|
||||
results in the following table:
|
||||
</p>
|
||||
<pre class="example">
|
||||
{tag="list",
|
||||
{tag="id", "hi"},
|
||||
{tag="id", "hello"},
|
||||
{tag="id", "bye"}
|
||||
}
|
||||
</pre>
|
||||
|
||||
|
||||
<h3>Indented blocks</h3>
|
||||
<p>
|
||||
This example breaks indented blocks into tables,
|
||||
respecting the indentation:
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = re.compile[[
|
||||
block <- {| {:ident:' '*:} line
|
||||
((=ident !' ' line) / &(=ident ' ') block)* |}
|
||||
line <- {[^%nl]*} %nl
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
As an example,
|
||||
consider the following text:
|
||||
</p>
|
||||
<pre class="example">
|
||||
t = p:match[[
|
||||
first line
|
||||
subline 1
|
||||
subline 2
|
||||
second line
|
||||
third line
|
||||
subline 3.1
|
||||
subline 3.1.1
|
||||
subline 3.2
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
The resulting table <code>t</code> will be like this:
|
||||
</p>
|
||||
<pre class="example">
|
||||
{'first line'; {'subline 1'; 'subline 2'; ident = ' '};
|
||||
'second line';
|
||||
'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = ' '};
|
||||
'subline 3.2'; ident = ' '};
|
||||
ident = ''}
|
||||
</pre>
|
||||
|
||||
<h3>Macro expander</h3>
|
||||
<p>
|
||||
This example implements a simple macro expander.
|
||||
Macros must be defined as part of the pattern,
|
||||
following some simple rules:
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = re.compile[[
|
||||
text <- {~ item* ~}
|
||||
item <- macro / [^()] / '(' item* ')'
|
||||
arg <- ' '* {~ (!',' item)* ~}
|
||||
args <- '(' arg (',' arg)* ')'
|
||||
-- now we define some macros
|
||||
macro <- ('apply' args) -> '%1(%2)'
|
||||
/ ('add' args) -> '%1 + %2'
|
||||
/ ('mul' args) -> '%1 * %2'
|
||||
]]
|
||||
|
||||
print(p:match"add(mul(a,b), apply(f,x))") --> a * b + f(x)
|
||||
</pre>
|
||||
<p>
|
||||
A <code>text</code> is a sequence of items,
|
||||
wherein we apply a substitution capture to expand any macros.
|
||||
An <code>item</code> is either a macro,
|
||||
any character different from parentheses,
|
||||
or a parenthesized expression.
|
||||
A macro argument (<code>arg</code>) is a sequence
|
||||
of items different from a comma.
|
||||
(Note that a comma may appear inside an item,
|
||||
e.g., inside a parenthesized expression.)
|
||||
Again we do a substitution capture to expand any macro
|
||||
in the argument before expanding the outer macro.
|
||||
<code>args</code> is a list of arguments separated by commas.
|
||||
Finally we define the macros.
|
||||
Each macro is a string substitution;
|
||||
it replaces the macro name and its arguments by its corresponding string,
|
||||
with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
|
||||
</p>
|
||||
|
||||
<h3>Patterns</h3>
|
||||
<p>
|
||||
This example shows the complete syntax
|
||||
of patterns accepted by <code>re</code>.
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = [=[
|
||||
|
||||
pattern <- exp !.
|
||||
exp <- S (grammar / alternative)
|
||||
|
||||
alternative <- seq ('/' S seq)*
|
||||
seq <- prefix*
|
||||
prefix <- '&' S prefix / '!' S prefix / suffix
|
||||
suffix <- primary S (([+*?]
|
||||
/ '^' [+-]? num
|
||||
/ '->' S (string / '{}' / name)
|
||||
/ '>>' S name
|
||||
/ '=>' S name) S)*
|
||||
|
||||
primary <- '(' exp ')' / string / class / defined
|
||||
/ '{:' (name ':')? exp ':}'
|
||||
/ '=' name
|
||||
/ '{}'
|
||||
/ '{~' exp '~}'
|
||||
/ '{|' exp '|}'
|
||||
/ '{' exp '}'
|
||||
/ '.'
|
||||
/ name S !arrow
|
||||
/ '<' name '>' -- old-style non terminals
|
||||
|
||||
grammar <- definition+
|
||||
definition <- name S arrow exp
|
||||
|
||||
class <- '[' '^'? item (!']' item)* ']'
|
||||
item <- defined / range / .
|
||||
range <- . '-' [^]]
|
||||
|
||||
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
||||
name <- [A-Za-z_][A-Za-z0-9_]*
|
||||
arrow <- '<-'
|
||||
num <- [0-9]+
|
||||
string <- '"' [^"]* '"' / "'" [^']* "'"
|
||||
defined <- '%' name
|
||||
|
||||
]=]
|
||||
|
||||
print(re.match(p, p)) -- a self description must match itself
|
||||
</pre>
|
||||
|
||||
|
||||
|
||||
<h2><a name="license">License</a></h2>
|
||||
|
||||
<p>
|
||||
This module is part of the <a href="lpeg.html">LPeg</a> package and shares
|
||||
its <a href="lpeg.html#license">license</a>.
|
||||
|
||||
|
||||
</div> <!-- id="content" -->
|
||||
|
||||
</div> <!-- id="main" -->
|
||||
|
||||
</div> <!-- id="container" -->
|
||||
|
||||
</body>
|
||||
</html>
|
||||
270
src/lua/lpeg-1.1.0/re.lua
Normal file
270
src/lua/lpeg-1.1.0/re.lua
Normal file
@@ -0,0 +1,270 @@
|
||||
--
|
||||
-- Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
||||
-- written by Roberto Ierusalimschy
|
||||
--
|
||||
|
||||
-- imported functions and modules
|
||||
local tonumber, type, print, error = tonumber, type, print, error
|
||||
local setmetatable = setmetatable
|
||||
local m = require"lpeg"
|
||||
|
||||
-- 'm' will be used to parse expressions, and 'mm' will be used to
|
||||
-- create expressions; that is, 're' runs on 'm', creating patterns
|
||||
-- on 'mm'
|
||||
local mm = m
|
||||
|
||||
-- patterns' metatable
|
||||
local mt = getmetatable(mm.P(0))
|
||||
|
||||
|
||||
local version = _VERSION
|
||||
|
||||
-- No more global accesses after this point
|
||||
_ENV = nil -- does no harm in Lua 5.1
|
||||
|
||||
|
||||
local any = m.P(1)
|
||||
|
||||
|
||||
-- Pre-defined names
|
||||
local Predef = { nl = m.P"\n" }
|
||||
|
||||
|
||||
local mem
|
||||
local fmem
|
||||
local gmem
|
||||
|
||||
|
||||
local function updatelocale ()
|
||||
mm.locale(Predef)
|
||||
Predef.a = Predef.alpha
|
||||
Predef.c = Predef.cntrl
|
||||
Predef.d = Predef.digit
|
||||
Predef.g = Predef.graph
|
||||
Predef.l = Predef.lower
|
||||
Predef.p = Predef.punct
|
||||
Predef.s = Predef.space
|
||||
Predef.u = Predef.upper
|
||||
Predef.w = Predef.alnum
|
||||
Predef.x = Predef.xdigit
|
||||
Predef.A = any - Predef.a
|
||||
Predef.C = any - Predef.c
|
||||
Predef.D = any - Predef.d
|
||||
Predef.G = any - Predef.g
|
||||
Predef.L = any - Predef.l
|
||||
Predef.P = any - Predef.p
|
||||
Predef.S = any - Predef.s
|
||||
Predef.U = any - Predef.u
|
||||
Predef.W = any - Predef.w
|
||||
Predef.X = any - Predef.x
|
||||
mem = {} -- restart memoization
|
||||
fmem = {}
|
||||
gmem = {}
|
||||
local mt = {__mode = "v"}
|
||||
setmetatable(mem, mt)
|
||||
setmetatable(fmem, mt)
|
||||
setmetatable(gmem, mt)
|
||||
end
|
||||
|
||||
|
||||
updatelocale()
|
||||
|
||||
|
||||
|
||||
local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
|
||||
|
||||
|
||||
local function patt_error (s, i)
|
||||
local msg = (#s < i + 20) and s:sub(i)
|
||||
or s:sub(i,i+20) .. "..."
|
||||
msg = ("pattern error near '%s'"):format(msg)
|
||||
error(msg, 2)
|
||||
end
|
||||
|
||||
local function mult (p, n)
|
||||
local np = mm.P(true)
|
||||
while n >= 1 do
|
||||
if n%2 >= 1 then np = np * p end
|
||||
p = p * p
|
||||
n = n/2
|
||||
end
|
||||
return np
|
||||
end
|
||||
|
||||
local function equalcap (s, i, c)
|
||||
if type(c) ~= "string" then return nil end
|
||||
local e = #c + i
|
||||
if s:sub(i, e - 1) == c then return e else return nil end
|
||||
end
|
||||
|
||||
|
||||
local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
|
||||
|
||||
local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
|
||||
|
||||
local arrow = S * "<-"
|
||||
|
||||
local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
|
||||
|
||||
name = m.C(name)
|
||||
|
||||
|
||||
-- a defined name only have meaning in a given environment
|
||||
local Def = name * m.Carg(1)
|
||||
|
||||
|
||||
local function getdef (id, defs)
|
||||
local c = defs and defs[id]
|
||||
if not c then error("undefined name: " .. id) end
|
||||
return c
|
||||
end
|
||||
|
||||
-- match a name and return a group of its corresponding definition
|
||||
-- and 'f' (to be folded in 'Suffix')
|
||||
local function defwithfunc (f)
|
||||
return m.Cg(Def / getdef * m.Cc(f))
|
||||
end
|
||||
|
||||
|
||||
local num = m.C(m.R"09"^1) * S / tonumber
|
||||
|
||||
local String = "'" * m.C((any - "'")^0) * "'" +
|
||||
'"' * m.C((any - '"')^0) * '"'
|
||||
|
||||
|
||||
local defined = "%" * Def / function (c,Defs)
|
||||
local cat = Defs and Defs[c] or Predef[c]
|
||||
if not cat then error ("name '" .. c .. "' undefined") end
|
||||
return cat
|
||||
end
|
||||
|
||||
local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
|
||||
|
||||
local item = (defined + Range + m.C(any)) / m.P
|
||||
|
||||
local Class =
|
||||
"["
|
||||
* (m.C(m.P"^"^-1)) -- optional complement symbol
|
||||
* (item * ((item % mt.__add) - "]")^0) /
|
||||
function (c, p) return c == "^" and any - p or p end
|
||||
* "]"
|
||||
|
||||
local function adddef (t, k, exp)
|
||||
if t[k] then
|
||||
error("'"..k.."' already defined as a rule")
|
||||
else
|
||||
t[k] = exp
|
||||
end
|
||||
return t
|
||||
end
|
||||
|
||||
local function firstdef (n, r) return adddef({n}, n, r) end
|
||||
|
||||
|
||||
local function NT (n, b)
|
||||
if not b then
|
||||
error("rule '"..n.."' used outside a grammar")
|
||||
else return mm.V(n)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
local exp = m.P{ "Exp",
|
||||
Exp = S * ( m.V"Grammar"
|
||||
+ m.V"Seq" * ("/" * S * m.V"Seq" % mt.__add)^0 );
|
||||
Seq = (m.Cc(m.P"") * (m.V"Prefix" % mt.__mul)^0)
|
||||
* (#seq_follow + patt_error);
|
||||
Prefix = "&" * S * m.V"Prefix" / mt.__len
|
||||
+ "!" * S * m.V"Prefix" / mt.__unm
|
||||
+ m.V"Suffix";
|
||||
Suffix = m.V"Primary" * S *
|
||||
( ( m.P"+" * m.Cc(1, mt.__pow)
|
||||
+ m.P"*" * m.Cc(0, mt.__pow)
|
||||
+ m.P"?" * m.Cc(-1, mt.__pow)
|
||||
+ "^" * ( m.Cg(num * m.Cc(mult))
|
||||
+ m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
|
||||
)
|
||||
+ "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
|
||||
+ m.P"{}" * m.Cc(nil, m.Ct)
|
||||
+ defwithfunc(mt.__div)
|
||||
)
|
||||
+ "=>" * S * defwithfunc(mm.Cmt)
|
||||
+ ">>" * S * defwithfunc(mt.__mod)
|
||||
+ "~>" * S * defwithfunc(mm.Cf)
|
||||
) % function (a,b,f) return f(a,b) end * S
|
||||
)^0;
|
||||
Primary = "(" * m.V"Exp" * ")"
|
||||
+ String / mm.P
|
||||
+ Class
|
||||
+ defined
|
||||
+ "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
|
||||
function (n, p) return mm.Cg(p, n) end
|
||||
+ "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
|
||||
+ m.P"{}" / mm.Cp
|
||||
+ "{~" * m.V"Exp" * "~}" / mm.Cs
|
||||
+ "{|" * m.V"Exp" * "|}" / mm.Ct
|
||||
+ "{" * m.V"Exp" * "}" / mm.C
|
||||
+ m.P"." * m.Cc(any)
|
||||
+ (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
|
||||
Definition = name * arrow * m.V"Exp";
|
||||
Grammar = m.Cg(m.Cc(true), "G") *
|
||||
((m.V"Definition" / firstdef) * (m.V"Definition" % adddef)^0) / mm.P
|
||||
}
|
||||
|
||||
local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
|
||||
|
||||
|
||||
local function compile (p, defs)
|
||||
if mm.type(p) == "pattern" then return p end -- already compiled
|
||||
local cp = pattern:match(p, 1, defs)
|
||||
if not cp then error("incorrect pattern", 3) end
|
||||
return cp
|
||||
end
|
||||
|
||||
local function match (s, p, i)
|
||||
local cp = mem[p]
|
||||
if not cp then
|
||||
cp = compile(p)
|
||||
mem[p] = cp
|
||||
end
|
||||
return cp:match(s, i or 1)
|
||||
end
|
||||
|
||||
local function find (s, p, i)
|
||||
local cp = fmem[p]
|
||||
if not cp then
|
||||
cp = compile(p) / 0
|
||||
cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
|
||||
fmem[p] = cp
|
||||
end
|
||||
local i, e = cp:match(s, i or 1)
|
||||
if i then return i, e - 1
|
||||
else return i
|
||||
end
|
||||
end
|
||||
|
||||
local function gsub (s, p, rep)
|
||||
local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
|
||||
gmem[p] = g
|
||||
local cp = g[rep]
|
||||
if not cp then
|
||||
cp = compile(p)
|
||||
cp = mm.Cs((cp / rep + 1)^0)
|
||||
g[rep] = cp
|
||||
end
|
||||
return cp:match(s)
|
||||
end
|
||||
|
||||
|
||||
-- exported names
|
||||
local re = {
|
||||
compile = compile,
|
||||
match = match,
|
||||
find = find,
|
||||
gsub = gsub,
|
||||
updatelocale = updatelocale,
|
||||
}
|
||||
|
||||
if version == "Lua 5.1" then _G.re = re end
|
||||
|
||||
return re
|
||||
1667
src/lua/lpeg-1.1.0/test.lua
Executable file
1667
src/lua/lpeg-1.1.0/test.lua
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user