Added lpeg for narrator

This commit is contained in:
2025-09-16 22:31:16 +03:00
parent 190318e5c4
commit 1977a12d8b
21 changed files with 8441 additions and 0 deletions

109
src/lua/lpeg-1.1.0/HISTORY Normal file
View File

@@ -0,0 +1,109 @@
HISTORY for LPeg 1.1.0
* Changes from version 1.0.2 to 1.1.0
---------------------------------
+ accumulator capture
+ UTF-8 ranges
+ Larger limit for number of rules in a grammar
+ Larger limit for number of captures in a match
+ bug fixes
+ other small improvements
* Changes from version 1.0.1 to 1.0.2
---------------------------------
+ some bugs fixed
* Changes from version 0.12 to 1.0.1
---------------------------------
+ group "names" can be any Lua value
+ some bugs fixed
+ other small improvements
* Changes from version 0.11 to 0.12
---------------------------------
+ no "unsigned short" limit for pattern sizes
+ mathtime captures considered nullable
+ some bugs fixed
* Changes from version 0.10 to 0.11
-------------------------------
+ complete reimplementation of the code generator
+ new syntax for table captures
+ new functions in module 're'
+ other small improvements
* Changes from version 0.9 to 0.10
-------------------------------
+ backtrack stack has configurable size
+ better error messages
+ Notation for non-terminals in 're' back to A instead o <A>
+ experimental look-behind pattern
+ support for external extensions
+ works with Lua 5.2
+ consumes less C stack
- "and" predicates do not keep captures
* Changes from version 0.8 to 0.9
-------------------------------
+ The accumulator capture was replaced by a fold capture;
programs that used the old 'lpeg.Ca' will need small changes.
+ Some support for character classes from old C locales.
+ A new named-group capture.
* Changes from version 0.7 to 0.8
-------------------------------
+ New "match-time" capture.
+ New "argument capture" that allows passing arguments into the pattern.
+ Better documentation for 're'.
+ Several small improvements for 're'.
+ The 're' module has an incompatibility with previous versions:
now, any use of a non-terminal must be enclosed in angle brackets
(like <B>).
* Changes from version 0.6 to 0.7
-------------------------------
+ Several improvements in module 're':
- better documentation;
- support for most captures (all but accumulator);
- limited repetitions p{n,m}.
+ Small improvements in efficiency.
+ Several small bugs corrected (special thanks to Hans Hagen
and Taco Hoekwater).
* Changes from version 0.5 to 0.6
-------------------------------
+ Support for non-numeric indices in grammars.
+ Some bug fixes (thanks to the luatex team).
+ Some new optimizations; (thanks to Mike Pall).
+ A new page layout (thanks to Andre Carregal).
+ Minimal documentation for module 're'.
* Changes from version 0.4 to 0.5
-------------------------------
+ Several optimizations.
+ lpeg.P now accepts booleans.
+ Some new examples.
+ A proper license.
+ Several small improvements.
* Changes from version 0.3 to 0.4
-------------------------------
+ Static check for loops in repetitions and grammars.
+ Removed label option in captures.
+ The implementation of captures uses less memory.
* Changes from version 0.2 to 0.3
-------------------------------
+ User-defined patterns in Lua.
+ Several new captures.
* Changes from version 0.1 to 0.2
-------------------------------
+ Several small corrections.
+ Handles embedded zeros like any other character.
+ Capture "name" can be any Lua value.
+ Unlimited number of captures.
+ Match gets an optional initial position.
(end of HISTORY)

View File

@@ -0,0 +1,4 @@
# LPeg - Parsing Expression Grammars For Lua
For more information,
see [Lpeg](//www.inf.puc-rio.br/~roberto/lpeg/).

612
src/lua/lpeg-1.1.0/lpcap.c Normal file
View File

@@ -0,0 +1,612 @@
#include "lua.h"
#include "lauxlib.h"
#include "lpcap.h"
#include "lpprint.h"
#include "lptypes.h"
#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
#define skipclose(cs,head) \
if (isopencap(head)) { assert(isclosecap(cs->cap)); cs->cap++; }
/*
** Return the size of capture 'cap'. If it is an open capture, 'close'
** must be its corresponding close.
*/
static Index_t capsize (Capture *cap, Capture *close) {
if (isopencap(cap)) {
assert(isclosecap(close));
return close->index - cap->index;
}
else
return cap->siz - 1;
}
static Index_t closesize (CapState *cs, Capture *head) {
return capsize(head, cs->cap);
}
/*
** Put at the cache for Lua values the value indexed by 'v' in ktable
** of the running pattern (if it is not there yet); returns its index.
*/
static int updatecache (CapState *cs, int v) {
int idx = cs->ptop + 1; /* stack index of cache for Lua values */
if (v != cs->valuecached) { /* not there? */
getfromktable(cs, v); /* get value from 'ktable' */
lua_replace(cs->L, idx); /* put it at reserved stack position */
cs->valuecached = v; /* keep track of what is there */
}
return idx;
}
static int pushcapture (CapState *cs);
/*
** Goes back in a list of captures looking for an open capture
** corresponding to a close one.
*/
static Capture *findopen (Capture *cap) {
int n = 0; /* number of closes waiting an open */
for (;;) {
cap--;
if (isclosecap(cap)) n++; /* one more open to skip */
else if (isopencap(cap))
if (n-- == 0) return cap;
}
}
/*
** Go to the next capture at the same level.
*/
static void nextcap (CapState *cs) {
Capture *cap = cs->cap;
if (isopencap(cap)) { /* must look for a close? */
int n = 0; /* number of opens waiting a close */
for (;;) { /* look for corresponding close */
cap++;
if (isopencap(cap)) n++;
else if (isclosecap(cap))
if (n-- == 0) break;
}
cs->cap = cap + 1; /* + 1 to skip last close */
}
else {
Capture *next;
for (next = cap + 1; capinside(cap, next); next++)
; /* skip captures inside current one */
cs->cap = next;
}
}
/*
** Push on the Lua stack all values generated by nested captures inside
** the current capture. Returns number of values pushed. 'addextra'
** makes it push the entire match after all captured values. The
** entire match is pushed also if there are no other nested values,
** so the function never returns zero.
*/
static int pushnestedvalues (CapState *cs, int addextra) {
Capture *head = cs->cap++; /* original capture */
int n = 0; /* number of pushed subvalues */
/* repeat for all nested patterns */
while (capinside(head, cs->cap))
n += pushcapture(cs);
if (addextra || n == 0) { /* need extra? */
lua_pushlstring(cs->L, cs->s + head->index, closesize(cs, head));
n++;
}
skipclose(cs, head);
return n;
}
/*
** Push only the first value generated by nested captures
*/
static void pushonenestedvalue (CapState *cs) {
int n = pushnestedvalues(cs, 0);
if (n > 1)
lua_pop(cs->L, n - 1); /* pop extra values */
}
/*
** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
** not nested inside a full capture that does not contain 'ref'. (We
** only need to care for full captures because the search at 'findback'
** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
** 'ref' is also inside it.) To check this, we search backward for the
** inner full capture enclosing 'grp'. A full capture cannot contain
** non-full captures, so a close capture means we cannot be inside a
** full capture anymore.
*/
static int capvisible (CapState *cs, Capture *grp, Capture *ref) {
Capture *cap = grp;
int i = MAXLOP; /* maximum distance for an 'open' */
while (i-- > 0 && cap-- > cs->ocap) {
if (isclosecap(cap))
return 1; /* can stop the search */
else if (grp->index - cap->index >= UCHAR_MAX)
return 1; /* can stop the search */
else if (capinside(cap, grp)) /* is 'grp' inside cap? */
return capinside(cap, ref); /* ok iff cap also contains 'ref' */
}
return 1; /* 'grp' is not inside any capture */
}
/*
** Try to find a named group capture with the name given at the top of
** the stack; goes backward from 'ref'.
*/
static Capture *findback (CapState *cs, Capture *ref) {
lua_State *L = cs->L;
Capture *cap = ref;
while (cap-- > cs->ocap) { /* repeat until end of list */
if (isclosecap(cap))
cap = findopen(cap); /* skip nested captures */
else if (capinside(cap, ref))
continue; /* enclosing captures are not visible to 'ref' */
if (captype(cap) == Cgroup && capvisible(cs, cap, ref)) {
getfromktable(cs, cap->idx); /* get group name */
if (lp_equal(L, -2, -1)) { /* right group? */
lua_pop(L, 2); /* remove reference name and group name */
return cap;
}
else lua_pop(L, 1); /* remove group name */
}
}
luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
return NULL; /* to avoid warnings */
}
/*
** Back-reference capture. Return number of values pushed.
*/
static int backrefcap (CapState *cs) {
int n;
Capture *curr = cs->cap;
pushluaval(cs); /* reference name */
cs->cap = findback(cs, curr); /* find corresponding group */
n = pushnestedvalues(cs, 0); /* push group's values */
cs->cap = curr + 1;
return n;
}
/*
** Table capture: creates a new table and populates it with nested
** captures.
*/
static int tablecap (CapState *cs) {
lua_State *L = cs->L;
Capture *head = cs->cap++;
int n = 0;
lua_newtable(L);
while (capinside(head, cs->cap)) {
if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
pushluaval(cs); /* push group name */
pushonenestedvalue(cs);
lua_settable(L, -3);
}
else { /* not a named group */
int i;
int k = pushcapture(cs);
for (i = k; i > 0; i--) /* store all values into table */
lua_rawseti(L, -(i + 1), n + i);
n += k;
}
}
skipclose(cs, head);
return 1; /* number of values pushed (only the table) */
}
/*
** Table-query capture
*/
static int querycap (CapState *cs) {
int idx = cs->cap->idx;
pushonenestedvalue(cs); /* get nested capture */
lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
if (!lua_isnil(cs->L, -1))
return 1;
else { /* no value */
lua_pop(cs->L, 1); /* remove nil */
return 0;
}
}
/*
** Fold capture
*/
static int foldcap (CapState *cs) {
int n;
lua_State *L = cs->L;
Capture *head = cs->cap++;
int idx = head->idx;
if (isclosecap(cs->cap) || /* no nested captures (large subject)? */
(n = pushcapture(cs)) == 0) /* nested captures with no values? */
return luaL_error(L, "no initial value for fold capture");
if (n > 1)
lua_pop(L, n - 1); /* leave only one result for accumulator */
while (capinside(head, cs->cap)) {
lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
lua_insert(L, -2); /* put it before accumulator */
n = pushcapture(cs); /* get next capture's values */
lua_call(L, n + 1, 1); /* call folding function */
}
skipclose(cs, head);
return 1; /* only accumulator left on the stack */
}
/*
** Function capture
*/
static int functioncap (CapState *cs) {
int n;
int top = lua_gettop(cs->L);
pushluaval(cs); /* push function */
n = pushnestedvalues(cs, 0); /* push nested captures */
lua_call(cs->L, n, LUA_MULTRET); /* call function */
return lua_gettop(cs->L) - top; /* return function's results */
}
/*
** Accumulator capture
*/
static int accumulatorcap (CapState *cs) {
lua_State *L = cs->L;
int n;
if (lua_gettop(L) < cs->firstcap)
luaL_error(L, "no previous value for accumulator capture");
pushluaval(cs); /* push function */
lua_insert(L, -2); /* previous value becomes first argument */
n = pushnestedvalues(cs, 0); /* push nested captures */
lua_call(L, n + 1, 1); /* call function */
return 0; /* did not add any extra value */
}
/*
** Select capture
*/
static int numcap (CapState *cs) {
int idx = cs->cap->idx; /* value to select */
if (idx == 0) { /* no values? */
nextcap(cs); /* skip entire capture */
return 0; /* no value produced */
}
else {
int n = pushnestedvalues(cs, 0);
if (n < idx) /* invalid index? */
return luaL_error(cs->L, "no capture '%d'", idx);
else {
lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
lua_pop(cs->L, n - 1); /* remove other captures */
return 1;
}
}
}
/*
** Return the stack index of the first runtime capture in the given
** list of captures (or zero if no runtime captures)
*/
int finddyncap (Capture *cap, Capture *last) {
for (; cap < last; cap++) {
if (cap->kind == Cruntime)
return cap->idx; /* stack position of first capture */
}
return 0; /* no dynamic captures in this segment */
}
/*
** Calls a runtime capture. Returns number of captures "removed" by the
** call, that is, those inside the group capture. Captures to be added
** are on the Lua stack.
*/
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
int n, id;
lua_State *L = cs->L;
int otop = lua_gettop(L);
Capture *open = findopen(close); /* get open group capture */
assert(captype(open) == Cgroup);
id = finddyncap(open, close); /* get first dynamic capture argument */
close->kind = Cclose; /* closes the group */
close->index = s - cs->s;
cs->cap = open; cs->valuecached = 0; /* prepare capture state */
luaL_checkstack(L, 4, "too many runtime captures");
pushluaval(cs); /* push function to be called */
lua_pushvalue(L, SUBJIDX); /* push original subject */
lua_pushinteger(L, s - cs->s + 1); /* push current position */
n = pushnestedvalues(cs, 0); /* push nested captures */
lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
if (id > 0) { /* are there old dynamic captures to be removed? */
int i;
for (i = id; i <= otop; i++)
lua_remove(L, id); /* remove old dynamic captures */
*rem = otop - id + 1; /* total number of dynamic captures removed */
}
else
*rem = 0; /* no dynamic captures removed */
return close - open - 1; /* number of captures to be removed */
}
/*
** Auxiliary structure for substitution and string captures: keep
** information about nested captures for future use, avoiding to push
** string results into Lua
*/
typedef struct StrAux {
int isstring; /* whether capture is a string */
union {
Capture *cp; /* if not a string, respective capture */
struct { /* if it is a string... */
Index_t idx; /* starts here */
Index_t siz; /* with this size */
} s;
} u;
} StrAux;
#define MAXSTRCAPS 10
/*
** Collect values from current capture into array 'cps'. Current
** capture must be Cstring (first call) or Csimple (recursive calls).
** (In first call, fills %0 with whole match for Cstring.)
** Returns number of elements in the array that were filled.
*/
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
int k = n++;
Capture *head = cs->cap++;
cps[k].isstring = 1; /* get string value */
cps[k].u.s.idx = head->index; /* starts here */
while (capinside(head, cs->cap)) {
if (n >= MAXSTRCAPS) /* too many captures? */
nextcap(cs); /* skip extra captures (will not need them) */
else if (captype(cs->cap) == Csimple) /* string? */
n = getstrcaps(cs, cps, n); /* put info. into array */
else {
cps[n].isstring = 0; /* not a string */
cps[n].u.cp = cs->cap; /* keep original capture */
nextcap(cs);
n++;
}
}
cps[k].u.s.siz = closesize(cs, head);
skipclose(cs, head);
return n;
}
/*
** add next capture value (which should be a string) to buffer 'b'
*/
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
/*
** String capture: add result to buffer 'b' (instead of pushing
** it into the stack)
*/
static void stringcap (luaL_Buffer *b, CapState *cs) {
StrAux cps[MAXSTRCAPS];
int n;
size_t len, i;
const char *fmt; /* format string */
fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
for (i = 0; i < len; i++) { /* traverse format string */
if (fmt[i] != '%') /* not an escape? */
luaL_addchar(b, fmt[i]); /* add it to buffer */
else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
luaL_addchar(b, fmt[i]); /* add to buffer */
else {
int l = fmt[i] - '0'; /* capture index */
if (l > n)
luaL_error(cs->L, "invalid capture index (%d)", l);
else if (cps[l].isstring)
luaL_addlstring(b, cs->s + cps[l].u.s.idx, cps[l].u.s.siz);
else {
Capture *curr = cs->cap;
cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
if (!addonestring(b, cs, "capture"))
luaL_error(cs->L, "no values in capture index %d", l);
cs->cap = curr; /* continue from where it stopped */
}
}
}
}
/*
** Substitution capture: add result to buffer 'b'
*/
static void substcap (luaL_Buffer *b, CapState *cs) {
const char *curr = cs->s + cs->cap->index;
Capture *head = cs->cap++;
while (capinside(head, cs->cap)) {
Capture *cap = cs->cap;
const char *caps = cs->s + cap->index;
luaL_addlstring(b, curr, caps - curr); /* add text up to capture */
if (addonestring(b, cs, "replacement"))
curr = caps + capsize(cap, cs->cap - 1); /* continue after match */
else /* no capture value */
curr = caps; /* keep original text in final result */
}
/* add last piece of text */
luaL_addlstring(b, curr, cs->s + head->index + closesize(cs, head) - curr);
skipclose(cs, head);
}
/*
** Evaluates a capture and adds its first value to buffer 'b'; returns
** whether there was a value
*/
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
switch (captype(cs->cap)) {
case Cstring:
stringcap(b, cs); /* add capture directly to buffer */
return 1;
case Csubst:
substcap(b, cs); /* add capture directly to buffer */
return 1;
case Cacc: /* accumulator capture? */
return luaL_error(cs->L, "invalid context for an accumulator capture");
default: {
lua_State *L = cs->L;
int n = pushcapture(cs);
if (n > 0) {
if (n > 1) lua_pop(L, n - 1); /* only one result */
if (!lua_isstring(L, -1))
return luaL_error(L, "invalid %s value (a %s)",
what, luaL_typename(L, -1));
luaL_addvalue(b);
}
return n;
}
}
}
#if !defined(MAXRECLEVEL)
#define MAXRECLEVEL 200
#endif
/*
** Push all values of the current capture into the stack; returns
** number of values pushed
*/
static int pushcapture (CapState *cs) {
lua_State *L = cs->L;
int res;
luaL_checkstack(L, 4, "too many captures");
if (cs->reclevel++ > MAXRECLEVEL)
return luaL_error(L, "subcapture nesting too deep");
switch (captype(cs->cap)) {
case Cposition: {
lua_pushinteger(L, cs->cap->index + 1);
cs->cap++;
res = 1;
break;
}
case Cconst: {
pushluaval(cs);
cs->cap++;
res = 1;
break;
}
case Carg: {
int arg = (cs->cap++)->idx;
if (arg + FIXEDARGS > cs->ptop)
return luaL_error(L, "reference to absent extra argument #%d", arg);
lua_pushvalue(L, arg + FIXEDARGS);
res = 1;
break;
}
case Csimple: {
int k = pushnestedvalues(cs, 1);
lua_insert(L, -k); /* make whole match be first result */
res = k;
break;
}
case Cruntime: {
lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
res = 1;
break;
}
case Cstring: {
luaL_Buffer b;
luaL_buffinit(L, &b);
stringcap(&b, cs);
luaL_pushresult(&b);
res = 1;
break;
}
case Csubst: {
luaL_Buffer b;
luaL_buffinit(L, &b);
substcap(&b, cs);
luaL_pushresult(&b);
res = 1;
break;
}
case Cgroup: {
if (cs->cap->idx == 0) /* anonymous group? */
res = pushnestedvalues(cs, 0); /* add all nested values */
else { /* named group: add no values */
nextcap(cs); /* skip capture */
res = 0;
}
break;
}
case Cbackref: res = backrefcap(cs); break;
case Ctable: res = tablecap(cs); break;
case Cfunction: res = functioncap(cs); break;
case Cacc: res = accumulatorcap(cs); break;
case Cnum: res = numcap(cs); break;
case Cquery: res = querycap(cs); break;
case Cfold: res = foldcap(cs); break;
default: assert(0); res = 0;
}
cs->reclevel--;
return res;
}
/*
** Prepare a CapState structure and traverse the entire list of
** captures in the stack pushing its results. 's' is the subject
** string, 'r' is the final position of the match, and 'ptop'
** the index in the stack where some useful values were pushed.
** Returns the number of results pushed. (If the list produces no
** results, push the final position of the match.)
*/
int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
int n = 0;
/* printcaplist(capture); */
if (!isclosecap(capture)) { /* is there any capture? */
CapState cs;
cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
cs.firstcap = lua_gettop(L) + 1; /* where first value (if any) will go */
do { /* collect their values */
n += pushcapture(&cs);
} while (!isclosecap(cs.cap));
assert(lua_gettop(L) - cs.firstcap == n - 1);
}
if (n == 0) { /* no capture values? */
lua_pushinteger(L, r - s + 1); /* return only end position */
n = 1;
}
return n;
}

View File

@@ -0,0 +1,86 @@
#if !defined(lpcap_h)
#define lpcap_h
#include "lptypes.h"
/* kinds of captures */
typedef enum CapKind {
Cclose, /* not used in trees */
Cposition,
Cconst, /* ktable[key] is Lua constant */
Cbackref, /* ktable[key] is "name" of group to get capture */
Carg, /* 'key' is arg's number */
Csimple, /* next node is pattern */
Ctable, /* next node is pattern */
Cfunction, /* ktable[key] is function; next node is pattern */
Cacc, /* ktable[key] is function; next node is pattern */
Cquery, /* ktable[key] is table; next node is pattern */
Cstring, /* ktable[key] is string; next node is pattern */
Cnum, /* numbered capture; 'key' is number of value to return */
Csubst, /* substitution capture; next node is pattern */
Cfold, /* ktable[key] is function; next node is pattern */
Cruntime, /* not used in trees (is uses another type for tree) */
Cgroup /* ktable[key] is group's "name" */
} CapKind;
/*
** An unsigned integer large enough to index any subject entirely.
** It can be size_t, but that will double the size of the array
** of captures in a 64-bit machine.
*/
#if !defined(Index_t)
typedef uint Index_t;
#endif
#define MAXINDT (~(Index_t)0)
typedef struct Capture {
Index_t index; /* subject position */
unsigned short idx; /* extra info (group name, arg index, etc.) */
byte kind; /* kind of capture */
byte siz; /* size of full capture + 1 (0 = not a full capture) */
} Capture;
typedef struct CapState {
Capture *cap; /* current capture */
Capture *ocap; /* (original) capture list */
lua_State *L;
int ptop; /* stack index of last argument to 'match' */
int firstcap; /* stack index of first capture pushed in the stack */
const char *s; /* original string */
int valuecached; /* value stored in cache slot */
int reclevel; /* recursion level */
} CapState;
#define captype(cap) ((cap)->kind)
#define isclosecap(cap) (captype(cap) == Cclose)
#define isopencap(cap) ((cap)->siz == 0)
/* true if c2 is (any number of levels) inside c1 */
#define capinside(c1,c2) \
(isopencap(c1) ? !isclosecap(c2) \
: (c2)->index < (c1)->index + (c1)->siz - 1)
/**
** Maximum number of captures to visit when looking for an 'open'.
*/
#define MAXLOP 20
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
int finddyncap (Capture *cap, Capture *last);
#endif

1051
src/lua/lpeg-1.1.0/lpcode.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
#if !defined(lpcode_h)
#define lpcode_h
#include "lua.h"
#include "lptypes.h"
#include "lptree.h"
#include "lpvm.h"
int checkaux (TTree *tree, int pred);
int fixedlen (TTree *tree);
int hascaptures (TTree *tree);
int lp_gc (lua_State *L);
Instruction *compile (lua_State *L, Pattern *p, uint size);
void freecode (lua_State *L, Pattern *p);
int sizei (const Instruction *i);
#define PEnullable 0
#define PEnofail 1
/*
** nofail(t) implies that 't' cannot fail with any input
*/
#define nofail(t) checkaux(t, PEnofail)
/*
** (not nullable(t)) implies 't' cannot match without consuming
** something
*/
#define nullable(t) checkaux(t, PEnullable)
#endif

110
src/lua/lpeg-1.1.0/lpcset.c Normal file
View File

@@ -0,0 +1,110 @@
#include "lptypes.h"
#include "lpcset.h"
/*
** Add to 'c' the index of the (only) bit set in byte 'b'
*/
static int onlybit (int c, int b) {
if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
if ((b & 0x02) != 0) { c += 1; }
return c;
}
/*
** Check whether a charset is empty (returns IFail), singleton (IChar),
** full (IAny), or none of those (ISet). When singleton, 'info.offset'
** returns which character it is. When generic set, 'info' returns
** information about its range.
*/
Opcode charsettype (const byte *cs, charsetinfo *info) {
int low0, low1, high0, high1;
for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
/* find lowest byte with a 1-bit */;
if (low1 == CHARSETSIZE)
return IFail; /* no characters in set */
for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
/* find highest byte with a 1-bit; low1 is a sentinel */;
if (low1 == high1) { /* only one byte with 1-bits? */
int b = cs[low1];
if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
info->offset = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
return IChar; /* single character */
}
}
for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
/* find lowest byte with a 0-bit */;
if (low0 == CHARSETSIZE)
return IAny; /* set has all bits set */
for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
/* find highest byte with a 0-bit; low0 is a sentinel */;
if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
info->offset = low1;
info->size = high1 - low1 + 1;
info->deflt = 0; /* all discharged bits were 0 */
}
else {
info->offset = low0;
info->size = high0 - low0 + 1;
info->deflt = 0xFF; /* all discharged bits were 1 */
}
info->cs = cs + info->offset;
return ISet;
}
/*
** Get a byte from a compact charset. If index is inside the charset
** range, get the byte from the supporting charset (correcting it
** by the offset). Otherwise, return the default for the set.
*/
byte getbytefromcharset (const charsetinfo *info, int index) {
if (index < info->size)
return info->cs[index];
else return info->deflt;
}
/*
** If 'tree' is a 'char' pattern (TSet, TChar, TAny, TFalse), convert it
** into a charset and return 1; else return 0.
*/
int tocharset (TTree *tree, Charset *cs) {
switch (tree->tag) {
case TChar: { /* only one char */
assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
clearset(cs->cs); /* erase all chars */
setchar(cs->cs, tree->u.n); /* add that one */
return 1;
}
case TAny: {
fillset(cs->cs, 0xFF); /* add all characters to the set */
return 1;
}
case TFalse: {
clearset(cs->cs); /* empty set */
return 1;
}
case TSet: { /* fill set */
int i;
fillset(cs->cs, tree->u.set.deflt);
for (i = 0; i < tree->u.set.size; i++)
cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i];
return 1;
}
default: return 0;
}
}
void tree2cset (TTree *tree, charsetinfo *info) {
assert(tree->tag == TSet);
info->offset = tree->u.set.offset;
info->size = tree->u.set.size;
info->deflt = tree->u.set.deflt;
info->cs = treebuffer(tree);
}

View File

@@ -0,0 +1,30 @@
#if !defined(lpset_h)
#define lpset_h
#include "lpcset.h"
#include "lpcode.h"
#include "lptree.h"
/*
** Extra information for the result of 'charsettype'. When result is
** IChar, 'offset' is the character. When result is ISet, 'cs' is the
** supporting bit array (with offset included), 'offset' is the offset
** (in bytes), 'size' is the size (in bytes), and 'delt' is the default
** value for bytes outside the set.
*/
typedef struct {
const byte *cs;
int offset;
int size;
int deflt;
} charsetinfo;
int tocharset (TTree *tree, Charset *cs);
Opcode charsettype (const byte *cs, charsetinfo *info);
byte getbytefromcharset (const charsetinfo *info, int index);
void tree2cset (TTree *tree, charsetinfo *info);
#endif

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

1430
src/lua/lpeg-1.1.0/lpeg.html Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,298 @@
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include "lptypes.h"
#include "lpprint.h"
#include "lpcode.h"
#if defined(LPEG_DEBUG)
/*
** {======================================================
** Printing patterns (for debugging)
** =======================================================
*/
void printcharset (const byte *st) {
int i;
printf("[");
for (i = 0; i <= UCHAR_MAX; i++) {
int first = i;
while (i <= UCHAR_MAX && testchar(st, i)) i++;
if (i - 1 == first) /* unary range? */
printf("(%02x)", first);
else if (i - 1 > first) /* non-empty range? */
printf("(%02x-%02x)", first, i - 1);
}
printf("]");
}
static void printIcharset (const Instruction *inst, const byte *buff) {
byte cs[CHARSETSIZE];
int i;
printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size);
clearset(cs);
for (i = 0; i < CHARSETSIZE * 8; i++) {
if (charinset(inst, buff, i))
setchar(cs, i);
}
printcharset(cs);
}
static void printTcharset (TTree *tree) {
byte cs[CHARSETSIZE];
int i;
printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size);
fillset(cs, tree->u.set.deflt);
for (i = 0; i < tree->u.set.size; i++)
cs[tree->u.set.offset + i] = treebuffer(tree)[i];
printcharset(cs);
}
static const char *capkind (int kind) {
const char *const modes[] = {
"close", "position", "constant", "backref",
"argument", "simple", "table", "function", "accumulator",
"query", "string", "num", "substitution", "fold",
"runtime", "group"};
return modes[kind];
}
static void printjmp (const Instruction *op, const Instruction *p) {
printf("-> %d", (int)(p + (p + 1)->offset - op));
}
void printinst (const Instruction *op, const Instruction *p) {
const char *const names[] = {
"any", "char", "set",
"testany", "testchar", "testset",
"span", "utf-range", "behind",
"ret", "end",
"choice", "jmp", "call", "open_call",
"commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
"fullcapture", "opencapture", "closecapture", "closeruntime",
"--"
};
printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
switch ((Opcode)p->i.code) {
case IChar: {
printf("'%c' (%02x)", p->i.aux1, p->i.aux1);
break;
}
case ITestChar: {
printf("'%c' (%02x)", p->i.aux1, p->i.aux1); printjmp(op, p);
break;
}
case IUTFR: {
printf("%d - %d", p[1].offset, utf_to(p));
break;
}
case IFullCapture: {
printf("%s (size = %d) (idx = %d)",
capkind(getkind(p)), getoff(p), p->i.aux2.key);
break;
}
case IOpenCapture: {
printf("%s (idx = %d)", capkind(getkind(p)), p->i.aux2.key);
break;
}
case ISet: {
printIcharset(p, (p+1)->buff);
break;
}
case ITestSet: {
printIcharset(p, (p+2)->buff); printjmp(op, p);
break;
}
case ISpan: {
printIcharset(p, (p+1)->buff);
break;
}
case IOpenCall: {
printf("-> %d", (p + 1)->offset);
break;
}
case IBehind: {
printf("%d", p->i.aux1);
break;
}
case IJmp: case ICall: case ICommit: case IChoice:
case IPartialCommit: case IBackCommit: case ITestAny: {
printjmp(op, p);
break;
}
default: break;
}
printf("\n");
}
void printpatt (Instruction *p) {
Instruction *op = p;
uint n = op[-1].codesize - 1;
while (p < op + n) {
printinst(op, p);
p += sizei(p);
}
}
static void printcap (Capture *cap, int ident) {
while (ident--) printf(" ");
printf("%s (idx: %d - size: %d) -> %lu (%p)\n",
capkind(cap->kind), cap->idx, cap->siz, (long)cap->index, (void*)cap);
}
/*
** Print a capture and its nested captures
*/
static Capture *printcap2close (Capture *cap, int ident) {
Capture *head = cap++;
printcap(head, ident); /* print head capture */
while (capinside(head, cap))
cap = printcap2close(cap, ident + 2); /* print nested captures */
if (isopencap(head)) {
assert(isclosecap(cap));
printcap(cap++, ident); /* print and skip close capture */
}
return cap;
}
void printcaplist (Capture *cap) {
{ /* for debugging, print first a raw list of captures */
Capture *c = cap;
while (c->index != MAXINDT) { printcap(c, 0); c++; }
}
printf(">======\n");
while (!isclosecap(cap))
cap = printcap2close(cap, 0);
printf("=======\n");
}
/* }====================================================== */
/*
** {======================================================
** Printing trees (for debugging)
** =======================================================
*/
static const char *tagnames[] = {
"char", "set", "any",
"true", "false", "utf8.range",
"rep",
"seq", "choice",
"not", "and",
"call", "opencall", "rule", "xinfo", "grammar",
"behind",
"capture", "run-time"
};
void printtree (TTree *tree, int ident) {
int i;
int sibs = numsiblings[tree->tag];
for (i = 0; i < ident; i++) printf(" ");
printf("%s", tagnames[tree->tag]);
switch (tree->tag) {
case TChar: {
int c = tree->u.n;
if (isprint(c))
printf(" '%c'\n", c);
else
printf(" (%02X)\n", c);
break;
}
case TSet: {
printTcharset(tree);
printf("\n");
break;
}
case TUTFR: {
assert(sib1(tree)->tag == TXInfo);
printf(" %d (%02x %d) - %d (%02x %d) \n",
tree->u.n, tree->key, tree->cap,
sib1(tree)->u.n, sib1(tree)->key, sib1(tree)->cap);
break;
}
case TOpenCall: case TCall: {
assert(sib1(sib2(tree))->tag == TXInfo);
printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n);
break;
}
case TBehind: {
printf(" %d\n", tree->u.n);
break;
}
case TCapture: {
printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
break;
}
case TRule: {
printf(" key: %d\n", tree->key);
sibs = 1; /* do not print 'sib2' (next rule) as a sibling */
break;
}
case TXInfo: {
printf(" n: %d\n", tree->u.n);
break;
}
case TGrammar: {
TTree *rule = sib1(tree);
printf(" %d\n", tree->u.n); /* number of rules */
for (i = 0; i < tree->u.n; i++) {
printtree(rule, ident + 2);
rule = sib2(rule);
}
assert(rule->tag == TTrue); /* sentinel */
sibs = 0; /* siblings already handled */
break;
}
default:
printf("\n");
break;
}
if (sibs >= 1) {
printtree(sib1(tree), ident + 2);
if (sibs >= 2)
printtree(sib2(tree), ident + 2);
}
}
void printktable (lua_State *L, int idx) {
int n, i;
lua_getuservalue(L, idx);
if (lua_isnil(L, -1)) /* no ktable? */
return;
n = lua_rawlen(L, -1);
printf("[");
for (i = 1; i <= n; i++) {
printf("%d = ", i);
lua_rawgeti(L, -1, i);
if (lua_isstring(L, -1))
printf("%s ", lua_tostring(L, -1));
else
printf("%s ", lua_typename(L, lua_type(L, -1)));
lua_pop(L, 1);
}
printf("]\n");
/* leave ktable at the stack */
}
/* }====================================================== */
#endif

View File

@@ -0,0 +1,32 @@
#if !defined(lpprint_h)
#define lpprint_h
#include "lptree.h"
#include "lpvm.h"
#if defined(LPEG_DEBUG)
void printpatt (Instruction *p);
void printtree (TTree *tree, int ident);
void printktable (lua_State *L, int idx);
void printcharset (const byte *st);
void printcaplist (Capture *cap);
void printinst (const Instruction *op, const Instruction *p);
#else
#define printktable(L,idx) \
luaL_error(L, "function only implemented in debug mode")
#define printtree(tree,i) \
luaL_error(L, "function only implemented in debug mode")
#define printpatt(p) \
luaL_error(L, "function only implemented in debug mode")
#endif
#endif

1399
src/lua/lpeg-1.1.0/lptree.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,92 @@
#if !defined(lptree_h)
#define lptree_h
#include "lptypes.h"
/*
** types of trees
*/
typedef enum TTag {
TChar = 0, /* 'n' = char */
TSet, /* the set is encoded in 'u.set' and the next 'u.set.size' bytes */
TAny,
TTrue,
TFalse,
TUTFR, /* range of UTF-8 codepoints; 'n' has initial codepoint;
'cap' has length; 'key' has first byte;
extra info is similar for end codepoint */
TRep, /* 'sib1'* */
TSeq, /* 'sib1' 'sib2' */
TChoice, /* 'sib1' / 'sib2' */
TNot, /* !'sib1' */
TAnd, /* &'sib1' */
TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */
TOpenCall, /* ktable[key] is rule's key */
TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
'sib1' is rule's pattern pre-rule; 'sib2' is next rule;
extra info 'n' is rule's sequential number */
TXInfo, /* extra info */
TGrammar, /* 'sib1' is initial (and first) rule */
TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
ktable[key] is Lua value associated with capture;
'sib1' is capture body */
TRunTime /* run-time capture: 'key' is Lua function;
'sib1' is capture body */
} TTag;
/*
** Tree trees
** The first child of a tree (if there is one) is immediately after
** the tree. A reference to a second child (ps) is its position
** relative to the position of the tree itself.
*/
typedef struct TTree {
byte tag;
byte cap; /* kind of capture (if it is a capture) */
unsigned short key; /* key in ktable for Lua data (0 if no key) */
union {
int ps; /* occasional second child */
int n; /* occasional counter */
struct {
byte offset; /* compact set offset (in bytes) */
byte size; /* compact set size (in bytes) */
byte deflt; /* default value */
byte bitmap[1]; /* bitmap (open array) */
} set; /* for compact sets */
} u;
} TTree;
/* access to charset */
#define treebuffer(t) ((t)->u.set.bitmap)
/*
** A complete pattern has its tree plus, if already compiled,
** its corresponding code
*/
typedef struct Pattern {
union Instruction *code;
TTree tree[1];
} Pattern;
/* number of children for each tree */
extern const byte numsiblings[];
/* access to children */
#define sib1(t) ((t) + 1)
#define sib2(t) ((t) + (t)->u.ps)
#endif

View File

@@ -0,0 +1,150 @@
/*
** LPeg - PEG pattern matching for Lua
** Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license)
** written by Roberto Ierusalimschy
*/
#if !defined(lptypes_h)
#define lptypes_h
#include <assert.h>
#include <limits.h>
#include <string.h>
#include "lua.h"
#define VERSION "1.1.0"
#define PATTERN_T "lpeg-pattern"
#define MAXSTACKIDX "lpeg-maxstack"
/*
** compatibility with Lua 5.1
*/
#if (LUA_VERSION_NUM == 501)
#define lp_equal lua_equal
#define lua_getuservalue lua_getfenv
#define lua_setuservalue lua_setfenv
#define lua_rawlen lua_objlen
#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f)
#define luaL_newlib(L,f) luaL_register(L,"lpeg",f)
typedef size_t lua_Unsigned;
#endif
#if !defined(lp_equal)
#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
#endif
/* default maximum size for call/backtrack stack */
#if !defined(MAXBACK)
#define MAXBACK 400
#endif
/* maximum number of rules in a grammar (limited by 'unsigned short') */
#if !defined(MAXRULES)
#define MAXRULES 1000
#endif
/* initial size for capture's list */
#define INITCAPSIZE 32
/* index, on Lua stack, for subject */
#define SUBJIDX 2
/* number of fixed arguments to 'match' (before capture arguments) */
#define FIXEDARGS 3
/* index, on Lua stack, for capture list */
#define caplistidx(ptop) ((ptop) + 2)
/* index, on Lua stack, for pattern's ktable */
#define ktableidx(ptop) ((ptop) + 3)
/* index, on Lua stack, for backtracking stack */
#define stackidx(ptop) ((ptop) + 4)
typedef unsigned char byte;
typedef unsigned int uint;
#define BITSPERCHAR 8
#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1)
typedef struct Charset {
byte cs[CHARSETSIZE];
} Charset;
#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
#define fillset(s,c) memset(s,c,CHARSETSIZE)
#define clearset(s) fillset(s,0)
/* number of slots needed for 'n' bytes */
#define bytes2slots(n) (((n) - 1u) / (uint)sizeof(TTree) + 1u)
/* set 'b' bit in charset 'cs' */
#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
/*
** in capture instructions, 'kind' of capture and its offset are
** packed in field 'aux', 4 bits for each
*/
#define getkind(op) ((op)->i.aux1 & 0xF)
#define getoff(op) (((op)->i.aux1 >> 4) & 0xF)
#define joinkindoff(k,o) ((k) | ((o) << 4))
#define MAXOFF 0xF
#define MAXAUX 0xFF
/* maximum number of bytes to look behind */
#define MAXBEHIND MAXAUX
/* maximum size (in elements) for a pattern */
#define MAXPATTSIZE (SHRT_MAX - 10)
/* size (in instructions) for l bytes (l > 0) */
#define instsize(l) ((int)(((l) + (uint)sizeof(Instruction) - 1u) \
/ (uint)sizeof(Instruction)))
/* size (in elements) for a ISet instruction */
#define CHARSETINSTSIZE (1 + instsize(CHARSETSIZE))
/* size (in elements) for a IFunc instruction */
#define funcinstsize(p) ((p)->i.aux + 2)
#define testchar(st,c) ((((uint)(st)[((c) >> 3)]) >> ((c) & 7)) & 1)
#endif

455
src/lua/lpeg-1.1.0/lpvm.c Normal file
View File

@@ -0,0 +1,455 @@
#include <limits.h>
#include <string.h>
#include "lua.h"
#include "lauxlib.h"
#include "lpcap.h"
#include "lptypes.h"
#include "lpvm.h"
#include "lpprint.h"
/* initial size for call/backtrack stack */
#if !defined(INITBACK)
#define INITBACK MAXBACK
#endif
#define getoffset(p) (((p) + 1)->offset)
static const Instruction giveup = {{IGiveup, 0, {0}}};
int charinset (const Instruction *i, const byte *buff, uint c) {
c -= i->i.aux2.set.offset;
if (c >= ((uint)i->i.aux2.set.size /* size in instructions... */
* (uint)sizeof(Instruction) /* in bytes... */
* 8u)) /* in bits */
return i->i.aux1; /* out of range; return default value */
return testchar(buff, c);
}
/*
** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
*/
static const char *utf8_decode (const char *o, int *val) {
static const uint limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu};
const unsigned char *s = (const unsigned char *)o;
uint c = s[0]; /* first byte */
uint res = 0; /* final result */
if (c < 0x80) /* ascii? */
res = c;
else {
int count = 0; /* to count number of continuation bytes */
while (c & 0x40) { /* still have continuation bytes? */
int cc = s[++count]; /* read next byte */
if ((cc & 0xC0) != 0x80) /* not a continuation byte? */
return NULL; /* invalid byte sequence */
res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
c <<= 1; /* to test next bit */
}
res |= (c & 0x7F) << (count * 5); /* add first byte */
if (count > 3 || res > 0x10FFFFu || res <= limits[count])
return NULL; /* invalid byte sequence */
s += count; /* skip continuation bytes read */
}
*val = res;
return (const char *)s + 1; /* +1 to include first byte */
}
/*
** {======================================================
** Virtual Machine
** =======================================================
*/
typedef struct Stack {
const char *s; /* saved position (or NULL for calls) */
const Instruction *p; /* next instruction */
int caplevel;
} Stack;
#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
/*
** Ensures the size of array 'capture' (with size '*capsize' and
** 'captop' elements being used) is enough to accomodate 'n' extra
** elements plus one. (Because several opcodes add stuff to the capture
** array, it is simpler to ensure the array always has at least one free
** slot upfront and check its size later.)
*/
/* new size in number of elements cannot overflow integers, and new
size in bytes cannot overflow size_t. */
#define MAXNEWSIZE \
(((size_t)INT_MAX) <= (~(size_t)0 / sizeof(Capture)) ? \
((size_t)INT_MAX) : (~(size_t)0 / sizeof(Capture)))
static Capture *growcap (lua_State *L, Capture *capture, int *capsize,
int captop, int n, int ptop) {
if (*capsize - captop > n)
return capture; /* no need to grow array */
else { /* must grow */
Capture *newc;
uint newsize = captop + n + 1; /* minimum size needed */
if (newsize < (MAXNEWSIZE / 3) * 2)
newsize += newsize / 2; /* 1.5 that size, if not too big */
else if (newsize < (MAXNEWSIZE / 9) * 8)
newsize += newsize / 8; /* else, try 9/8 that size */
else
luaL_error(L, "too many captures");
newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture));
memcpy(newc, capture, captop * sizeof(Capture));
*capsize = newsize;
lua_replace(L, caplistidx(ptop));
return newc;
}
}
/*
** Double the size of the stack
*/
static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
Stack *stack = getstackbase(L, ptop);
Stack *newstack;
int n = *stacklimit - stack; /* current stack size */
int max, newn;
lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
max = lua_tointeger(L, -1); /* maximum allowed size */
lua_pop(L, 1);
if (n >= max) /* already at maximum size? */
luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
newn = 2 * n; /* new size */
if (newn > max) newn = max;
newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
memcpy(newstack, stack, n * sizeof(Stack));
lua_replace(L, stackidx(ptop));
*stacklimit = newstack + newn;
return newstack + n; /* return next position */
}
/*
** Interpret the result of a dynamic capture: false -> fail;
** true -> keep current position; number -> next position.
** Return new subject position. 'fr' is stack index where
** is the result; 'curr' is current subject position; 'limit'
** is subject's size.
*/
static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
lua_Integer res;
if (!lua_toboolean(L, fr)) { /* false value? */
lua_settop(L, fr - 1); /* remove results */
return -1; /* and fail */
}
else if (lua_isboolean(L, fr)) /* true? */
res = curr; /* keep current position */
else {
res = lua_tointeger(L, fr) - 1; /* new position */
if (res < curr || res > limit)
luaL_error(L, "invalid position returned by match-time capture");
}
lua_remove(L, fr); /* remove first result (offset) */
return res;
}
/*
** Add capture values returned by a dynamic capture to the list
** 'capture', nested inside a group. 'fd' indexes the first capture
** value, 'n' is the number of values (at least 1). The open group
** capture is already in 'capture', before the place for the new entries.
*/
static void adddyncaptures (Index_t index, Capture *capture, int n, int fd) {
int i;
assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
capture[-1].idx = 0; /* make group capture an anonymous group */
for (i = 0; i < n; i++) { /* add runtime captures */
capture[i].kind = Cruntime;
capture[i].siz = 1; /* mark it as closed */
capture[i].idx = fd + i; /* stack index of capture value */
capture[i].index = index;
}
capture[n].kind = Cclose; /* close group */
capture[n].siz = 1;
capture[n].index = index;
}
/*
** Remove dynamic captures from the Lua stack (called in case of failure)
*/
static int removedyncap (lua_State *L, Capture *capture,
int level, int last) {
int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
int top = lua_gettop(L);
if (id == 0) return 0; /* no dynamic captures? */
lua_settop(L, id - 1); /* remove captures */
return top - id + 1; /* number of values removed */
}
/*
** Find the corresponding 'open' capture before 'cap', when that capture
** can become a full capture. If a full capture c1 is followed by an
** empty capture c2, there is no way to know whether c2 is inside
** c1. So, full captures can enclose only captures that start *before*
** its end.
*/
static Capture *findopen (Capture *cap, Index_t currindex) {
int i;
cap--; /* check last capture */
/* Must it be inside current one, but starts where current one ends? */
if (!isopencap(cap) && cap->index == currindex)
return NULL; /* current one cannot be a full capture */
/* else, look for an 'open' capture */
for (i = 0; i < MAXLOP; i++, cap--) {
if (currindex - cap->index >= UCHAR_MAX)
return NULL; /* capture too long for a full capture */
else if (isopencap(cap)) /* open capture? */
return cap; /* that's the one to be closed */
else if (cap->kind == Cclose)
return NULL; /* a full capture should not nest a non-full one */
}
return NULL; /* not found within allowed search limit */
}
/*
** Opcode interpreter
*/
const char *match (lua_State *L, const char *o, const char *s, const char *e,
Instruction *op, Capture *capture, int ptop) {
Stack stackbase[INITBACK];
Stack *stacklimit = stackbase + INITBACK;
Stack *stack = stackbase; /* point to first empty slot in stack */
int capsize = INITCAPSIZE;
int captop = 0; /* point to first empty slot in captures */
int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
const Instruction *p = op; /* current instruction */
stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
lua_pushlightuserdata(L, stackbase);
for (;;) {
#if defined(DEBUG)
printf("-------------------------------------\n");
printcaplist(capture, capture + captop);
printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
printinst(op, p);
#endif
assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
switch ((Opcode)p->i.code) {
case IEnd: {
assert(stack == getstackbase(L, ptop) + 1);
capture[captop].kind = Cclose;
capture[captop].index = MAXINDT;
return s;
}
case IGiveup: {
assert(stack == getstackbase(L, ptop));
return NULL;
}
case IRet: {
assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
p = (--stack)->p;
continue;
}
case IAny: {
if (s < e) { p++; s++; }
else goto fail;
continue;
}
case IUTFR: {
int codepoint;
if (s >= e)
goto fail;
s = utf8_decode (s, &codepoint);
if (s && p[1].offset <= codepoint && codepoint <= utf_to(p))
p += 2;
else
goto fail;
continue;
}
case ITestAny: {
if (s < e) p += 2;
else p += getoffset(p);
continue;
}
case IChar: {
if ((byte)*s == p->i.aux1 && s < e) { p++; s++; }
else goto fail;
continue;
}
case ITestChar: {
if ((byte)*s == p->i.aux1 && s < e) p += 2;
else p += getoffset(p);
continue;
}
case ISet: {
uint c = (byte)*s;
if (charinset(p, (p+1)->buff, c) && s < e)
{ p += 1 + p->i.aux2.set.size; s++; }
else goto fail;
continue;
}
case ITestSet: {
uint c = (byte)*s;
if (charinset(p, (p + 2)->buff, c) && s < e)
p += 2 + p->i.aux2.set.size;
else p += getoffset(p);
continue;
}
case IBehind: {
int n = p->i.aux1;
if (n > s - o) goto fail;
s -= n; p++;
continue;
}
case ISpan: {
for (; s < e; s++) {
uint c = (byte)*s;
if (!charinset(p, (p+1)->buff, c)) break;
}
p += 1 + p->i.aux2.set.size;
continue;
}
case IJmp: {
p += getoffset(p);
continue;
}
case IChoice: {
if (stack == stacklimit)
stack = doublestack(L, &stacklimit, ptop);
stack->p = p + getoffset(p);
stack->s = s;
stack->caplevel = captop;
stack++;
p += 2;
continue;
}
case ICall: {
if (stack == stacklimit)
stack = doublestack(L, &stacklimit, ptop);
stack->s = NULL;
stack->p = p + 2; /* save return address */
stack++;
p += getoffset(p);
continue;
}
case ICommit: {
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
stack--;
p += getoffset(p);
continue;
}
case IPartialCommit: {
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
(stack - 1)->s = s;
(stack - 1)->caplevel = captop;
p += getoffset(p);
continue;
}
case IBackCommit: {
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
s = (--stack)->s;
if (ndyncap > 0) /* are there matchtime captures? */
ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
captop = stack->caplevel;
p += getoffset(p);
continue;
}
case IFailTwice:
assert(stack > getstackbase(L, ptop));
stack--;
/* FALLTHROUGH */
case IFail:
fail: { /* pattern failed: try to backtrack */
do { /* remove pending calls */
assert(stack > getstackbase(L, ptop));
s = (--stack)->s;
} while (s == NULL);
if (ndyncap > 0) /* is there matchtime captures? */
ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
captop = stack->caplevel;
p = stack->p;
#if defined(DEBUG)
printf("**FAIL**\n");
#endif
continue;
}
case ICloseRunTime: {
CapState cs;
int rem, res, n;
int fr = lua_gettop(L) + 1; /* stack index of first result */
cs.reclevel = 0; cs.L = L;
cs.s = o; cs.ocap = capture; cs.ptop = ptop;
n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
captop -= n; /* remove nested captures */
ndyncap -= rem; /* update number of dynamic captures */
fr -= rem; /* 'rem' items were popped from Lua stack */
res = resdyncaptures(L, fr, s - o, e - o); /* get result */
if (res == -1) /* fail? */
goto fail;
s = o + res; /* else update current position */
n = lua_gettop(L) - fr + 1; /* number of new captures */
ndyncap += n; /* update number of dynamic captures */
if (n == 0) /* no new captures? */
captop--; /* remove open group */
else { /* new captures; keep original open group */
if (fr + n >= SHRT_MAX)
luaL_error(L, "too many results in match-time capture");
/* add new captures + close group to 'capture' list */
capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
adddyncaptures(s - o, capture + captop, n, fr);
captop += n + 1; /* new captures + close group */
}
p++;
continue;
}
case ICloseCapture: {
Capture *open = findopen(capture + captop, s - o);
assert(captop > 0);
if (open) { /* if possible, turn capture into a full capture */
open->siz = (s - o) - open->index + 1;
p++;
continue;
}
else { /* must create a close capture */
capture[captop].siz = 1; /* mark entry as closed */
capture[captop].index = s - o;
goto pushcapture;
}
}
case IOpenCapture:
capture[captop].siz = 0; /* mark entry as open */
capture[captop].index = s - o;
goto pushcapture;
case IFullCapture:
capture[captop].siz = getoff(p) + 1; /* save capture size */
capture[captop].index = s - o - getoff(p);
/* goto pushcapture; */
pushcapture: {
capture[captop].idx = p->i.aux2.key;
capture[captop].kind = getkind(p);
captop++;
capture = growcap(L, capture, &capsize, captop, 0, ptop);
p++;
continue;
}
default: assert(0); return NULL;
}
}
}
/* }====================================================== */

79
src/lua/lpeg-1.1.0/lpvm.h Normal file
View File

@@ -0,0 +1,79 @@
#if !defined(lpvm_h)
#define lpvm_h
#include "lpcap.h"
/*
** About Character sets in instructions: a set is a bit map with an
** initial offset, in bits, and a size, in number of instructions.
** aux1 has the default value for the bits outsize that range.
*/
/* Virtual Machine's instructions */
typedef enum Opcode {
IAny, /* if no char, fail */
IChar, /* if char != aux1, fail */
ISet, /* if char not in set, fail */
ITestAny, /* in no char, jump to 'offset' */
ITestChar, /* if char != aux1, jump to 'offset' */
ITestSet, /* if char not in set, jump to 'offset' */
ISpan, /* read a span of chars in set */
IUTFR, /* if codepoint not in range [offset, utf_to], fail */
IBehind, /* walk back 'aux1' characters (fail if not possible) */
IRet, /* return from a rule */
IEnd, /* end of pattern */
IChoice, /* stack a choice; next fail will jump to 'offset' */
IJmp, /* jump to 'offset' */
ICall, /* call rule at 'offset' */
IOpenCall, /* call rule number 'key' (must be closed to a ICall) */
ICommit, /* pop choice and jump to 'offset' */
IPartialCommit, /* update top choice to current position and jump */
IBackCommit, /* backtrack like "fail" but jump to its own 'offset' */
IFailTwice, /* pop one choice and then fail */
IFail, /* go back to saved state on choice and jump to saved offset */
IGiveup, /* internal use */
IFullCapture, /* complete capture of last 'off' chars */
IOpenCapture, /* start a capture */
ICloseCapture,
ICloseRunTime,
IEmpty /* to fill empty slots left by optimizations */
} Opcode;
/*
** All array of instructions has a 'codesize' as its first element
** and is referred by a pointer to its second element, which is the
** first actual opcode.
*/
typedef union Instruction {
struct Inst {
byte code;
byte aux1;
union {
short key;
struct {
byte offset;
byte size;
} set;
} aux2;
} i;
int offset;
uint codesize;
byte buff[1];
} Instruction;
/* extract 24-bit value from an instruction */
#define utf_to(inst) (((inst)->i.aux2.key << 8) | (inst)->i.aux1)
int charinset (const Instruction *i, const byte *buff, uint c);
const char *match (lua_State *L, const char *o, const char *s, const char *e,
Instruction *op, Capture *capture, int ptop);
#endif

View File

@@ -0,0 +1,59 @@
LIBNAME = lpeg
LUADIR = ./lua/
COPT = -O2 -DNDEBUG
# COPT = -O0 -DLPEG_DEBUG -g
CWARNS = -Wall -Wextra -pedantic \
-Waggregate-return \
-Wcast-align \
-Wcast-qual \
-Wdisabled-optimization \
-Wpointer-arith \
-Wshadow \
-Wredundant-decls \
-Wsign-compare \
-Wundef \
-Wwrite-strings \
-Wbad-function-cast \
-Wdeclaration-after-statement \
-Wmissing-prototypes \
-Wmissing-declarations \
-Wnested-externs \
-Wstrict-prototypes \
-Wc++-compat \
# -Wunreachable-code \
CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
CC = gcc
FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o lpcset.o
# For Linux
linux:
$(MAKE) lpeg.so "DLLFLAGS = -shared -fPIC"
# For Mac OS
macosx:
$(MAKE) lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
lpeg.so: $(FILES)
env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
$(FILES): makefile
test: test.lua re.lua lpeg.so
./test.lua
clean:
rm -f $(FILES) lpeg.so
lpcap.o: lpcap.c lpcap.h lptypes.h
lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h lpcset.h
lpcset.o: lpcset.c lptypes.h lpcset.h lpcode.h lptree.h lpvm.h lpcap.h
lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h lpcode.h
lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h \
lpcset.h
lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h

472
src/lua/lpeg-1.1.0/re.html Normal file
View File

@@ -0,0 +1,472 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"//www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>LPeg.re - Regex syntax for LPEG</title>
<link rel="stylesheet"
href="//www.inf.puc-rio.br/~roberto/lpeg/doc.css"
type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="//www.inf.puc-rio.br/~roberto/lpeg/">
<img alt="LPeg logo" src="lpeg-128.gif"/>
</a>
</div>
<div id="product_name"><big><strong>LPeg.re</strong></big></div>
<div id="product_description">
Regex syntax for LPEG
</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>re</h1>
<ul>
<li><a href="#basic">Basic Constructions</a></li>
<li><a href="#func">Functions</a></li>
<li><a href="#ex">Some Examples</a></li>
<li><a href="#license">License</a></li>
</ul>
</li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="basic"></a>The <code>re</code> Module</h2>
<p>
The <code>re</code> module
(provided by file <code>re.lua</code> in the distribution)
supports a somewhat conventional regex syntax
for pattern usage within <a href="lpeg.html">LPeg</a>.
</p>
<p>
The next table summarizes <code>re</code>'s syntax.
A <code>p</code> represents an arbitrary pattern;
<code>num</code> represents a number (<code>[0-9]+</code>);
<code>name</code> represents an identifier
(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
Constructions are listed in order of decreasing precedence.
<table border="1">
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
<tr><td><code>&amp; p</code></td> <td>and predicate</td></tr>
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
<tr><td><code>p^num</code></td>
<td>exactly <code>num</code> repetitions</td></tr>
<tr><td><code>p^+num</code></td>
<td>at least <code>num</code> repetitions</td></tr>
<tr><td><code>p^-num</code></td>
<td>at most <code>num</code> repetitions</td></tr>
<tr><td>(<code>name &lt;- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
<tr><td><code>[class]</code></td> <td>character class</td></tr>
<tr><td><code>.</code></td> <td>any character</td></tr>
<tr><td><code>%name</code></td>
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
<tr><td><code>name</code></td><td>non terminal</td></tr>
<tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr>
<tr><td><code>{}</code></td> <td>position capture</td></tr>
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
<tr><td><code>=name</code></td> <td>back reference</td></tr>
<tr><td><code>p -&gt; 'string'</code></td> <td>string capture</td></tr>
<tr><td><code>p -&gt; "string"</code></td> <td>string capture</td></tr>
<tr><td><code>p -&gt; num</code></td> <td>numbered capture</td></tr>
<tr><td><code>p -&gt; name</code></td> <td>function/query/string capture
equivalent to <code>p / defs[name]</code></td></tr>
<tr><td><code>p =&gt; name</code></td> <td>match-time capture
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
<tr><td><code>p ~&gt; name</code></td> <td>fold capture
(deprecated)</td></tr>
<tr><td><code>p &gt;&gt; name</code></td> <td>accumulator capture
equivalent to <code>(p % defs[name])</code></td></tr>
</tbody></table>
<p>
Any space appearing in a syntax description can be
replaced by zero or more space characters and Lua-style short comments
(<code>--</code> until end of line).
</p>
<p>
Character classes define sets of characters.
An initial <code>^</code> complements the resulting set.
A range <em>x</em><code>-</code><em>y</em> includes in the set
all characters with codes between the codes of <em>x</em> and <em>y</em>.
A pre-defined class <code>%</code><em>name</em> includes all
characters of that class.
A simple character includes itself in the set.
The only special characters inside a class are <code>^</code>
(special only if it is the first character);
<code>]</code>
(can be included in the set as the first character,
after the optional <code>^</code>);
<code>%</code> (special only if followed by a letter);
and <code>-</code>
(can be included in the set as the first or the last character).
</p>
<p>
Currently the pre-defined classes are similar to those from the
Lua's string library
(<code>%a</code> for letters,
<code>%A</code> for non letters, etc.).
There is also a class <code>%nl</code>
containing only the newline character,
which is particularly handy for grammars written inside long strings,
as long strings do not interpret escape sequences like <code>\n</code>.
</p>
<h2><a name="func">Functions</a></h2>
<h3><code>re.compile (string, [, defs])</code></h3>
<p>
Compiles the given string and
returns an equivalent LPeg pattern.
The given string may define either an expression or a grammar.
The optional <code>defs</code> table provides extra Lua values
to be used by the pattern.
</p>
<h3><code>re.find (subject, pattern [, init])</code></h3>
<p>
Searches the given pattern in the given subject.
If it finds a match,
returns the index where this occurrence starts and
the index where it ends.
Otherwise, returns nil.
</p>
<p>
An optional numeric argument <code>init</code> makes the search
starts at that position in the subject string.
As usual in Lua libraries,
a negative value counts from the end.
</p>
<h3><code>re.gsub (subject, pattern, replacement)</code></h3>
<p>
Does a <em>global substitution</em>,
replacing all occurrences of <code>pattern</code>
in the given <code>subject</code> by <code>replacement</code>.
<h3><code>re.match (subject, pattern)</code></h3>
<p>
Matches the given pattern against the given subject,
returning all captures.
</p>
<h3><code>re.updatelocale ()</code></h3>
<p>
Updates the pre-defined character classes to the current locale.
</p>
<h2><a name="ex">Some Examples</a></h2>
<h3>A complete simple program</h3>
<p>
The next code shows a simple complete Lua program using
the <code>re</code> module:
</p>
<pre class="example">
local re = require"re"
-- find the position of the first numeral in a string
print(re.find("the number 423 is odd", "[0-9]+")) --&gt; 12 14
-- returns all words in a string
print(re.match("the number 423 is odd", "({%a+} / .)*"))
--&gt; the number is odd
-- returns the first numeral in a string
print(re.match("the number 423 is odd", "s &lt;- {%d+} / . s"))
--&gt; 423
-- substitutes a dot for each vowel in a string
print(re.gsub("hello World", "[aeiou]", "."))
--&gt; h.ll. W.rld
</pre>
<h3>Balanced parentheses</h3>
<p>
The following call will produce the same pattern produced by the
Lua expression in the
<a href="lpeg.html#balanced">balanced parentheses</a> example:
</p>
<pre class="example">
b = re.compile[[ balanced &lt;- "(" ([^()] / balanced)* ")" ]]
</pre>
<h3>String reversal</h3>
<p>
The next example reverses a string:
</p>
<pre class="example">
rev = re.compile[[ R &lt;- (!.) -&gt; '' / ({.} R) -&gt; '%2%1']]
print(rev:match"0123456789") --&gt; 9876543210
</pre>
<h3>CSV decoder</h3>
<p>
The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
</p>
<pre class="example">
record = re.compile[[
record &lt;- {| field (',' field)* |} (%nl / !.)
field &lt;- escaped / nonescaped
nonescaped &lt;- { [^,"%nl]* }
escaped &lt;- '"' {~ ([^"] / '""' -&gt; '"')* ~} '"'
]]
</pre>
<h3>Lua's long strings</h3>
<p>
The next example matches Lua long strings:
</p>
<pre class="example">
c = re.compile([[
longstring &lt;- ('[' {:eq: '='* :} '[' close)
close &lt;- ']' =eq ']' / . close
]])
print(c:match'[==[]]===]]]]==]===[]') --&gt; 17
</pre>
<h3>Abstract Syntax Trees</h3>
<p>
This example shows a simple way to build an
abstract syntax tree (AST) for a given grammar.
To keep our example simple,
let us consider the following grammar
for lists of names:
</p>
<pre class="example">
p = re.compile[[
listname &lt;- (name s)*
name &lt;- [a-z][a-z]*
s &lt;- %s*
]]
</pre>
<p>
Now, we will add captures to build a corresponding AST.
As a first step, the pattern will build a table to
represent each non terminal;
terminals will be represented by their corresponding strings:
</p>
<pre class="example">
c = re.compile[[
listname &lt;- {| (name s)* |}
name &lt;- {| {[a-z][a-z]*} |}
s &lt;- %s*
]]
</pre>
<p>
Now, a match against <code>"hi hello bye"</code>
results in the table
<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
</p>
<p>
For such a simple grammar,
this AST is more than enough;
actually, the tables around each single name
are already overkilling.
More complex grammars,
however, may need some more structure.
Specifically,
it would be useful if each table had
a <code>tag</code> field telling what non terminal
that table represents.
We can add such a tag using
<a href="lpeg.html#cap-g">named group captures</a>:
</p>
<pre class="example">
x = re.compile[[
listname <- {| {:tag: '' -> 'list':} (name s)* |}
name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
s <- ' '*
]]
</pre>
<p>
With these group captures,
a match against <code>"hi hello bye"</code>
results in the following table:
</p>
<pre class="example">
{tag="list",
{tag="id", "hi"},
{tag="id", "hello"},
{tag="id", "bye"}
}
</pre>
<h3>Indented blocks</h3>
<p>
This example breaks indented blocks into tables,
respecting the indentation:
</p>
<pre class="example">
p = re.compile[[
block &lt;- {| {:ident:' '*:} line
((=ident !' ' line) / &amp;(=ident ' ') block)* |}
line &lt;- {[^%nl]*} %nl
]]
</pre>
<p>
As an example,
consider the following text:
</p>
<pre class="example">
t = p:match[[
first line
subline 1
subline 2
second line
third line
subline 3.1
subline 3.1.1
subline 3.2
]]
</pre>
<p>
The resulting table <code>t</code> will be like this:
</p>
<pre class="example">
{'first line'; {'subline 1'; 'subline 2'; ident = ' '};
'second line';
'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = ' '};
'subline 3.2'; ident = ' '};
ident = ''}
</pre>
<h3>Macro expander</h3>
<p>
This example implements a simple macro expander.
Macros must be defined as part of the pattern,
following some simple rules:
</p>
<pre class="example">
p = re.compile[[
text &lt;- {~ item* ~}
item &lt;- macro / [^()] / '(' item* ')'
arg &lt;- ' '* {~ (!',' item)* ~}
args &lt;- '(' arg (',' arg)* ')'
-- now we define some macros
macro &lt;- ('apply' args) -&gt; '%1(%2)'
/ ('add' args) -&gt; '%1 + %2'
/ ('mul' args) -&gt; '%1 * %2'
]]
print(p:match"add(mul(a,b), apply(f,x))") --&gt; a * b + f(x)
</pre>
<p>
A <code>text</code> is a sequence of items,
wherein we apply a substitution capture to expand any macros.
An <code>item</code> is either a macro,
any character different from parentheses,
or a parenthesized expression.
A macro argument (<code>arg</code>) is a sequence
of items different from a comma.
(Note that a comma may appear inside an item,
e.g., inside a parenthesized expression.)
Again we do a substitution capture to expand any macro
in the argument before expanding the outer macro.
<code>args</code> is a list of arguments separated by commas.
Finally we define the macros.
Each macro is a string substitution;
it replaces the macro name and its arguments by its corresponding string,
with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
</p>
<h3>Patterns</h3>
<p>
This example shows the complete syntax
of patterns accepted by <code>re</code>.
</p>
<pre class="example">
p = [=[
pattern &lt;- exp !.
exp &lt;- S (grammar / alternative)
alternative &lt;- seq ('/' S seq)*
seq &lt;- prefix*
prefix &lt;- '&amp;' S prefix / '!' S prefix / suffix
suffix &lt;- primary S (([+*?]
/ '^' [+-]? num
/ '-&gt;' S (string / '{}' / name)
/ '&gt&gt;' S name
/ '=&gt;' S name) S)*
primary &lt;- '(' exp ')' / string / class / defined
/ '{:' (name ':')? exp ':}'
/ '=' name
/ '{}'
/ '{~' exp '~}'
/ '{|' exp '|}'
/ '{' exp '}'
/ '.'
/ name S !arrow
/ '&lt;' name '&gt;' -- old-style non terminals
grammar &lt;- definition+
definition &lt;- name S arrow exp
class &lt;- '[' '^'? item (!']' item)* ']'
item &lt;- defined / range / .
range &lt;- . '-' [^]]
S &lt;- (%s / '--' [^%nl]*)* -- spaces and comments
name &lt;- [A-Za-z_][A-Za-z0-9_]*
arrow &lt;- '&lt;-'
num &lt;- [0-9]+
string &lt;- '"' [^"]* '"' / "'" [^']* "'"
defined &lt;- '%' name
]=]
print(re.match(p, p)) -- a self description must match itself
</pre>
<h2><a name="license">License</a></h2>
<p>
This module is part of the <a href="lpeg.html">LPeg</a> package and shares
its <a href="lpeg.html#license">license</a>.
</div> <!-- id="content" -->
</div> <!-- id="main" -->
</div> <!-- id="container" -->
</body>
</html>

270
src/lua/lpeg-1.1.0/re.lua Normal file
View File

@@ -0,0 +1,270 @@
--
-- Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-- written by Roberto Ierusalimschy
--
-- imported functions and modules
local tonumber, type, print, error = tonumber, type, print, error
local setmetatable = setmetatable
local m = require"lpeg"
-- 'm' will be used to parse expressions, and 'mm' will be used to
-- create expressions; that is, 're' runs on 'm', creating patterns
-- on 'mm'
local mm = m
-- patterns' metatable
local mt = getmetatable(mm.P(0))
local version = _VERSION
-- No more global accesses after this point
_ENV = nil -- does no harm in Lua 5.1
local any = m.P(1)
-- Pre-defined names
local Predef = { nl = m.P"\n" }
local mem
local fmem
local gmem
local function updatelocale ()
mm.locale(Predef)
Predef.a = Predef.alpha
Predef.c = Predef.cntrl
Predef.d = Predef.digit
Predef.g = Predef.graph
Predef.l = Predef.lower
Predef.p = Predef.punct
Predef.s = Predef.space
Predef.u = Predef.upper
Predef.w = Predef.alnum
Predef.x = Predef.xdigit
Predef.A = any - Predef.a
Predef.C = any - Predef.c
Predef.D = any - Predef.d
Predef.G = any - Predef.g
Predef.L = any - Predef.l
Predef.P = any - Predef.p
Predef.S = any - Predef.s
Predef.U = any - Predef.u
Predef.W = any - Predef.w
Predef.X = any - Predef.x
mem = {} -- restart memoization
fmem = {}
gmem = {}
local mt = {__mode = "v"}
setmetatable(mem, mt)
setmetatable(fmem, mt)
setmetatable(gmem, mt)
end
updatelocale()
local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
local function patt_error (s, i)
local msg = (#s < i + 20) and s:sub(i)
or s:sub(i,i+20) .. "..."
msg = ("pattern error near '%s'"):format(msg)
error(msg, 2)
end
local function mult (p, n)
local np = mm.P(true)
while n >= 1 do
if n%2 >= 1 then np = np * p end
p = p * p
n = n/2
end
return np
end
local function equalcap (s, i, c)
if type(c) ~= "string" then return nil end
local e = #c + i
if s:sub(i, e - 1) == c then return e else return nil end
end
local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
local arrow = S * "<-"
local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
name = m.C(name)
-- a defined name only have meaning in a given environment
local Def = name * m.Carg(1)
local function getdef (id, defs)
local c = defs and defs[id]
if not c then error("undefined name: " .. id) end
return c
end
-- match a name and return a group of its corresponding definition
-- and 'f' (to be folded in 'Suffix')
local function defwithfunc (f)
return m.Cg(Def / getdef * m.Cc(f))
end
local num = m.C(m.R"09"^1) * S / tonumber
local String = "'" * m.C((any - "'")^0) * "'" +
'"' * m.C((any - '"')^0) * '"'
local defined = "%" * Def / function (c,Defs)
local cat = Defs and Defs[c] or Predef[c]
if not cat then error ("name '" .. c .. "' undefined") end
return cat
end
local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
local item = (defined + Range + m.C(any)) / m.P
local Class =
"["
* (m.C(m.P"^"^-1)) -- optional complement symbol
* (item * ((item % mt.__add) - "]")^0) /
function (c, p) return c == "^" and any - p or p end
* "]"
local function adddef (t, k, exp)
if t[k] then
error("'"..k.."' already defined as a rule")
else
t[k] = exp
end
return t
end
local function firstdef (n, r) return adddef({n}, n, r) end
local function NT (n, b)
if not b then
error("rule '"..n.."' used outside a grammar")
else return mm.V(n)
end
end
local exp = m.P{ "Exp",
Exp = S * ( m.V"Grammar"
+ m.V"Seq" * ("/" * S * m.V"Seq" % mt.__add)^0 );
Seq = (m.Cc(m.P"") * (m.V"Prefix" % mt.__mul)^0)
* (#seq_follow + patt_error);
Prefix = "&" * S * m.V"Prefix" / mt.__len
+ "!" * S * m.V"Prefix" / mt.__unm
+ m.V"Suffix";
Suffix = m.V"Primary" * S *
( ( m.P"+" * m.Cc(1, mt.__pow)
+ m.P"*" * m.Cc(0, mt.__pow)
+ m.P"?" * m.Cc(-1, mt.__pow)
+ "^" * ( m.Cg(num * m.Cc(mult))
+ m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
)
+ "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
+ m.P"{}" * m.Cc(nil, m.Ct)
+ defwithfunc(mt.__div)
)
+ "=>" * S * defwithfunc(mm.Cmt)
+ ">>" * S * defwithfunc(mt.__mod)
+ "~>" * S * defwithfunc(mm.Cf)
) % function (a,b,f) return f(a,b) end * S
)^0;
Primary = "(" * m.V"Exp" * ")"
+ String / mm.P
+ Class
+ defined
+ "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
function (n, p) return mm.Cg(p, n) end
+ "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
+ m.P"{}" / mm.Cp
+ "{~" * m.V"Exp" * "~}" / mm.Cs
+ "{|" * m.V"Exp" * "|}" / mm.Ct
+ "{" * m.V"Exp" * "}" / mm.C
+ m.P"." * m.Cc(any)
+ (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
Definition = name * arrow * m.V"Exp";
Grammar = m.Cg(m.Cc(true), "G") *
((m.V"Definition" / firstdef) * (m.V"Definition" % adddef)^0) / mm.P
}
local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
local function compile (p, defs)
if mm.type(p) == "pattern" then return p end -- already compiled
local cp = pattern:match(p, 1, defs)
if not cp then error("incorrect pattern", 3) end
return cp
end
local function match (s, p, i)
local cp = mem[p]
if not cp then
cp = compile(p)
mem[p] = cp
end
return cp:match(s, i or 1)
end
local function find (s, p, i)
local cp = fmem[p]
if not cp then
cp = compile(p) / 0
cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
fmem[p] = cp
end
local i, e = cp:match(s, i or 1)
if i then return i, e - 1
else return i
end
end
local function gsub (s, p, rep)
local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
gmem[p] = g
local cp = g[rep]
if not cp then
cp = compile(p)
cp = mm.Cs((cp / rep + 1)^0)
g[rep] = cp
end
return cp:match(s)
end
-- exported names
local re = {
compile = compile,
match = match,
find = find,
gsub = gsub,
updatelocale = updatelocale,
}
if version == "Lua 5.1" then _G.re = re end
return re

1667
src/lua/lpeg-1.1.0/test.lua Executable file

File diff suppressed because it is too large Load Diff