1*dfc11533SChris Williamson /* 2*dfc11533SChris Williamson ** $Id: lstrlib.c,v 1.178.1.1 2013/04/12 18:48:47 roberto Exp $ 3*dfc11533SChris Williamson ** Standard library for string operations and pattern-matching 4*dfc11533SChris Williamson ** See Copyright Notice in lua.h 5*dfc11533SChris Williamson */ 6*dfc11533SChris Williamson 7*dfc11533SChris Williamson 8*dfc11533SChris Williamson #include <sys/ctype.h> 9*dfc11533SChris Williamson #include <sys/zfs_context.h> 10*dfc11533SChris Williamson 11*dfc11533SChris Williamson #define lstrlib_c 12*dfc11533SChris Williamson #define LUA_LIB 13*dfc11533SChris Williamson 14*dfc11533SChris Williamson #include "lua.h" 15*dfc11533SChris Williamson 16*dfc11533SChris Williamson #include "lauxlib.h" 17*dfc11533SChris Williamson #include "lualib.h" 18*dfc11533SChris Williamson 19*dfc11533SChris Williamson 20*dfc11533SChris Williamson /* 21*dfc11533SChris Williamson ** maximum number of captures that a pattern can do during 22*dfc11533SChris Williamson ** pattern-matching. This limit is arbitrary. 23*dfc11533SChris Williamson */ 24*dfc11533SChris Williamson #if !defined(LUA_MAXCAPTURES) 25*dfc11533SChris Williamson #define LUA_MAXCAPTURES 32 26*dfc11533SChris Williamson #endif 27*dfc11533SChris Williamson 28*dfc11533SChris Williamson 29*dfc11533SChris Williamson /* macro to `unsign' a character */ 30*dfc11533SChris Williamson #define uchar(c) ((unsigned char)(c)) 31*dfc11533SChris Williamson 32*dfc11533SChris Williamson /* 33*dfc11533SChris Williamson * PATCHED: add missing character macros. 34*dfc11533SChris Williamson */ 35*dfc11533SChris Williamson #define tolower(C) (((C) >= 'A' && (C) <= 'Z') ? (C) - 'A' + 'a' : (C)) 36*dfc11533SChris Williamson #define toupper(C) (((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A': (C)) 37*dfc11533SChris Williamson #define iscntrl(C) ((((C) >= 0) && ((C) <= 0x1f)) || ((C) == 0x7f)) 38*dfc11533SChris Williamson #define isgraph(C) ((C) >= 0x21 && (C) <= 0x7E) 39*dfc11533SChris Williamson #define ispunct(C) (((C) >= 0x21 && (C) <= 0x2F) || \ 40*dfc11533SChris Williamson ((C) >= 0x3A && (C) <= 0x40) || \ 41*dfc11533SChris Williamson ((C) >= 0x5B && (C) <= 0x60) || \ 42*dfc11533SChris Williamson ((C) >= 0x7B && (C) <= 0x7E)) 43*dfc11533SChris Williamson 44*dfc11533SChris Williamson /* 45*dfc11533SChris Williamson * The provided version of sprintf returns a char *, but str_format expects 46*dfc11533SChris Williamson * it to return the number of characters printed. This version has the expected 47*dfc11533SChris Williamson * behavior. 48*dfc11533SChris Williamson */ 49*dfc11533SChris Williamson static size_t str_sprintf(char *buf, const char *fmt, ...) { 50*dfc11533SChris Williamson va_list args; 51*dfc11533SChris Williamson size_t len; 52*dfc11533SChris Williamson 53*dfc11533SChris Williamson va_start(args, fmt); 54*dfc11533SChris Williamson len = vsnprintf(buf, INT_MAX, fmt, args); 55*dfc11533SChris Williamson va_end(args); 56*dfc11533SChris Williamson 57*dfc11533SChris Williamson return len; 58*dfc11533SChris Williamson } 59*dfc11533SChris Williamson 60*dfc11533SChris Williamson 61*dfc11533SChris Williamson static int str_len (lua_State *L) { 62*dfc11533SChris Williamson size_t l; 63*dfc11533SChris Williamson luaL_checklstring(L, 1, &l); 64*dfc11533SChris Williamson lua_pushinteger(L, (lua_Integer)l); 65*dfc11533SChris Williamson return 1; 66*dfc11533SChris Williamson } 67*dfc11533SChris Williamson 68*dfc11533SChris Williamson 69*dfc11533SChris Williamson /* translate a relative string position: negative means back from end */ 70*dfc11533SChris Williamson static size_t posrelat (ptrdiff_t pos, size_t len) { 71*dfc11533SChris Williamson if (pos >= 0) return (size_t)pos; 72*dfc11533SChris Williamson else if (0u - (size_t)pos > len) return 0; 73*dfc11533SChris Williamson else return len - ((size_t)-pos) + 1; 74*dfc11533SChris Williamson } 75*dfc11533SChris Williamson 76*dfc11533SChris Williamson 77*dfc11533SChris Williamson static int str_sub (lua_State *L) { 78*dfc11533SChris Williamson size_t l; 79*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &l); 80*dfc11533SChris Williamson size_t start = posrelat(luaL_checkinteger(L, 2), l); 81*dfc11533SChris Williamson size_t end = posrelat(luaL_optinteger(L, 3, -1), l); 82*dfc11533SChris Williamson if (start < 1) start = 1; 83*dfc11533SChris Williamson if (end > l) end = l; 84*dfc11533SChris Williamson if (start <= end) 85*dfc11533SChris Williamson lua_pushlstring(L, s + start - 1, end - start + 1); 86*dfc11533SChris Williamson else lua_pushliteral(L, ""); 87*dfc11533SChris Williamson return 1; 88*dfc11533SChris Williamson } 89*dfc11533SChris Williamson 90*dfc11533SChris Williamson 91*dfc11533SChris Williamson static int str_reverse (lua_State *L) { 92*dfc11533SChris Williamson size_t l, i; 93*dfc11533SChris Williamson luaL_Buffer b; 94*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &l); 95*dfc11533SChris Williamson char *p = luaL_buffinitsize(L, &b, l); 96*dfc11533SChris Williamson for (i = 0; i < l; i++) 97*dfc11533SChris Williamson p[i] = s[l - i - 1]; 98*dfc11533SChris Williamson luaL_pushresultsize(&b, l); 99*dfc11533SChris Williamson return 1; 100*dfc11533SChris Williamson } 101*dfc11533SChris Williamson 102*dfc11533SChris Williamson 103*dfc11533SChris Williamson static int str_lower (lua_State *L) { 104*dfc11533SChris Williamson size_t l; 105*dfc11533SChris Williamson size_t i; 106*dfc11533SChris Williamson luaL_Buffer b; 107*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &l); 108*dfc11533SChris Williamson char *p = luaL_buffinitsize(L, &b, l); 109*dfc11533SChris Williamson for (i=0; i<l; i++) 110*dfc11533SChris Williamson p[i] = tolower(uchar(s[i])); 111*dfc11533SChris Williamson luaL_pushresultsize(&b, l); 112*dfc11533SChris Williamson return 1; 113*dfc11533SChris Williamson } 114*dfc11533SChris Williamson 115*dfc11533SChris Williamson 116*dfc11533SChris Williamson static int str_upper (lua_State *L) { 117*dfc11533SChris Williamson size_t l; 118*dfc11533SChris Williamson size_t i; 119*dfc11533SChris Williamson luaL_Buffer b; 120*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &l); 121*dfc11533SChris Williamson char *p = luaL_buffinitsize(L, &b, l); 122*dfc11533SChris Williamson for (i=0; i<l; i++) 123*dfc11533SChris Williamson p[i] = toupper(uchar(s[i])); 124*dfc11533SChris Williamson luaL_pushresultsize(&b, l); 125*dfc11533SChris Williamson return 1; 126*dfc11533SChris Williamson } 127*dfc11533SChris Williamson 128*dfc11533SChris Williamson 129*dfc11533SChris Williamson /* reasonable limit to avoid arithmetic overflow */ 130*dfc11533SChris Williamson #define MAXSIZE ((~(size_t)0) >> 1) 131*dfc11533SChris Williamson 132*dfc11533SChris Williamson static int str_rep (lua_State *L) { 133*dfc11533SChris Williamson size_t l, lsep; 134*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &l); 135*dfc11533SChris Williamson int n = luaL_checkint(L, 2); 136*dfc11533SChris Williamson const char *sep = luaL_optlstring(L, 3, "", &lsep); 137*dfc11533SChris Williamson if (n <= 0) lua_pushliteral(L, ""); 138*dfc11533SChris Williamson else if (l + lsep < l || l + lsep >= MAXSIZE / n) /* may overflow? */ 139*dfc11533SChris Williamson return luaL_error(L, "resulting string too large"); 140*dfc11533SChris Williamson else { 141*dfc11533SChris Williamson size_t totallen = n * l + (n - 1) * lsep; 142*dfc11533SChris Williamson luaL_Buffer b; 143*dfc11533SChris Williamson char *p = luaL_buffinitsize(L, &b, totallen); 144*dfc11533SChris Williamson while (n-- > 1) { /* first n-1 copies (followed by separator) */ 145*dfc11533SChris Williamson memcpy(p, s, l * sizeof(char)); p += l; 146*dfc11533SChris Williamson if (lsep > 0) { /* avoid empty 'memcpy' (may be expensive) */ 147*dfc11533SChris Williamson memcpy(p, sep, lsep * sizeof(char)); p += lsep; 148*dfc11533SChris Williamson } 149*dfc11533SChris Williamson } 150*dfc11533SChris Williamson memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 151*dfc11533SChris Williamson luaL_pushresultsize(&b, totallen); 152*dfc11533SChris Williamson } 153*dfc11533SChris Williamson return 1; 154*dfc11533SChris Williamson } 155*dfc11533SChris Williamson 156*dfc11533SChris Williamson 157*dfc11533SChris Williamson static int str_byte (lua_State *L) { 158*dfc11533SChris Williamson size_t l; 159*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &l); 160*dfc11533SChris Williamson size_t posi = posrelat(luaL_optinteger(L, 2, 1), l); 161*dfc11533SChris Williamson size_t pose = posrelat(luaL_optinteger(L, 3, posi), l); 162*dfc11533SChris Williamson int n, i; 163*dfc11533SChris Williamson if (posi < 1) posi = 1; 164*dfc11533SChris Williamson if (pose > l) pose = l; 165*dfc11533SChris Williamson if (posi > pose) return 0; /* empty interval; return no values */ 166*dfc11533SChris Williamson n = (int)(pose - posi + 1); 167*dfc11533SChris Williamson if (posi + n <= pose) /* (size_t -> int) overflow? */ 168*dfc11533SChris Williamson return luaL_error(L, "string slice too long"); 169*dfc11533SChris Williamson luaL_checkstack(L, n, "string slice too long"); 170*dfc11533SChris Williamson for (i=0; i<n; i++) 171*dfc11533SChris Williamson lua_pushinteger(L, uchar(s[posi+i-1])); 172*dfc11533SChris Williamson return n; 173*dfc11533SChris Williamson } 174*dfc11533SChris Williamson 175*dfc11533SChris Williamson 176*dfc11533SChris Williamson static int str_char (lua_State *L) { 177*dfc11533SChris Williamson int n = lua_gettop(L); /* number of arguments */ 178*dfc11533SChris Williamson int i; 179*dfc11533SChris Williamson luaL_Buffer b; 180*dfc11533SChris Williamson char *p = luaL_buffinitsize(L, &b, n); 181*dfc11533SChris Williamson for (i=1; i<=n; i++) { 182*dfc11533SChris Williamson int c = luaL_checkint(L, i); 183*dfc11533SChris Williamson luaL_argcheck(L, uchar(c) == c, i, "value out of range"); 184*dfc11533SChris Williamson p[i - 1] = uchar(c); 185*dfc11533SChris Williamson } 186*dfc11533SChris Williamson luaL_pushresultsize(&b, n); 187*dfc11533SChris Williamson return 1; 188*dfc11533SChris Williamson } 189*dfc11533SChris Williamson 190*dfc11533SChris Williamson 191*dfc11533SChris Williamson static int writer (lua_State *L, const void* b, size_t size, void* B) { 192*dfc11533SChris Williamson (void)L; 193*dfc11533SChris Williamson luaL_addlstring((luaL_Buffer*) B, (const char *)b, size); 194*dfc11533SChris Williamson return 0; 195*dfc11533SChris Williamson } 196*dfc11533SChris Williamson 197*dfc11533SChris Williamson 198*dfc11533SChris Williamson static int str_dump (lua_State *L) { 199*dfc11533SChris Williamson luaL_Buffer b; 200*dfc11533SChris Williamson luaL_checktype(L, 1, LUA_TFUNCTION); 201*dfc11533SChris Williamson lua_settop(L, 1); 202*dfc11533SChris Williamson luaL_buffinit(L,&b); 203*dfc11533SChris Williamson if (lua_dump(L, writer, &b) != 0) 204*dfc11533SChris Williamson return luaL_error(L, "unable to dump given function"); 205*dfc11533SChris Williamson luaL_pushresult(&b); 206*dfc11533SChris Williamson return 1; 207*dfc11533SChris Williamson } 208*dfc11533SChris Williamson 209*dfc11533SChris Williamson 210*dfc11533SChris Williamson 211*dfc11533SChris Williamson /* 212*dfc11533SChris Williamson ** {====================================================== 213*dfc11533SChris Williamson ** PATTERN MATCHING 214*dfc11533SChris Williamson ** ======================================================= 215*dfc11533SChris Williamson */ 216*dfc11533SChris Williamson 217*dfc11533SChris Williamson 218*dfc11533SChris Williamson #define CAP_UNFINISHED (-1) 219*dfc11533SChris Williamson #define CAP_POSITION (-2) 220*dfc11533SChris Williamson 221*dfc11533SChris Williamson 222*dfc11533SChris Williamson typedef struct MatchState { 223*dfc11533SChris Williamson int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 224*dfc11533SChris Williamson const char *src_init; /* init of source string */ 225*dfc11533SChris Williamson const char *src_end; /* end ('\0') of source string */ 226*dfc11533SChris Williamson const char *p_end; /* end ('\0') of pattern */ 227*dfc11533SChris Williamson lua_State *L; 228*dfc11533SChris Williamson int level; /* total number of captures (finished or unfinished) */ 229*dfc11533SChris Williamson struct { 230*dfc11533SChris Williamson const char *init; 231*dfc11533SChris Williamson ptrdiff_t len; 232*dfc11533SChris Williamson } capture[LUA_MAXCAPTURES]; 233*dfc11533SChris Williamson } MatchState; 234*dfc11533SChris Williamson 235*dfc11533SChris Williamson 236*dfc11533SChris Williamson /* recursive function */ 237*dfc11533SChris Williamson static const char *match (MatchState *ms, const char *s, const char *p); 238*dfc11533SChris Williamson 239*dfc11533SChris Williamson 240*dfc11533SChris Williamson /* maximum recursion depth for 'match' */ 241*dfc11533SChris Williamson #if !defined(MAXCCALLS) 242*dfc11533SChris Williamson #define MAXCCALLS 200 243*dfc11533SChris Williamson #endif 244*dfc11533SChris Williamson 245*dfc11533SChris Williamson 246*dfc11533SChris Williamson #define L_ESC '%' 247*dfc11533SChris Williamson #define SPECIALS "^$*+?.([%-" 248*dfc11533SChris Williamson 249*dfc11533SChris Williamson 250*dfc11533SChris Williamson static int check_capture (MatchState *ms, int l) { 251*dfc11533SChris Williamson l -= '1'; 252*dfc11533SChris Williamson if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) 253*dfc11533SChris Williamson return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 254*dfc11533SChris Williamson return l; 255*dfc11533SChris Williamson } 256*dfc11533SChris Williamson 257*dfc11533SChris Williamson 258*dfc11533SChris Williamson static int capture_to_close (MatchState *ms) { 259*dfc11533SChris Williamson int level = ms->level; 260*dfc11533SChris Williamson for (level--; level>=0; level--) 261*dfc11533SChris Williamson if (ms->capture[level].len == CAP_UNFINISHED) return level; 262*dfc11533SChris Williamson return luaL_error(ms->L, "invalid pattern capture"); 263*dfc11533SChris Williamson } 264*dfc11533SChris Williamson 265*dfc11533SChris Williamson 266*dfc11533SChris Williamson static const char *classend (MatchState *ms, const char *p) { 267*dfc11533SChris Williamson switch (*p++) { 268*dfc11533SChris Williamson case L_ESC: { 269*dfc11533SChris Williamson if (p == ms->p_end) 270*dfc11533SChris Williamson luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); 271*dfc11533SChris Williamson return p+1; 272*dfc11533SChris Williamson } 273*dfc11533SChris Williamson case '[': { 274*dfc11533SChris Williamson if (*p == '^') p++; 275*dfc11533SChris Williamson do { /* look for a `]' */ 276*dfc11533SChris Williamson if (p == ms->p_end) 277*dfc11533SChris Williamson luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); 278*dfc11533SChris Williamson if (*(p++) == L_ESC && p < ms->p_end) 279*dfc11533SChris Williamson p++; /* skip escapes (e.g. `%]') */ 280*dfc11533SChris Williamson } while (*p != ']'); 281*dfc11533SChris Williamson return p+1; 282*dfc11533SChris Williamson } 283*dfc11533SChris Williamson default: { 284*dfc11533SChris Williamson return p; 285*dfc11533SChris Williamson } 286*dfc11533SChris Williamson } 287*dfc11533SChris Williamson } 288*dfc11533SChris Williamson 289*dfc11533SChris Williamson 290*dfc11533SChris Williamson static int match_class (int c, int cl) { 291*dfc11533SChris Williamson int res; 292*dfc11533SChris Williamson switch (tolower(cl)) { 293*dfc11533SChris Williamson case 'a' : res = isalpha(c); break; 294*dfc11533SChris Williamson case 'c' : res = iscntrl(c); break; 295*dfc11533SChris Williamson case 'd' : res = isdigit(c); break; 296*dfc11533SChris Williamson case 'g' : res = isgraph(c); break; 297*dfc11533SChris Williamson case 'l' : res = islower(c); break; 298*dfc11533SChris Williamson case 'p' : res = ispunct(c); break; 299*dfc11533SChris Williamson case 's' : res = isspace(c); break; 300*dfc11533SChris Williamson case 'u' : res = isupper(c); break; 301*dfc11533SChris Williamson case 'w' : res = isalnum(c); break; 302*dfc11533SChris Williamson case 'x' : res = isxdigit(c); break; 303*dfc11533SChris Williamson case 'z' : res = (c == 0); break; /* deprecated option */ 304*dfc11533SChris Williamson default: return (cl == c); 305*dfc11533SChris Williamson } 306*dfc11533SChris Williamson return (islower(cl) ? res : !res); 307*dfc11533SChris Williamson } 308*dfc11533SChris Williamson 309*dfc11533SChris Williamson 310*dfc11533SChris Williamson static int matchbracketclass (int c, const char *p, const char *ec) { 311*dfc11533SChris Williamson int sig = 1; 312*dfc11533SChris Williamson if (*(p+1) == '^') { 313*dfc11533SChris Williamson sig = 0; 314*dfc11533SChris Williamson p++; /* skip the `^' */ 315*dfc11533SChris Williamson } 316*dfc11533SChris Williamson while (++p < ec) { 317*dfc11533SChris Williamson if (*p == L_ESC) { 318*dfc11533SChris Williamson p++; 319*dfc11533SChris Williamson if (match_class(c, uchar(*p))) 320*dfc11533SChris Williamson return sig; 321*dfc11533SChris Williamson } 322*dfc11533SChris Williamson else if ((*(p+1) == '-') && (p+2 < ec)) { 323*dfc11533SChris Williamson p+=2; 324*dfc11533SChris Williamson if (uchar(*(p-2)) <= c && c <= uchar(*p)) 325*dfc11533SChris Williamson return sig; 326*dfc11533SChris Williamson } 327*dfc11533SChris Williamson else if (uchar(*p) == c) return sig; 328*dfc11533SChris Williamson } 329*dfc11533SChris Williamson return !sig; 330*dfc11533SChris Williamson } 331*dfc11533SChris Williamson 332*dfc11533SChris Williamson 333*dfc11533SChris Williamson static int singlematch (MatchState *ms, const char *s, const char *p, 334*dfc11533SChris Williamson const char *ep) { 335*dfc11533SChris Williamson if (s >= ms->src_end) 336*dfc11533SChris Williamson return 0; 337*dfc11533SChris Williamson else { 338*dfc11533SChris Williamson int c = uchar(*s); 339*dfc11533SChris Williamson switch (*p) { 340*dfc11533SChris Williamson case '.': return 1; /* matches any char */ 341*dfc11533SChris Williamson case L_ESC: return match_class(c, uchar(*(p+1))); 342*dfc11533SChris Williamson case '[': return matchbracketclass(c, p, ep-1); 343*dfc11533SChris Williamson default: return (uchar(*p) == c); 344*dfc11533SChris Williamson } 345*dfc11533SChris Williamson } 346*dfc11533SChris Williamson } 347*dfc11533SChris Williamson 348*dfc11533SChris Williamson 349*dfc11533SChris Williamson static const char *matchbalance (MatchState *ms, const char *s, 350*dfc11533SChris Williamson const char *p) { 351*dfc11533SChris Williamson if (p >= ms->p_end - 1) 352*dfc11533SChris Williamson luaL_error(ms->L, "malformed pattern " 353*dfc11533SChris Williamson "(missing arguments to " LUA_QL("%%b") ")"); 354*dfc11533SChris Williamson if (*s != *p) return NULL; 355*dfc11533SChris Williamson else { 356*dfc11533SChris Williamson int b = *p; 357*dfc11533SChris Williamson int e = *(p+1); 358*dfc11533SChris Williamson int cont = 1; 359*dfc11533SChris Williamson while (++s < ms->src_end) { 360*dfc11533SChris Williamson if (*s == e) { 361*dfc11533SChris Williamson if (--cont == 0) return s+1; 362*dfc11533SChris Williamson } 363*dfc11533SChris Williamson else if (*s == b) cont++; 364*dfc11533SChris Williamson } 365*dfc11533SChris Williamson } 366*dfc11533SChris Williamson return NULL; /* string ends out of balance */ 367*dfc11533SChris Williamson } 368*dfc11533SChris Williamson 369*dfc11533SChris Williamson 370*dfc11533SChris Williamson static const char *max_expand (MatchState *ms, const char *s, 371*dfc11533SChris Williamson const char *p, const char *ep) { 372*dfc11533SChris Williamson ptrdiff_t i = 0; /* counts maximum expand for item */ 373*dfc11533SChris Williamson while (singlematch(ms, s + i, p, ep)) 374*dfc11533SChris Williamson i++; 375*dfc11533SChris Williamson /* keeps trying to match with the maximum repetitions */ 376*dfc11533SChris Williamson while (i>=0) { 377*dfc11533SChris Williamson const char *res = match(ms, (s+i), ep+1); 378*dfc11533SChris Williamson if (res) return res; 379*dfc11533SChris Williamson i--; /* else didn't match; reduce 1 repetition to try again */ 380*dfc11533SChris Williamson } 381*dfc11533SChris Williamson return NULL; 382*dfc11533SChris Williamson } 383*dfc11533SChris Williamson 384*dfc11533SChris Williamson 385*dfc11533SChris Williamson static const char *min_expand (MatchState *ms, const char *s, 386*dfc11533SChris Williamson const char *p, const char *ep) { 387*dfc11533SChris Williamson for (;;) { 388*dfc11533SChris Williamson const char *res = match(ms, s, ep+1); 389*dfc11533SChris Williamson if (res != NULL) 390*dfc11533SChris Williamson return res; 391*dfc11533SChris Williamson else if (singlematch(ms, s, p, ep)) 392*dfc11533SChris Williamson s++; /* try with one more repetition */ 393*dfc11533SChris Williamson else return NULL; 394*dfc11533SChris Williamson } 395*dfc11533SChris Williamson } 396*dfc11533SChris Williamson 397*dfc11533SChris Williamson 398*dfc11533SChris Williamson static const char *start_capture (MatchState *ms, const char *s, 399*dfc11533SChris Williamson const char *p, int what) { 400*dfc11533SChris Williamson const char *res; 401*dfc11533SChris Williamson int level = ms->level; 402*dfc11533SChris Williamson if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 403*dfc11533SChris Williamson ms->capture[level].init = s; 404*dfc11533SChris Williamson ms->capture[level].len = what; 405*dfc11533SChris Williamson ms->level = level+1; 406*dfc11533SChris Williamson if ((res=match(ms, s, p)) == NULL) /* match failed? */ 407*dfc11533SChris Williamson ms->level--; /* undo capture */ 408*dfc11533SChris Williamson return res; 409*dfc11533SChris Williamson } 410*dfc11533SChris Williamson 411*dfc11533SChris Williamson 412*dfc11533SChris Williamson static const char *end_capture (MatchState *ms, const char *s, 413*dfc11533SChris Williamson const char *p) { 414*dfc11533SChris Williamson int l = capture_to_close(ms); 415*dfc11533SChris Williamson const char *res; 416*dfc11533SChris Williamson ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 417*dfc11533SChris Williamson if ((res = match(ms, s, p)) == NULL) /* match failed? */ 418*dfc11533SChris Williamson ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 419*dfc11533SChris Williamson return res; 420*dfc11533SChris Williamson } 421*dfc11533SChris Williamson 422*dfc11533SChris Williamson 423*dfc11533SChris Williamson static const char *match_capture (MatchState *ms, const char *s, int l) { 424*dfc11533SChris Williamson size_t len; 425*dfc11533SChris Williamson l = check_capture(ms, l); 426*dfc11533SChris Williamson len = ms->capture[l].len; 427*dfc11533SChris Williamson if ((size_t)(ms->src_end-s) >= len && 428*dfc11533SChris Williamson memcmp(ms->capture[l].init, s, len) == 0) 429*dfc11533SChris Williamson return s+len; 430*dfc11533SChris Williamson else return NULL; 431*dfc11533SChris Williamson } 432*dfc11533SChris Williamson 433*dfc11533SChris Williamson 434*dfc11533SChris Williamson static const char *match (MatchState *ms, const char *s, const char *p) { 435*dfc11533SChris Williamson if (ms->matchdepth-- == 0) 436*dfc11533SChris Williamson luaL_error(ms->L, "pattern too complex"); 437*dfc11533SChris Williamson init: /* using goto's to optimize tail recursion */ 438*dfc11533SChris Williamson if (p != ms->p_end) { /* end of pattern? */ 439*dfc11533SChris Williamson switch (*p) { 440*dfc11533SChris Williamson case '(': { /* start capture */ 441*dfc11533SChris Williamson if (*(p + 1) == ')') /* position capture? */ 442*dfc11533SChris Williamson s = start_capture(ms, s, p + 2, CAP_POSITION); 443*dfc11533SChris Williamson else 444*dfc11533SChris Williamson s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 445*dfc11533SChris Williamson break; 446*dfc11533SChris Williamson } 447*dfc11533SChris Williamson case ')': { /* end capture */ 448*dfc11533SChris Williamson s = end_capture(ms, s, p + 1); 449*dfc11533SChris Williamson break; 450*dfc11533SChris Williamson } 451*dfc11533SChris Williamson case '$': { 452*dfc11533SChris Williamson if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ 453*dfc11533SChris Williamson goto dflt; /* no; go to default */ 454*dfc11533SChris Williamson s = (s == ms->src_end) ? s : NULL; /* check end of string */ 455*dfc11533SChris Williamson break; 456*dfc11533SChris Williamson } 457*dfc11533SChris Williamson case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 458*dfc11533SChris Williamson switch (*(p + 1)) { 459*dfc11533SChris Williamson case 'b': { /* balanced string? */ 460*dfc11533SChris Williamson s = matchbalance(ms, s, p + 2); 461*dfc11533SChris Williamson if (s != NULL) { 462*dfc11533SChris Williamson p += 4; goto init; /* return match(ms, s, p + 4); */ 463*dfc11533SChris Williamson } /* else fail (s == NULL) */ 464*dfc11533SChris Williamson break; 465*dfc11533SChris Williamson } 466*dfc11533SChris Williamson case 'f': { /* frontier? */ 467*dfc11533SChris Williamson const char *ep; char previous; 468*dfc11533SChris Williamson p += 2; 469*dfc11533SChris Williamson if (*p != '[') 470*dfc11533SChris Williamson luaL_error(ms->L, "missing " LUA_QL("[") " after " 471*dfc11533SChris Williamson LUA_QL("%%f") " in pattern"); 472*dfc11533SChris Williamson ep = classend(ms, p); /* points to what is next */ 473*dfc11533SChris Williamson previous = (s == ms->src_init) ? '\0' : *(s - 1); 474*dfc11533SChris Williamson if (!matchbracketclass(uchar(previous), p, ep - 1) && 475*dfc11533SChris Williamson matchbracketclass(uchar(*s), p, ep - 1)) { 476*dfc11533SChris Williamson p = ep; goto init; /* return match(ms, s, ep); */ 477*dfc11533SChris Williamson } 478*dfc11533SChris Williamson s = NULL; /* match failed */ 479*dfc11533SChris Williamson break; 480*dfc11533SChris Williamson } 481*dfc11533SChris Williamson case '0': case '1': case '2': case '3': 482*dfc11533SChris Williamson case '4': case '5': case '6': case '7': 483*dfc11533SChris Williamson case '8': case '9': { /* capture results (%0-%9)? */ 484*dfc11533SChris Williamson s = match_capture(ms, s, uchar(*(p + 1))); 485*dfc11533SChris Williamson if (s != NULL) { 486*dfc11533SChris Williamson p += 2; goto init; /* return match(ms, s, p + 2) */ 487*dfc11533SChris Williamson } 488*dfc11533SChris Williamson break; 489*dfc11533SChris Williamson } 490*dfc11533SChris Williamson default: goto dflt; 491*dfc11533SChris Williamson } 492*dfc11533SChris Williamson break; 493*dfc11533SChris Williamson } 494*dfc11533SChris Williamson default: dflt: { /* pattern class plus optional suffix */ 495*dfc11533SChris Williamson const char *ep = classend(ms, p); /* points to optional suffix */ 496*dfc11533SChris Williamson /* does not match at least once? */ 497*dfc11533SChris Williamson if (!singlematch(ms, s, p, ep)) { 498*dfc11533SChris Williamson if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 499*dfc11533SChris Williamson p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 500*dfc11533SChris Williamson } 501*dfc11533SChris Williamson else /* '+' or no suffix */ 502*dfc11533SChris Williamson s = NULL; /* fail */ 503*dfc11533SChris Williamson } 504*dfc11533SChris Williamson else { /* matched once */ 505*dfc11533SChris Williamson switch (*ep) { /* handle optional suffix */ 506*dfc11533SChris Williamson case '?': { /* optional */ 507*dfc11533SChris Williamson const char *res; 508*dfc11533SChris Williamson if ((res = match(ms, s + 1, ep + 1)) != NULL) 509*dfc11533SChris Williamson s = res; 510*dfc11533SChris Williamson else { 511*dfc11533SChris Williamson p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 512*dfc11533SChris Williamson } 513*dfc11533SChris Williamson break; 514*dfc11533SChris Williamson } 515*dfc11533SChris Williamson case '+': /* 1 or more repetitions */ 516*dfc11533SChris Williamson s++; /* 1 match already done */ 517*dfc11533SChris Williamson /* go through */ 518*dfc11533SChris Williamson case '*': /* 0 or more repetitions */ 519*dfc11533SChris Williamson s = max_expand(ms, s, p, ep); 520*dfc11533SChris Williamson break; 521*dfc11533SChris Williamson case '-': /* 0 or more repetitions (minimum) */ 522*dfc11533SChris Williamson s = min_expand(ms, s, p, ep); 523*dfc11533SChris Williamson break; 524*dfc11533SChris Williamson default: /* no suffix */ 525*dfc11533SChris Williamson s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 526*dfc11533SChris Williamson } 527*dfc11533SChris Williamson } 528*dfc11533SChris Williamson break; 529*dfc11533SChris Williamson } 530*dfc11533SChris Williamson } 531*dfc11533SChris Williamson } 532*dfc11533SChris Williamson ms->matchdepth++; 533*dfc11533SChris Williamson return s; 534*dfc11533SChris Williamson } 535*dfc11533SChris Williamson 536*dfc11533SChris Williamson 537*dfc11533SChris Williamson 538*dfc11533SChris Williamson static const char *lmemfind (const char *s1, size_t l1, 539*dfc11533SChris Williamson const char *s2, size_t l2) { 540*dfc11533SChris Williamson if (l2 == 0) return s1; /* empty strings are everywhere */ 541*dfc11533SChris Williamson else if (l2 > l1) return NULL; /* avoids a negative `l1' */ 542*dfc11533SChris Williamson else { 543*dfc11533SChris Williamson const char *init; /* to search for a `*s2' inside `s1' */ 544*dfc11533SChris Williamson l2--; /* 1st char will be checked by `memchr' */ 545*dfc11533SChris Williamson l1 = l1-l2; /* `s2' cannot be found after that */ 546*dfc11533SChris Williamson while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 547*dfc11533SChris Williamson init++; /* 1st char is already checked */ 548*dfc11533SChris Williamson if (memcmp(init, s2+1, l2) == 0) 549*dfc11533SChris Williamson return init-1; 550*dfc11533SChris Williamson else { /* correct `l1' and `s1' to try again */ 551*dfc11533SChris Williamson l1 -= init-s1; 552*dfc11533SChris Williamson s1 = init; 553*dfc11533SChris Williamson } 554*dfc11533SChris Williamson } 555*dfc11533SChris Williamson return NULL; /* not found */ 556*dfc11533SChris Williamson } 557*dfc11533SChris Williamson } 558*dfc11533SChris Williamson 559*dfc11533SChris Williamson 560*dfc11533SChris Williamson static void push_onecapture (MatchState *ms, int i, const char *s, 561*dfc11533SChris Williamson const char *e) { 562*dfc11533SChris Williamson if (i >= ms->level) { 563*dfc11533SChris Williamson if (i == 0) /* ms->level == 0, too */ 564*dfc11533SChris Williamson lua_pushlstring(ms->L, s, e - s); /* add whole match */ 565*dfc11533SChris Williamson else 566*dfc11533SChris Williamson luaL_error(ms->L, "invalid capture index"); 567*dfc11533SChris Williamson } 568*dfc11533SChris Williamson else { 569*dfc11533SChris Williamson ptrdiff_t l = ms->capture[i].len; 570*dfc11533SChris Williamson if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); 571*dfc11533SChris Williamson if (l == CAP_POSITION) 572*dfc11533SChris Williamson lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); 573*dfc11533SChris Williamson else 574*dfc11533SChris Williamson lua_pushlstring(ms->L, ms->capture[i].init, l); 575*dfc11533SChris Williamson } 576*dfc11533SChris Williamson } 577*dfc11533SChris Williamson 578*dfc11533SChris Williamson 579*dfc11533SChris Williamson static int push_captures (MatchState *ms, const char *s, const char *e) { 580*dfc11533SChris Williamson int i; 581*dfc11533SChris Williamson int nlevels = (ms->level == 0 && s) ? 1 : ms->level; 582*dfc11533SChris Williamson luaL_checkstack(ms->L, nlevels, "too many captures"); 583*dfc11533SChris Williamson for (i = 0; i < nlevels; i++) 584*dfc11533SChris Williamson push_onecapture(ms, i, s, e); 585*dfc11533SChris Williamson return nlevels; /* number of strings pushed */ 586*dfc11533SChris Williamson } 587*dfc11533SChris Williamson 588*dfc11533SChris Williamson 589*dfc11533SChris Williamson /* check whether pattern has no special characters */ 590*dfc11533SChris Williamson static int nospecials (const char *p, size_t l) { 591*dfc11533SChris Williamson size_t upto = 0; 592*dfc11533SChris Williamson do { 593*dfc11533SChris Williamson if (strpbrk(p + upto, SPECIALS)) 594*dfc11533SChris Williamson return 0; /* pattern has a special character */ 595*dfc11533SChris Williamson upto += strlen(p + upto) + 1; /* may have more after \0 */ 596*dfc11533SChris Williamson } while (upto <= l); 597*dfc11533SChris Williamson return 1; /* no special chars found */ 598*dfc11533SChris Williamson } 599*dfc11533SChris Williamson 600*dfc11533SChris Williamson 601*dfc11533SChris Williamson static int str_find_aux (lua_State *L, int find) { 602*dfc11533SChris Williamson size_t ls, lp; 603*dfc11533SChris Williamson const char *s = luaL_checklstring(L, 1, &ls); 604*dfc11533SChris Williamson const char *p = luaL_checklstring(L, 2, &lp); 605*dfc11533SChris Williamson size_t init = posrelat(luaL_optinteger(L, 3, 1), ls); 606*dfc11533SChris Williamson if (init < 1) init = 1; 607*dfc11533SChris Williamson else if (init > ls + 1) { /* start after string's end? */ 608*dfc11533SChris Williamson lua_pushnil(L); /* cannot find anything */ 609*dfc11533SChris Williamson return 1; 610*dfc11533SChris Williamson } 611*dfc11533SChris Williamson /* explicit request or no special characters? */ 612*dfc11533SChris Williamson if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 613*dfc11533SChris Williamson /* do a plain search */ 614*dfc11533SChris Williamson const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp); 615*dfc11533SChris Williamson if (s2) { 616*dfc11533SChris Williamson lua_pushinteger(L, s2 - s + 1); 617*dfc11533SChris Williamson lua_pushinteger(L, s2 - s + lp); 618*dfc11533SChris Williamson return 2; 619*dfc11533SChris Williamson } 620*dfc11533SChris Williamson } 621*dfc11533SChris Williamson else { 622*dfc11533SChris Williamson MatchState ms; 623*dfc11533SChris Williamson const char *s1 = s + init - 1; 624*dfc11533SChris Williamson int anchor = (*p == '^'); 625*dfc11533SChris Williamson if (anchor) { 626*dfc11533SChris Williamson p++; lp--; /* skip anchor character */ 627*dfc11533SChris Williamson } 628*dfc11533SChris Williamson ms.L = L; 629*dfc11533SChris Williamson ms.matchdepth = MAXCCALLS; 630*dfc11533SChris Williamson ms.src_init = s; 631*dfc11533SChris Williamson ms.src_end = s + ls; 632*dfc11533SChris Williamson ms.p_end = p + lp; 633*dfc11533SChris Williamson do { 634*dfc11533SChris Williamson const char *res; 635*dfc11533SChris Williamson ms.level = 0; 636*dfc11533SChris Williamson lua_assert(ms.matchdepth == MAXCCALLS); 637*dfc11533SChris Williamson if ((res=match(&ms, s1, p)) != NULL) { 638*dfc11533SChris Williamson if (find) { 639*dfc11533SChris Williamson lua_pushinteger(L, s1 - s + 1); /* start */ 640*dfc11533SChris Williamson lua_pushinteger(L, res - s); /* end */ 641*dfc11533SChris Williamson return push_captures(&ms, NULL, 0) + 2; 642*dfc11533SChris Williamson } 643*dfc11533SChris Williamson else 644*dfc11533SChris Williamson return push_captures(&ms, s1, res); 645*dfc11533SChris Williamson } 646*dfc11533SChris Williamson } while (s1++ < ms.src_end && !anchor); 647*dfc11533SChris Williamson } 648*dfc11533SChris Williamson lua_pushnil(L); /* not found */ 649*dfc11533SChris Williamson return 1; 650*dfc11533SChris Williamson } 651*dfc11533SChris Williamson 652*dfc11533SChris Williamson 653*dfc11533SChris Williamson static int str_find (lua_State *L) { 654*dfc11533SChris Williamson return str_find_aux(L, 1); 655*dfc11533SChris Williamson } 656*dfc11533SChris Williamson 657*dfc11533SChris Williamson 658*dfc11533SChris Williamson static int str_match (lua_State *L) { 659*dfc11533SChris Williamson return str_find_aux(L, 0); 660*dfc11533SChris Williamson } 661*dfc11533SChris Williamson 662*dfc11533SChris Williamson 663*dfc11533SChris Williamson static int gmatch_aux (lua_State *L) { 664*dfc11533SChris Williamson MatchState ms; 665*dfc11533SChris Williamson size_t ls, lp; 666*dfc11533SChris Williamson const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); 667*dfc11533SChris Williamson const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); 668*dfc11533SChris Williamson const char *src; 669*dfc11533SChris Williamson ms.L = L; 670*dfc11533SChris Williamson ms.matchdepth = MAXCCALLS; 671*dfc11533SChris Williamson ms.src_init = s; 672*dfc11533SChris Williamson ms.src_end = s+ls; 673*dfc11533SChris Williamson ms.p_end = p + lp; 674*dfc11533SChris Williamson for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); 675*dfc11533SChris Williamson src <= ms.src_end; 676*dfc11533SChris Williamson src++) { 677*dfc11533SChris Williamson const char *e; 678*dfc11533SChris Williamson ms.level = 0; 679*dfc11533SChris Williamson lua_assert(ms.matchdepth == MAXCCALLS); 680*dfc11533SChris Williamson if ((e = match(&ms, src, p)) != NULL) { 681*dfc11533SChris Williamson lua_Integer newstart = e-s; 682*dfc11533SChris Williamson if (e == src) newstart++; /* empty match? go at least one position */ 683*dfc11533SChris Williamson lua_pushinteger(L, newstart); 684*dfc11533SChris Williamson lua_replace(L, lua_upvalueindex(3)); 685*dfc11533SChris Williamson return push_captures(&ms, src, e); 686*dfc11533SChris Williamson } 687*dfc11533SChris Williamson } 688*dfc11533SChris Williamson return 0; /* not found */ 689*dfc11533SChris Williamson } 690*dfc11533SChris Williamson 691*dfc11533SChris Williamson 692*dfc11533SChris Williamson static int str_gmatch (lua_State *L) { 693*dfc11533SChris Williamson luaL_checkstring(L, 1); 694*dfc11533SChris Williamson luaL_checkstring(L, 2); 695*dfc11533SChris Williamson lua_settop(L, 2); 696*dfc11533SChris Williamson lua_pushinteger(L, 0); 697*dfc11533SChris Williamson lua_pushcclosure(L, gmatch_aux, 3); 698*dfc11533SChris Williamson return 1; 699*dfc11533SChris Williamson } 700*dfc11533SChris Williamson 701*dfc11533SChris Williamson 702*dfc11533SChris Williamson static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 703*dfc11533SChris Williamson const char *e) { 704*dfc11533SChris Williamson size_t l, i; 705*dfc11533SChris Williamson const char *news = lua_tolstring(ms->L, 3, &l); 706*dfc11533SChris Williamson for (i = 0; i < l; i++) { 707*dfc11533SChris Williamson if (news[i] != L_ESC) 708*dfc11533SChris Williamson luaL_addchar(b, news[i]); 709*dfc11533SChris Williamson else { 710*dfc11533SChris Williamson i++; /* skip ESC */ 711*dfc11533SChris Williamson if (!isdigit(uchar(news[i]))) { 712*dfc11533SChris Williamson if (news[i] != L_ESC) 713*dfc11533SChris Williamson luaL_error(ms->L, "invalid use of " LUA_QL("%c") 714*dfc11533SChris Williamson " in replacement string", L_ESC); 715*dfc11533SChris Williamson luaL_addchar(b, news[i]); 716*dfc11533SChris Williamson } 717*dfc11533SChris Williamson else if (news[i] == '0') 718*dfc11533SChris Williamson luaL_addlstring(b, s, e - s); 719*dfc11533SChris Williamson else { 720*dfc11533SChris Williamson push_onecapture(ms, news[i] - '1', s, e); 721*dfc11533SChris Williamson luaL_addvalue(b); /* add capture to accumulated result */ 722*dfc11533SChris Williamson } 723*dfc11533SChris Williamson } 724*dfc11533SChris Williamson } 725*dfc11533SChris Williamson } 726*dfc11533SChris Williamson 727*dfc11533SChris Williamson 728*dfc11533SChris Williamson static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, 729*dfc11533SChris Williamson const char *e, int tr) { 730*dfc11533SChris Williamson lua_State *L = ms->L; 731*dfc11533SChris Williamson switch (tr) { 732*dfc11533SChris Williamson case LUA_TFUNCTION: { 733*dfc11533SChris Williamson int n; 734*dfc11533SChris Williamson lua_pushvalue(L, 3); 735*dfc11533SChris Williamson n = push_captures(ms, s, e); 736*dfc11533SChris Williamson lua_call(L, n, 1); 737*dfc11533SChris Williamson break; 738*dfc11533SChris Williamson } 739*dfc11533SChris Williamson case LUA_TTABLE: { 740*dfc11533SChris Williamson push_onecapture(ms, 0, s, e); 741*dfc11533SChris Williamson lua_gettable(L, 3); 742*dfc11533SChris Williamson break; 743*dfc11533SChris Williamson } 744*dfc11533SChris Williamson default: { /* LUA_TNUMBER or LUA_TSTRING */ 745*dfc11533SChris Williamson add_s(ms, b, s, e); 746*dfc11533SChris Williamson return; 747*dfc11533SChris Williamson } 748*dfc11533SChris Williamson } 749*dfc11533SChris Williamson if (!lua_toboolean(L, -1)) { /* nil or false? */ 750*dfc11533SChris Williamson lua_pop(L, 1); 751*dfc11533SChris Williamson lua_pushlstring(L, s, e - s); /* keep original text */ 752*dfc11533SChris Williamson } 753*dfc11533SChris Williamson else if (!lua_isstring(L, -1)) 754*dfc11533SChris Williamson luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); 755*dfc11533SChris Williamson luaL_addvalue(b); /* add result to accumulator */ 756*dfc11533SChris Williamson } 757*dfc11533SChris Williamson 758*dfc11533SChris Williamson 759*dfc11533SChris Williamson static int str_gsub (lua_State *L) { 760*dfc11533SChris Williamson size_t srcl, lp; 761*dfc11533SChris Williamson const char *src = luaL_checklstring(L, 1, &srcl); 762*dfc11533SChris Williamson const char *p = luaL_checklstring(L, 2, &lp); 763*dfc11533SChris Williamson int tr = lua_type(L, 3); 764*dfc11533SChris Williamson size_t max_s = luaL_optinteger(L, 4, srcl+1); 765*dfc11533SChris Williamson int anchor = (*p == '^'); 766*dfc11533SChris Williamson size_t n = 0; 767*dfc11533SChris Williamson MatchState ms; 768*dfc11533SChris Williamson luaL_Buffer b; 769*dfc11533SChris Williamson luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 770*dfc11533SChris Williamson tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 771*dfc11533SChris Williamson "string/function/table expected"); 772*dfc11533SChris Williamson luaL_buffinit(L, &b); 773*dfc11533SChris Williamson if (anchor) { 774*dfc11533SChris Williamson p++; lp--; /* skip anchor character */ 775*dfc11533SChris Williamson } 776*dfc11533SChris Williamson ms.L = L; 777*dfc11533SChris Williamson ms.matchdepth = MAXCCALLS; 778*dfc11533SChris Williamson ms.src_init = src; 779*dfc11533SChris Williamson ms.src_end = src+srcl; 780*dfc11533SChris Williamson ms.p_end = p + lp; 781*dfc11533SChris Williamson while (n < max_s) { 782*dfc11533SChris Williamson const char *e; 783*dfc11533SChris Williamson ms.level = 0; 784*dfc11533SChris Williamson lua_assert(ms.matchdepth == MAXCCALLS); 785*dfc11533SChris Williamson e = match(&ms, src, p); 786*dfc11533SChris Williamson if (e) { 787*dfc11533SChris Williamson n++; 788*dfc11533SChris Williamson add_value(&ms, &b, src, e, tr); 789*dfc11533SChris Williamson } 790*dfc11533SChris Williamson if (e && e>src) /* non empty match? */ 791*dfc11533SChris Williamson src = e; /* skip it */ 792*dfc11533SChris Williamson else if (src < ms.src_end) 793*dfc11533SChris Williamson luaL_addchar(&b, *src++); 794*dfc11533SChris Williamson else break; 795*dfc11533SChris Williamson if (anchor) break; 796*dfc11533SChris Williamson } 797*dfc11533SChris Williamson luaL_addlstring(&b, src, ms.src_end-src); 798*dfc11533SChris Williamson luaL_pushresult(&b); 799*dfc11533SChris Williamson lua_pushinteger(L, n); /* number of substitutions */ 800*dfc11533SChris Williamson return 2; 801*dfc11533SChris Williamson } 802*dfc11533SChris Williamson 803*dfc11533SChris Williamson /* }====================================================== */ 804*dfc11533SChris Williamson 805*dfc11533SChris Williamson 806*dfc11533SChris Williamson 807*dfc11533SChris Williamson /* 808*dfc11533SChris Williamson ** {====================================================== 809*dfc11533SChris Williamson ** STRING FORMAT 810*dfc11533SChris Williamson ** ======================================================= 811*dfc11533SChris Williamson */ 812*dfc11533SChris Williamson 813*dfc11533SChris Williamson /* 814*dfc11533SChris Williamson ** LUA_INTFRMLEN is the length modifier for integer conversions in 815*dfc11533SChris Williamson ** 'string.format'; LUA_INTFRM_T is the integer type corresponding to 816*dfc11533SChris Williamson ** the previous length 817*dfc11533SChris Williamson */ 818*dfc11533SChris Williamson #if !defined(LUA_INTFRMLEN) /* { */ 819*dfc11533SChris Williamson #if defined(LUA_USE_LONGLONG) 820*dfc11533SChris Williamson 821*dfc11533SChris Williamson #define LUA_INTFRMLEN "ll" 822*dfc11533SChris Williamson #define LUA_INTFRM_T long long 823*dfc11533SChris Williamson 824*dfc11533SChris Williamson #else 825*dfc11533SChris Williamson 826*dfc11533SChris Williamson #define LUA_INTFRMLEN "l" 827*dfc11533SChris Williamson #define LUA_INTFRM_T long 828*dfc11533SChris Williamson 829*dfc11533SChris Williamson #endif 830*dfc11533SChris Williamson #endif /* } */ 831*dfc11533SChris Williamson 832*dfc11533SChris Williamson 833*dfc11533SChris Williamson /* 834*dfc11533SChris Williamson ** LUA_FLTFRMLEN is the length modifier for float conversions in 835*dfc11533SChris Williamson ** 'string.format'; LUA_FLTFRM_T is the float type corresponding to 836*dfc11533SChris Williamson ** the previous length 837*dfc11533SChris Williamson */ 838*dfc11533SChris Williamson #if !defined(LUA_FLTFRMLEN) 839*dfc11533SChris Williamson 840*dfc11533SChris Williamson #define LUA_FLTFRMLEN "" 841*dfc11533SChris Williamson #define LUA_FLTFRM_T double 842*dfc11533SChris Williamson 843*dfc11533SChris Williamson #endif 844*dfc11533SChris Williamson 845*dfc11533SChris Williamson 846*dfc11533SChris Williamson /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 847*dfc11533SChris Williamson #define MAX_ITEM 512 848*dfc11533SChris Williamson /* valid flags in a format specification */ 849*dfc11533SChris Williamson #define FLAGS "-+ #0" 850*dfc11533SChris Williamson /* 851*dfc11533SChris Williamson ** maximum size of each format specification (such as '%-099.99d') 852*dfc11533SChris Williamson ** (+10 accounts for %99.99x plus margin of error) 853*dfc11533SChris Williamson */ 854*dfc11533SChris Williamson #define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10) 855*dfc11533SChris Williamson 856*dfc11533SChris Williamson 857*dfc11533SChris Williamson static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { 858*dfc11533SChris Williamson size_t l; 859*dfc11533SChris Williamson const char *s = luaL_checklstring(L, arg, &l); 860*dfc11533SChris Williamson luaL_addchar(b, '"'); 861*dfc11533SChris Williamson while (l--) { 862*dfc11533SChris Williamson if (*s == '"' || *s == '\\' || *s == '\n') { 863*dfc11533SChris Williamson luaL_addchar(b, '\\'); 864*dfc11533SChris Williamson luaL_addchar(b, *s); 865*dfc11533SChris Williamson } 866*dfc11533SChris Williamson else if (*s == '\0' || iscntrl(uchar(*s))) { 867*dfc11533SChris Williamson char buff[10]; 868*dfc11533SChris Williamson if (!isdigit(uchar(*(s+1)))) 869*dfc11533SChris Williamson sprintf(buff, "\\%d", (int)uchar(*s)); 870*dfc11533SChris Williamson else 871*dfc11533SChris Williamson sprintf(buff, "\\%03d", (int)uchar(*s)); 872*dfc11533SChris Williamson luaL_addstring(b, buff); 873*dfc11533SChris Williamson } 874*dfc11533SChris Williamson else 875*dfc11533SChris Williamson luaL_addchar(b, *s); 876*dfc11533SChris Williamson s++; 877*dfc11533SChris Williamson } 878*dfc11533SChris Williamson luaL_addchar(b, '"'); 879*dfc11533SChris Williamson } 880*dfc11533SChris Williamson 881*dfc11533SChris Williamson static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { 882*dfc11533SChris Williamson const char *p = strfrmt; 883*dfc11533SChris Williamson while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */ 884*dfc11533SChris Williamson if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char)) 885*dfc11533SChris Williamson luaL_error(L, "invalid format (repeated flags)"); 886*dfc11533SChris Williamson if (isdigit(uchar(*p))) p++; /* skip width */ 887*dfc11533SChris Williamson if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 888*dfc11533SChris Williamson if (*p == '.') { 889*dfc11533SChris Williamson p++; 890*dfc11533SChris Williamson if (isdigit(uchar(*p))) p++; /* skip precision */ 891*dfc11533SChris Williamson if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 892*dfc11533SChris Williamson } 893*dfc11533SChris Williamson if (isdigit(uchar(*p))) 894*dfc11533SChris Williamson luaL_error(L, "invalid format (width or precision too long)"); 895*dfc11533SChris Williamson *(form++) = '%'; 896*dfc11533SChris Williamson memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char)); 897*dfc11533SChris Williamson form += p - strfrmt + 1; 898*dfc11533SChris Williamson *form = '\0'; 899*dfc11533SChris Williamson return p; 900*dfc11533SChris Williamson } 901*dfc11533SChris Williamson 902*dfc11533SChris Williamson 903*dfc11533SChris Williamson /* 904*dfc11533SChris Williamson ** add length modifier into formats 905*dfc11533SChris Williamson */ 906*dfc11533SChris Williamson static void addlenmod (char *form, const char *lenmod) { 907*dfc11533SChris Williamson size_t l = strlen(form); 908*dfc11533SChris Williamson size_t lm = strlen(lenmod); 909*dfc11533SChris Williamson char spec = form[l - 1]; 910*dfc11533SChris Williamson strcpy(form + l - 1, lenmod); 911*dfc11533SChris Williamson form[l + lm - 1] = spec; 912*dfc11533SChris Williamson form[l + lm] = '\0'; 913*dfc11533SChris Williamson } 914*dfc11533SChris Williamson 915*dfc11533SChris Williamson 916*dfc11533SChris Williamson static int str_format (lua_State *L) { 917*dfc11533SChris Williamson int top = lua_gettop(L); 918*dfc11533SChris Williamson int arg = 1; 919*dfc11533SChris Williamson size_t sfl; 920*dfc11533SChris Williamson const char *strfrmt = luaL_checklstring(L, arg, &sfl); 921*dfc11533SChris Williamson const char *strfrmt_end = strfrmt+sfl; 922*dfc11533SChris Williamson luaL_Buffer b; 923*dfc11533SChris Williamson luaL_buffinit(L, &b); 924*dfc11533SChris Williamson while (strfrmt < strfrmt_end) { 925*dfc11533SChris Williamson if (*strfrmt != L_ESC) 926*dfc11533SChris Williamson luaL_addchar(&b, *strfrmt++); 927*dfc11533SChris Williamson else if (*++strfrmt == L_ESC) 928*dfc11533SChris Williamson luaL_addchar(&b, *strfrmt++); /* %% */ 929*dfc11533SChris Williamson else { /* format item */ 930*dfc11533SChris Williamson char form[MAX_FORMAT]; /* to store the format (`%...') */ 931*dfc11533SChris Williamson char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */ 932*dfc11533SChris Williamson int nb = 0; /* number of bytes in added item */ 933*dfc11533SChris Williamson if (++arg > top) 934*dfc11533SChris Williamson luaL_argerror(L, arg, "no value"); 935*dfc11533SChris Williamson strfrmt = scanformat(L, strfrmt, form); 936*dfc11533SChris Williamson switch (*strfrmt++) { 937*dfc11533SChris Williamson case 'c': { 938*dfc11533SChris Williamson nb = str_sprintf(buff, form, luaL_checkint(L, arg)); 939*dfc11533SChris Williamson break; 940*dfc11533SChris Williamson } 941*dfc11533SChris Williamson case 'd': case 'i': { 942*dfc11533SChris Williamson lua_Number n = luaL_checknumber(L, arg); 943*dfc11533SChris Williamson LUA_INTFRM_T ni = (LUA_INTFRM_T)n; 944*dfc11533SChris Williamson lua_Number diff = n - (lua_Number)ni; 945*dfc11533SChris Williamson luaL_argcheck(L, -1 < diff && diff < 1, arg, 946*dfc11533SChris Williamson "not a number in proper range"); 947*dfc11533SChris Williamson addlenmod(form, LUA_INTFRMLEN); 948*dfc11533SChris Williamson nb = str_sprintf(buff, form, ni); 949*dfc11533SChris Williamson break; 950*dfc11533SChris Williamson } 951*dfc11533SChris Williamson case 'o': case 'u': case 'x': case 'X': { 952*dfc11533SChris Williamson lua_Number n = luaL_checknumber(L, arg); 953*dfc11533SChris Williamson unsigned LUA_INTFRM_T ni = (unsigned LUA_INTFRM_T)n; 954*dfc11533SChris Williamson lua_Number diff = n - (lua_Number)ni; 955*dfc11533SChris Williamson luaL_argcheck(L, -1 < diff && diff < 1, arg, 956*dfc11533SChris Williamson "not a non-negative number in proper range"); 957*dfc11533SChris Williamson addlenmod(form, LUA_INTFRMLEN); 958*dfc11533SChris Williamson nb = str_sprintf(buff, form, ni); 959*dfc11533SChris Williamson break; 960*dfc11533SChris Williamson } 961*dfc11533SChris Williamson case 'e': case 'E': case 'f': 962*dfc11533SChris Williamson #if defined(LUA_USE_AFORMAT) 963*dfc11533SChris Williamson case 'a': case 'A': 964*dfc11533SChris Williamson #endif 965*dfc11533SChris Williamson case 'g': case 'G': { 966*dfc11533SChris Williamson addlenmod(form, LUA_FLTFRMLEN); 967*dfc11533SChris Williamson nb = str_sprintf(buff, form, (LUA_FLTFRM_T)luaL_checknumber(L, arg)); 968*dfc11533SChris Williamson break; 969*dfc11533SChris Williamson } 970*dfc11533SChris Williamson case 'q': { 971*dfc11533SChris Williamson addquoted(L, &b, arg); 972*dfc11533SChris Williamson break; 973*dfc11533SChris Williamson } 974*dfc11533SChris Williamson case 's': { 975*dfc11533SChris Williamson size_t l; 976*dfc11533SChris Williamson const char *s = luaL_tolstring(L, arg, &l); 977*dfc11533SChris Williamson if (!strchr(form, '.') && l >= 100) { 978*dfc11533SChris Williamson /* no precision and string is too long to be formatted; 979*dfc11533SChris Williamson keep original string */ 980*dfc11533SChris Williamson luaL_addvalue(&b); 981*dfc11533SChris Williamson break; 982*dfc11533SChris Williamson } 983*dfc11533SChris Williamson else { 984*dfc11533SChris Williamson nb = str_sprintf(buff, form, s); 985*dfc11533SChris Williamson lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ 986*dfc11533SChris Williamson break; 987*dfc11533SChris Williamson } 988*dfc11533SChris Williamson } 989*dfc11533SChris Williamson default: { /* also treat cases `pnLlh' */ 990*dfc11533SChris Williamson return luaL_error(L, "invalid option " LUA_QL("%%%c") " to " 991*dfc11533SChris Williamson LUA_QL("format"), *(strfrmt - 1)); 992*dfc11533SChris Williamson } 993*dfc11533SChris Williamson } 994*dfc11533SChris Williamson luaL_addsize(&b, nb); 995*dfc11533SChris Williamson } 996*dfc11533SChris Williamson } 997*dfc11533SChris Williamson luaL_pushresult(&b); 998*dfc11533SChris Williamson return 1; 999*dfc11533SChris Williamson } 1000*dfc11533SChris Williamson 1001*dfc11533SChris Williamson /* }====================================================== */ 1002*dfc11533SChris Williamson 1003*dfc11533SChris Williamson 1004*dfc11533SChris Williamson static const luaL_Reg strlib[] = { 1005*dfc11533SChris Williamson {"byte", str_byte}, 1006*dfc11533SChris Williamson {"char", str_char}, 1007*dfc11533SChris Williamson {"dump", str_dump}, 1008*dfc11533SChris Williamson {"find", str_find}, 1009*dfc11533SChris Williamson {"format", str_format}, 1010*dfc11533SChris Williamson {"gmatch", str_gmatch}, 1011*dfc11533SChris Williamson {"gsub", str_gsub}, 1012*dfc11533SChris Williamson {"len", str_len}, 1013*dfc11533SChris Williamson {"lower", str_lower}, 1014*dfc11533SChris Williamson {"match", str_match}, 1015*dfc11533SChris Williamson {"rep", str_rep}, 1016*dfc11533SChris Williamson {"reverse", str_reverse}, 1017*dfc11533SChris Williamson {"sub", str_sub}, 1018*dfc11533SChris Williamson {"upper", str_upper}, 1019*dfc11533SChris Williamson {NULL, NULL} 1020*dfc11533SChris Williamson }; 1021*dfc11533SChris Williamson 1022*dfc11533SChris Williamson 1023*dfc11533SChris Williamson static void createmetatable (lua_State *L) { 1024*dfc11533SChris Williamson lua_createtable(L, 0, 1); /* table to be metatable for strings */ 1025*dfc11533SChris Williamson lua_pushliteral(L, ""); /* dummy string */ 1026*dfc11533SChris Williamson lua_pushvalue(L, -2); /* copy table */ 1027*dfc11533SChris Williamson lua_setmetatable(L, -2); /* set table as metatable for strings */ 1028*dfc11533SChris Williamson lua_pop(L, 1); /* pop dummy string */ 1029*dfc11533SChris Williamson lua_pushvalue(L, -2); /* get string library */ 1030*dfc11533SChris Williamson lua_setfield(L, -2, "__index"); /* metatable.__index = string */ 1031*dfc11533SChris Williamson lua_pop(L, 1); /* pop metatable */ 1032*dfc11533SChris Williamson } 1033*dfc11533SChris Williamson 1034*dfc11533SChris Williamson 1035*dfc11533SChris Williamson /* 1036*dfc11533SChris Williamson ** Open string library 1037*dfc11533SChris Williamson */ 1038*dfc11533SChris Williamson LUAMOD_API int luaopen_string (lua_State *L) { 1039*dfc11533SChris Williamson luaL_newlib(L, strlib); 1040*dfc11533SChris Williamson createmetatable(L); 1041*dfc11533SChris Williamson return 1; 1042*dfc11533SChris Williamson } 1043*dfc11533SChris Williamson 1044