diff options
-rw-r--r-- | extmod/modure.c | 3 | ||||
-rw-r--r-- | extmod/re1.5/charclass.c | 11 | ||||
-rw-r--r-- | extmod/re1.5/compilecode.c | 32 | ||||
-rw-r--r-- | extmod/re1.5/dumpcode.c | 12 | ||||
-rw-r--r-- | extmod/re1.5/re1.5.h (renamed from extmod/re1.5/regexp.h) | 5 | ||||
-rw-r--r-- | extmod/re1.5/recursiveloop.c | 8 | ||||
-rw-r--r-- | tests/extmod/ure1.py | 7 |
7 files changed, 74 insertions, 4 deletions
// Test whether the subject character at *sp belongs to a character class.
//
// pc points to the "cnt" byte that follows the Class opcode. That byte is
// followed by cnt pairs of bytes, each pair being the inclusive (low, high)
// bounds of one range; a single character is stored as a pair with
// low == high.
//
// Returns 1 if *sp lies inside at least one of the ranges, 0 otherwise
// (including when cnt is 0, i.e. the class has no ranges).
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Read everything through unsigned char: the signedness of plain char is
    // implementation-defined, and a sign-extended count or range bound would
    // give wrong results for values >= 0x80.
    const unsigned char *range = (const unsigned char *)pc;
    unsigned char ch = (unsigned char)*sp;
    unsigned int cnt = *range++;

    while (cnt--) {
        // A class matches as soon as ANY of its ranges contains the char.
        if (ch >= range[0] && ch <= range[1]) {
            return 1;
        }
        // Step to the next (low, high) pair.
        range += 2;
    }
    return 0;
}
\ No newline at end of file diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c index 5b5d28c2a0..a7942b1216 100644 --- a/extmod/re1.5/compilecode.c +++ b/extmod/re1.5/compilecode.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "regexp.h" +#include "re1.5.h" static void insert_code(char *code, int at, int num, int *pc) { @@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re) break; case ')': break; + case '[': { + pc += 2; + re++; + while (*re != ']') { + if (!*re) return -1; + if (re[1] == '-') { + re += 2; + } + pc += 2; + re++; + } + } } } @@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog) EMIT(pc++, Any); prog->len++; break; + case '[': { + int cnt; + term = pc; + EMIT(pc++, Class); + pc++; // Skip # of pair byte + prog->len++; + re++; + for (cnt = 0; *re != ']'; re++, cnt++) { + if (!*re) return NULL; + EMIT(pc++, *re); + if (re[1] == '-') { + re += 2; + } + EMIT(pc++, *re); + } + EMIT(term + 1, cnt); + break; + } case '(': term = pc; diff --git a/extmod/re1.5/dumpcode.c b/extmod/re1.5/dumpcode.c index b91ded03a6..ca41cfeda4 100644 --- a/extmod/re1.5/dumpcode.c +++ b/extmod/re1.5/dumpcode.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-#include "regexp.h" +#include "re1.5.h" void re1_5_dumpcode(ByteProg *prog) { @@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog) case Any: printf("any\n"); break; + case Class: { + int num = code[pc++]; + printf("class %d", num); + while (num--) { + printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]); + pc += 2; + } + printf("\n"); + break; + } case Match: printf("match\n"); break; diff --git a/extmod/re1.5/regexp.h b/extmod/re1.5/re1.5.h index 316b27076d..ac41bab8f3 100644 --- a/extmod/re1.5/regexp.h +++ b/extmod/re1.5/re1.5.h @@ -80,14 +80,18 @@ enum /* Inst.opcode */ CONSUMERS = 1, Char = CONSUMERS, Any, + Class, + ASSERTS = 0x50, Bol = ASSERTS, Eol, + // Instructions which take relative offset as arg JUMPS = 0x60, Jmp = JUMPS, Split, RSplit, + // Other (special) instructions Save = 0x7e, Match = 0x7f, @@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re); int re1_5_compilecode(ByteProg *prog, const char *re); void re1_5_dumpcode(ByteProg *prog); void cleanmarks(ByteProg *prog); +int _re1_5_classmatch(const char *pc, const char *sp); #endif /*_RE1_5_REGEXP__H*/ diff --git a/extmod/re1.5/recursiveloop.c b/extmod/re1.5/recursiveloop.c index 7b95eb4c95..26c6da43de 100644 --- a/extmod/re1.5/recursiveloop.c +++ b/extmod/re1.5/recursiveloop.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-#include "regexp.h" +#include "re1.5.h" static int recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp) @@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n case Any: sp++; continue; + case Class: + if (!_re1_5_classmatch(pc, sp)) + return 0; + pc += *(unsigned char*)pc * 2 + 1; + sp++; + continue; case Match: return 1; case Jmp: diff --git a/tests/extmod/ure1.py b/tests/extmod/ure1.py index dff099c8cc..577c8f61e7 100644 --- a/tests/extmod/ure1.py +++ b/tests/extmod/ure1.py @@ -20,6 +20,13 @@ try: except IndexError: print("IndexError") +r = re.compile("[a-c]") +m = r.match("a") +print(m.group(0)) +m = r.match("d") +print(m) +m = r.match("A") +print(m) r = re.compile("o+") m = r.search("foobar") |