summary | refs | log | tree | commit | diff | stats | homepage
path: root/extmod
diff options
context:
space:
mode:
Diffstat (limited to 'extmod')
-rw-r--r-- extmod/modure.c 3
-rw-r--r-- extmod/re1.5/charclass.c 11
-rw-r--r-- extmod/re1.5/compilecode.c 32
-rw-r--r-- extmod/re1.5/dumpcode.c 12
-rw-r--r-- extmod/re1.5/re1.5.h (renamed from extmod/re1.5/regexp.h) 5
-rw-r--r-- extmod/re1.5/recursiveloop.c 8
6 files changed, 67 insertions, 4 deletions
diff --git a/extmod/modure.c b/extmod/modure.c
index 7acc045e70..ae47a2129a 100644
--- a/extmod/modure.c
+++ b/extmod/modure.c
@@ -38,7 +38,7 @@
#if MICROPY_PY_URE
-#include "re1.5/regexp.h"
+#include "re1.5/re1.5.h"
#define FLAG_DEBUG 0x1000
@@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
#include "re1.5/compilecode.c"
#include "re1.5/dumpcode.c"
#include "re1.5/recursiveloop.c"
+#include "re1.5/charclass.c"
#endif //MICROPY_PY_URE
diff --git a/extmod/re1.5/charclass.c b/extmod/re1.5/charclass.c
new file mode 100644
index 0000000000..c9f617592b
--- /dev/null
+++ b/extmod/re1.5/charclass.c
@@ -0,0 +1,11 @@
+#include "re1.5.h"
+
/*
 * Test whether the subject character *sp belongs to a character class.
 *
 * pc points at the pair-count byte that follows the Class opcode. The count
 * is followed by that many (low, high) byte pairs, each describing one
 * inclusive range; a single character is stored as a pair with low == high.
 *
 * Returns 1 if *sp lies inside at least one of the ranges, 0 otherwise
 * (including when the class holds no ranges at all).
 */
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Read the bytecode and the subject as unsigned bytes so that a count or
    // a character above 0x7f is not misread as negative where plain char is
    // signed.
    const unsigned char *pair = (const unsigned char *)pc;
    unsigned char ch = (unsigned char)*sp;
    int cnt = *pair++;

    while (cnt--) {
        // A class matches as soon as any one of its ranges contains ch.
        if (ch >= pair[0] && ch <= pair[1]) {
            return 1;
        }
        pair += 2; // step to the next (low, high) pair
    }
    return 0;
}
diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c
index 5b5d28c2a0..a7942b1216 100644
--- a/extmod/re1.5/compilecode.c
+++ b/extmod/re1.5/compilecode.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "regexp.h"
+#include "re1.5.h"
static void insert_code(char *code, int at, int num, int *pc)
{
@@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
break;
case ')':
break;
+ case '[': {
+ pc += 2;
+ re++;
+ while (*re != ']') {
+ if (!*re) return -1;
+ if (re[1] == '-') {
+ re += 2;
+ }
+ pc += 2;
+ re++;
+ }
+ }
}
}
@@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
EMIT(pc++, Any);
prog->len++;
break;
+ case '[': {
+ int cnt;
+ term = pc;
+ EMIT(pc++, Class);
+ pc++; // Skip # of pair byte
+ prog->len++;
+ re++;
+ for (cnt = 0; *re != ']'; re++, cnt++) {
+ if (!*re) return NULL;
+ EMIT(pc++, *re);
+ if (re[1] == '-') {
+ re += 2;
+ }
+ EMIT(pc++, *re);
+ }
+ EMIT(term + 1, cnt);
+ break;
+ }
case '(':
term = pc;
diff --git a/extmod/re1.5/dumpcode.c b/extmod/re1.5/dumpcode.c
index b91ded03a6..ca41cfeda4 100644
--- a/extmod/re1.5/dumpcode.c
+++ b/extmod/re1.5/dumpcode.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "regexp.h"
+#include "re1.5.h"
void re1_5_dumpcode(ByteProg *prog)
{
@@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
case Any:
printf("any\n");
break;
+ case Class: {
+ int num = code[pc++];
+ printf("class %d", num);
+ while (num--) {
+ printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
+ pc += 2;
+ }
+ printf("\n");
+ break;
+ }
case Match:
printf("match\n");
break;
diff --git a/extmod/re1.5/regexp.h b/extmod/re1.5/re1.5.h
index 316b27076d..ac41bab8f3 100644
--- a/extmod/re1.5/regexp.h
+++ b/extmod/re1.5/re1.5.h
@@ -80,14 +80,18 @@ enum /* Inst.opcode */
CONSUMERS = 1,
Char = CONSUMERS,
Any,
+ Class,
+
ASSERTS = 0x50,
Bol = ASSERTS,
Eol,
+
// Instructions which take relative offset as arg
JUMPS = 0x60,
Jmp = JUMPS,
Split,
RSplit,
+
// Other (special) instructions
Save = 0x7e,
Match = 0x7f,
@@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
int re1_5_compilecode(ByteProg *prog, const char *re);
void re1_5_dumpcode(ByteProg *prog);
void cleanmarks(ByteProg *prog);
+int _re1_5_classmatch(const char *pc, const char *sp);
#endif /*_RE1_5_REGEXP__H*/
diff --git a/extmod/re1.5/recursiveloop.c b/extmod/re1.5/recursiveloop.c
index 7b95eb4c95..26c6da43de 100644
--- a/extmod/re1.5/recursiveloop.c
+++ b/extmod/re1.5/recursiveloop.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "regexp.h"
+#include "re1.5.h"
static int
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
@@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
case Any:
sp++;
continue;
+ case Class:
+ if (!_re1_5_classmatch(pc, sp))
+ return 0;
+ pc += *(unsigned char*)pc * 2 + 1;
+ sp++;
+ continue;
case Match:
return 1;
case Jmp: