about summary refs log tree commit diff stats homepage
path: root/Modules/_sre.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- Modules/_sre.c 272
1 files changed, 157 insertions, 115 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 8f47688d410..88bbf6a941e 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -58,12 +58,8 @@ static char copyright[] =
/* defining this one enables tracing */
#undef VERBOSE
-#if PY_VERSION_HEX >= 0x01060000
-#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
-#endif
-#endif
/* -------------------------------------------------------------------- */
/* optional features */
@@ -71,9 +67,6 @@ static char copyright[] =
/* enables fast searching */
#define USE_FAST_SEARCH
-/* enables aggressive inlining (always on for Visual C) */
-#undef USE_INLINE
-
/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY
@@ -1671,7 +1664,7 @@ state_reset(SRE_STATE* state)
}
static void*
-getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
+getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize, Py_buffer *view)
{
/* given a python object, return a data pointer, a length (in
characters), and a character size. return NULL if the object
@@ -1682,40 +1675,37 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
int charsize;
void* ptr;
-#if defined(HAVE_UNICODE)
+ /* Unicode objects do not support the buffer API. So, get the data
+ directly instead. */
if (PyUnicode_Check(string)) {
- /* unicode strings doesn't always support the buffer interface */
- ptr = (void*) PyUnicode_AS_DATA(string);
- /* bytes = PyUnicode_GET_DATA_SIZE(string); */
- size = PyUnicode_GET_SIZE(string);
- charsize = sizeof(Py_UNICODE);
-
- } else {
-#endif
+ ptr = (void *)PyUnicode_AS_DATA(string);
+ *p_length = PyUnicode_GET_SIZE(string);
+ *p_charsize = sizeof(Py_UNICODE);
+ return ptr;
+ }
/* get pointer to string buffer */
+ view->len = -1;
buffer = Py_TYPE(string)->tp_as_buffer;
- if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
- buffer->bf_getsegcount(string, NULL) != 1) {
- PyErr_SetString(PyExc_TypeError, "expected string or buffer");
- return NULL;
+ if (!buffer || !buffer->bf_getbuffer ||
+ (*buffer->bf_getbuffer)(string, view, PyBUF_SIMPLE) < 0) {
+ PyErr_SetString(PyExc_TypeError, "expected string or buffer");
+ return NULL;
}
/* determine buffer size */
- bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
+ bytes = view->len;
+ ptr = view->buf;
+
if (bytes < 0) {
PyErr_SetString(PyExc_TypeError, "buffer has negative size");
- return NULL;
+ goto err;
}
/* determine character size */
-#if PY_VERSION_HEX >= 0x01060000
size = PyObject_Size(string);
-#else
- size = PyObject_Length(string);
-#endif
- if (PyString_Check(string) || bytes == size)
+ if (PyBytes_Check(string) || bytes == size)
charsize = 1;
#if defined(HAVE_UNICODE)
else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
@@ -1723,17 +1713,22 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
#endif
else {
PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
- return NULL;
+ goto err;
}
-#if defined(HAVE_UNICODE)
- }
-#endif
-
*p_length = size;
*p_charsize = charsize;
+ if (ptr == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "Buffer is NULL");
+ goto err;
+ }
return ptr;
+ err:
+ PyBuffer_Release(view);
+ view->buf = NULL;
+ return NULL;
}
LOCAL(PyObject*)
@@ -1751,9 +1746,21 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->lastmark = -1;
state->lastindex = -1;
- ptr = getstring(string, &length, &charsize);
+ state->buffer.buf = NULL;
+ ptr = getstring(string, &length, &charsize, &state->buffer);
if (!ptr)
- return NULL;
+ goto err;
+
+ if (charsize == 1 && pattern->charsize > 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't use a string pattern on a bytes-like object");
+ goto err;
+ }
+ if (charsize > 1 && pattern->charsize == 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't use a bytes pattern on a string-like object");
+ goto err;
+ }
/* adjust boundaries */
if (start < 0)
@@ -1790,11 +1797,17 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->lower = sre_lower;
return string;
+ err:
+ if (state->buffer.buf)
+ PyBuffer_Release(&state->buffer);
+ return NULL;
}
LOCAL(void)
state_fini(SRE_STATE* state)
{ /* tear-down counterpart of state_init: release the buffer view, the string reference, and the data stack */
+ if (state->buffer.buf) /* state_init NULLs buf before getstring(); non-NULL here means a view is still held */
+ PyBuffer_Release(&state->buffer);
Py_XDECREF(state->string); /* NOTE(review): X variant, so a NULL state->string is presumably tolerated -- confirm */
data_stack_dealloc(state);
}
@@ -1856,6 +1869,8 @@ pattern_dealloc(PatternObject* self)
{
if (self->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) self);
+ if (self->view.buf)
+ PyBuffer_Release(&self->view);
Py_XDECREF(self->pattern);
Py_XDECREF(self->groupindex);
Py_XDECREF(self->indexgroup);
@@ -1949,7 +1964,7 @@ call(char* module, char* function, PyObject* args)
if (!args)
return NULL;
- name = PyString_FromString(module);
+ name = PyUnicode_FromString(module);
if (!name)
return NULL;
mod = PyImport_Import(name);
@@ -2290,6 +2305,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Py_ssize_t i, b, e;
int bint;
int filter_is_callable;
+ Py_buffer view;
if (PyCallable_Check(ptemplate)) {
/* sub/subn takes either a function or a template */
@@ -2299,7 +2315,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
} else {
/* if not callable, check if it's a literal string */
int literal;
- ptr = getstring(ptemplate, &n, &bint);
+ view.buf = NULL;
+ ptr = getstring(ptemplate, &n, &bint, &view);
b = bint;
if (ptr) {
if (b == 1) {
@@ -2313,6 +2330,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
PyErr_Clear();
literal = 0;
}
+ if (view.buf)
+ PyBuffer_Release(&view);
if (literal) {
filter = ptemplate;
Py_INCREF(filter);
@@ -2540,35 +2559,35 @@ pattern_deepcopy(PatternObject* self, PyObject* memo)
}
PyDoc_STRVAR(pattern_match_doc,
-"match(string[, pos[, endpos]]) --> match object or None.\n\
+"match(string[, pos[, endpos]]) -> match object or None.\n\n\
Matches zero or more characters at the beginning of the string");
PyDoc_STRVAR(pattern_search_doc,
-"search(string[, pos[, endpos]]) --> match object or None.\n\
+"search(string[, pos[, endpos]]) -> match object or None.\n\n\
Scan through string looking for a match, and return a corresponding\n\
match object instance. Return None if no position in the string matches.");
PyDoc_STRVAR(pattern_split_doc,
-"split(string[, maxsplit = 0]) --> list.\n\
+"split(string[, maxsplit = 0]) -> list.\n\n\
Split string by the occurrences of pattern.");
PyDoc_STRVAR(pattern_findall_doc,
-"findall(string[, pos[, endpos]]) --> list.\n\
+"findall(string[, pos[, endpos]]) -> list.\n\n\
Return a list of all non-overlapping matches of pattern in string.");
PyDoc_STRVAR(pattern_finditer_doc,
-"finditer(string[, pos[, endpos]]) --> iterator.\n\
+"finditer(string[, pos[, endpos]]) -> iterator.\n\n\
Return an iterator over all non-overlapping matches for the \n\
RE pattern in string. For each match, the iterator returns a\n\
match object.");
PyDoc_STRVAR(pattern_sub_doc,
-"sub(repl, string[, count = 0]) --> newstring\n\
+"sub(repl, string[, count = 0]) -> newstring.\n\n\
Return the string obtained by replacing the leftmost non-overlapping\n\
occurrences of pattern in string by the replacement repl.");
PyDoc_STRVAR(pattern_subn_doc,
-"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
+"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\n\
Return the tuple (new_string, number_of_subs_made) found by replacing\n\
the leftmost non-overlapping occurrences of pattern with the\n\
replacement repl.");
@@ -2607,15 +2626,15 @@ static PyMemberDef pattern_members[] = {
{NULL} /* Sentinel */
};
-statichere PyTypeObject Pattern_Type = {
- PyObject_HEAD_INIT(NULL)
- 0, "_" SRE_MODULE ".SRE_Pattern",
+static PyTypeObject Pattern_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_" SRE_MODULE ".SRE_Pattern",
sizeof(PatternObject), sizeof(SRE_CODE),
- (destructor)pattern_dealloc, /*tp_dealloc*/
- 0, /* tp_print */
- 0, /* tp_getattrn */
+ (destructor)pattern_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
0, /* tp_setattr */
- 0, /* tp_compare */
+ 0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -2626,7 +2645,7 @@ statichere PyTypeObject Pattern_Type = {
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
pattern_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
@@ -2654,6 +2673,7 @@ _compile(PyObject* self_, PyObject* args)
Py_ssize_t groups = 0;
PyObject* groupindex = NULL;
PyObject* indexgroup = NULL;
+
if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
&PyList_Type, &code, &groups,
&groupindex, &indexgroup))
@@ -2668,13 +2688,13 @@ _compile(PyObject* self_, PyObject* args)
self->pattern = NULL;
self->groupindex = NULL;
self->indexgroup = NULL;
+ self->view.buf = NULL;
self->codesize = n;
for (i = 0; i < n; i++) {
PyObject *o = PyList_GET_ITEM(code, i);
- unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
- : PyLong_AsUnsignedLong(o);
+ unsigned long value = PyLong_AsUnsignedLong(o);
if (value == (unsigned long)-1 && PyErr_Occurred()) {
if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
PyErr_SetString(PyExc_OverflowError,
@@ -2695,6 +2715,16 @@ _compile(PyObject* self_, PyObject* args)
return NULL;
}
+ if (pattern == Py_None)
+ self->charsize = -1;
+ else {
+ Py_ssize_t p_length;
+ if (!getstring(pattern, &p_length, &self->charsize, &self->view)) {
+ Py_DECREF(self);
+ return NULL;
+ }
+ }
+
Py_INCREF(pattern);
self->pattern = pattern;
@@ -2954,13 +2984,13 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
<INFO> <1=skip> <2=flags> <3=min> <4=max>;
If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
more follows. */
- SRE_CODE flags, i;
+ SRE_CODE flags, min, max, i;
SRE_CODE *newcode;
GET_SKIP;
newcode = code+skip-1;
GET_ARG; flags = arg;
- GET_ARG; /* min */
- GET_ARG; /* max */
+ GET_ARG; min = arg;
+ GET_ARG; max = arg;
/* Check that only valid flags are present */
if ((flags & ~(SRE_INFO_PREFIX |
SRE_INFO_LITERAL |
@@ -2976,9 +3006,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
/* Validate the prefix */
if (flags & SRE_INFO_PREFIX) {
- SRE_CODE prefix_len;
+ SRE_CODE prefix_len, prefix_skip;
GET_ARG; prefix_len = arg;
- GET_ARG; /* prefix skip */
+ GET_ARG; prefix_skip = arg;
/* Here comes the prefix string */
if (code+prefix_len < code || code+prefix_len > newcode)
FAIL;
@@ -3224,16 +3254,20 @@ match_getindex(MatchObject* self, PyObject* index)
{
Py_ssize_t i;
- if (PyInt_Check(index))
- return PyInt_AsSsize_t(index);
+ if (index == NULL)
+ /* Default value */
+ return 0;
+
+ if (PyLong_Check(index))
+ return PyLong_AsSsize_t(index);
i = -1;
if (self->pattern->groupindex) {
index = PyObject_GetItem(self->pattern->groupindex, index);
if (index) {
- if (PyInt_Check(index) || PyLong_Check(index))
- i = PyInt_AsSsize_t(index);
+ if (PyLong_Check(index))
+ i = PyLong_AsSsize_t(index);
Py_DECREF(index);
} else
PyErr_Clear();
@@ -3374,7 +3408,7 @@ match_start(MatchObject* self, PyObject* args)
{
Py_ssize_t index;
- PyObject* index_ = Py_False; /* zero */
+ PyObject* index_ = NULL;
if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
return NULL;
@@ -3397,7 +3431,7 @@ match_end(MatchObject* self, PyObject* args)
{
Py_ssize_t index;
- PyObject* index_ = Py_False; /* zero */
+ PyObject* index_ = NULL;
if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
return NULL;
@@ -3425,12 +3459,12 @@ _pair(Py_ssize_t i1, Py_ssize_t i2)
if (!pair)
return NULL;
- item = PyInt_FromSsize_t(i1);
+ item = PyLong_FromSsize_t(i1);
if (!item)
goto error;
PyTuple_SET_ITEM(pair, 0, item);
- item = PyInt_FromSsize_t(i2);
+ item = PyLong_FromSsize_t(i2);
if (!item)
goto error;
PyTuple_SET_ITEM(pair, 1, item);
@@ -3447,7 +3481,7 @@ match_span(MatchObject* self, PyObject* args)
{
Py_ssize_t index;
- PyObject* index_ = Py_False; /* zero */
+ PyObject* index_ = NULL;
if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
return NULL;
@@ -3550,36 +3584,36 @@ PyDoc_STRVAR(match_doc,
Match objects always have a boolean value of True.");
PyDoc_STRVAR(match_group_doc,
-"group([group1, ...]) -> str or tuple.\n\
+"group([group1, ...]) -> str or tuple.\n\n\
Return subgroup(s) of the match by indices or names.\n\
For 0 returns the entire match.");
PyDoc_STRVAR(match_start_doc,
-"start([group=0]) -> int.\n\
+"start([group=0]) -> int.\n\n\
Return index of the start of the substring matched by group.");
PyDoc_STRVAR(match_end_doc,
-"end([group=0]) -> int.\n\
+"end([group=0]) -> int.\n\n\
Return index of the end of the substring matched by group.");
PyDoc_STRVAR(match_span_doc,
-"span([group]) -> tuple.\n\
+"span([group]) -> tuple.\n\n\
For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
PyDoc_STRVAR(match_groups_doc,
-"groups([default=None]) -> tuple.\n\
+"groups([default=None]) -> tuple.\n\n\
Return a tuple containing all the subgroups of the match, from 1.\n\
The default argument is used for groups\n\
that did not participate in the match");
PyDoc_STRVAR(match_groupdict_doc,
-"groupdict([default=None]) -> dict.\n\
+"groupdict([default=None]) -> dict.\n\n\
Return a dictionary containing all the named subgroups of the match,\n\
keyed by the subgroup name. The default argument is used for groups\n\
that did not participate in the match");
PyDoc_STRVAR(match_expand_doc,
-"expand(template) -> str.\n\
+"expand(template) -> str.\n\n\
Return the string obtained by doing backslash substitution\n\
on the string template, as done by the sub() method.");
@@ -3648,37 +3682,36 @@ static PyMemberDef match_members[] = {
{NULL}
};
-
/* FIXME: implement setattr("string", None) as a special case (to
detach the associated string, if any) */
static PyTypeObject Match_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
+ PyVarObject_HEAD_INIT(NULL,0)
"_" SRE_MODULE ".SRE_Match",
sizeof(MatchObject), sizeof(Py_ssize_t),
- (destructor)match_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- 0, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT,
- match_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
+ (destructor)match_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ match_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
match_methods, /* tp_methods */
match_members, /* tp_members */
match_getset, /* tp_getset */
@@ -3836,11 +3869,11 @@ static PyMemberDef scanner_members[] = {
{NULL} /* Sentinel */
};
-statichere PyTypeObject Scanner_Type = {
- PyObject_HEAD_INIT(NULL)
- 0, "_" SRE_MODULE ".SRE_Scanner",
+static PyTypeObject Scanner_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_" SRE_MODULE ".SRE_Scanner",
sizeof(ScannerObject), 0,
- (destructor)scanner_dealloc, /*tp_dealloc*/
+ (destructor)scanner_dealloc,/* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
@@ -3906,11 +3939,19 @@ static PyMethodDef _functions[] = {
{NULL, NULL}
};
-#if PY_VERSION_HEX < 0x02030000
-DL_EXPORT(void) init_sre(void)
-#else
-PyMODINIT_FUNC init_sre(void)
-#endif
+static struct PyModuleDef sremodule = { /* module definition handed to PyModule_Create() in PyInit__sre */
+ PyModuleDef_HEAD_INIT,
+ "_" SRE_MODULE, /* module name: same string the removed Py_InitModule() call passed */
+ NULL, /* presumably the module docstring slot (none) -- confirm against PyModuleDef */
+ -1, /* presumably the per-module state size -- confirm against PyModuleDef */
+ _functions, /* module-level method table, previously passed to Py_InitModule() */
+ NULL, /* remaining slots unused; NOTE(review): their names vary by Python version -- confirm */
+ NULL,
+ NULL,
+ NULL
+};
+
+PyMODINIT_FUNC PyInit__sre(void)
{
PyObject* m;
PyObject* d;
@@ -3919,30 +3960,31 @@ PyMODINIT_FUNC init_sre(void)
/* Patch object types */
if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
PyType_Ready(&Scanner_Type))
- return;
+ return NULL;
- m = Py_InitModule("_" SRE_MODULE, _functions);
+ m = PyModule_Create(&sremodule);
if (m == NULL)
- return;
+ return NULL;
d = PyModule_GetDict(m);
- x = PyInt_FromLong(SRE_MAGIC);
+ x = PyLong_FromLong(SRE_MAGIC);
if (x) {
PyDict_SetItemString(d, "MAGIC", x);
Py_DECREF(x);
}
- x = PyInt_FromLong(sizeof(SRE_CODE));
+ x = PyLong_FromLong(sizeof(SRE_CODE));
if (x) {
PyDict_SetItemString(d, "CODESIZE", x);
Py_DECREF(x);
}
- x = PyString_FromString(copyright);
+ x = PyUnicode_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);
Py_DECREF(x);
}
+ return m;
}
#endif /* !defined(SRE_RECURSIVE) */