diff options
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- | Modules/_sre.c | 272 |
1 files changed, 157 insertions, 115 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 8f47688d410..88bbf6a941e 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -58,12 +58,8 @@ static char copyright[] = /* defining this one enables tracing */ #undef VERBOSE -#if PY_VERSION_HEX >= 0x01060000 -#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE) /* defining this enables unicode support (default under 1.6a1 and later) */ #define HAVE_UNICODE -#endif -#endif /* -------------------------------------------------------------------- */ /* optional features */ @@ -71,9 +67,6 @@ static char copyright[] = /* enables fast searching */ #define USE_FAST_SEARCH -/* enables aggressive inlining (always on for Visual C) */ -#undef USE_INLINE - /* enables copy/deepcopy handling (work in progress) */ #undef USE_BUILTIN_COPY @@ -1671,7 +1664,7 @@ state_reset(SRE_STATE* state) } static void* -getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) +getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize, Py_buffer *view) { /* given a python object, return a data pointer, a length (in characters), and a character size. return NULL if the object @@ -1682,40 +1675,37 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) int charsize; void* ptr; -#if defined(HAVE_UNICODE) + /* Unicode objects do not support the buffer API. So, get the data + directly instead. */ if (PyUnicode_Check(string)) { - /* unicode strings doesn't always support the buffer interface */ - ptr = (void*) PyUnicode_AS_DATA(string); - /* bytes = PyUnicode_GET_DATA_SIZE(string); */ - size = PyUnicode_GET_SIZE(string); - charsize = sizeof(Py_UNICODE); - - } else { -#endif + ptr = (void *)PyUnicode_AS_DATA(string); + *p_length = PyUnicode_GET_SIZE(string); + *p_charsize = sizeof(Py_UNICODE); + return ptr; + } /* get pointer to string buffer */ + view->len = -1; buffer = Py_TYPE(string)->tp_as_buffer; - if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount || - buffer->bf_getsegcount(string, NULL) != 1) { - PyErr_SetString(PyExc_TypeError, "expected string or buffer"); - return NULL; + if (!buffer || !buffer->bf_getbuffer || + (*buffer->bf_getbuffer)(string, view, PyBUF_SIMPLE) < 0) { + PyErr_SetString(PyExc_TypeError, "expected string or buffer"); + return NULL; } /* determine buffer size */ - bytes = buffer->bf_getreadbuffer(string, 0, &ptr); + bytes = view->len; + ptr = view->buf; + if (bytes < 0) { PyErr_SetString(PyExc_TypeError, "buffer has negative size"); - return NULL; + goto err; } /* determine character size */ -#if PY_VERSION_HEX >= 0x01060000 size = PyObject_Size(string); -#else - size = PyObject_Length(string); -#endif - if (PyString_Check(string) || bytes == size) + if (PyBytes_Check(string) || bytes == size) charsize = 1; #if defined(HAVE_UNICODE) else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE))) @@ -1723,17 +1713,22 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) #endif else { PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); - return NULL; + goto err; } -#if defined(HAVE_UNICODE) - } -#endif - *p_length = size; *p_charsize = charsize; + if (ptr == NULL) { + PyErr_SetString(PyExc_ValueError, + "Buffer is NULL"); + goto err; + } return ptr; + err: + PyBuffer_Release(view); + view->buf = NULL; + return NULL; } LOCAL(PyObject*) @@ -1751,9 +1746,21 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->lastmark = -1; state->lastindex = -1; - ptr = getstring(string, &length, &charsize); + state->buffer.buf = NULL; + ptr = getstring(string, &length, &charsize, &state->buffer); if (!ptr) - return NULL; + goto err; + + if (charsize == 1 && pattern->charsize > 1) { + PyErr_SetString(PyExc_TypeError, + "can't use a string pattern on a bytes-like object"); + goto err; + } + if (charsize > 1 && pattern->charsize == 1) { + PyErr_SetString(PyExc_TypeError, + "can't use a bytes pattern on a string-like object"); + goto err; + } /* adjust boundaries */ if (start < 0) @@ -1790,11 +1797,17 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->lower = sre_lower; return string; + err: + if (state->buffer.buf) + PyBuffer_Release(&state->buffer); + return NULL; } LOCAL(void) state_fini(SRE_STATE* state) { + if (state->buffer.buf) + PyBuffer_Release(&state->buffer); Py_XDECREF(state->string); data_stack_dealloc(state); } @@ -1856,6 +1869,8 @@ pattern_dealloc(PatternObject* self) { if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); + if (self->view.buf) + PyBuffer_Release(&self->view); Py_XDECREF(self->pattern); Py_XDECREF(self->groupindex); Py_XDECREF(self->indexgroup); @@ -1949,7 +1964,7 @@ call(char* module, char* function, PyObject* args) if (!args) return NULL; - name = PyString_FromString(module); + name = PyUnicode_FromString(module); if (!name) return NULL; mod = PyImport_Import(name); @@ -2290,6 +2305,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, Py_ssize_t i, b, e; int bint; int filter_is_callable; + Py_buffer view; if (PyCallable_Check(ptemplate)) { /* sub/subn takes either a function or a template */ @@ -2299,7 +2315,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, } else { /* if not callable, check if it's a literal string */ int literal; - ptr = getstring(ptemplate, &n, &bint); + view.buf = NULL; + ptr = getstring(ptemplate, &n, &bint, &view); b = bint; if (ptr) { if (b == 1) { @@ -2313,6 +2330,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, PyErr_Clear(); literal = 0; } + if (view.buf) + PyBuffer_Release(&view); if (literal) { filter = ptemplate; Py_INCREF(filter); @@ -2540,35 +2559,35 @@ pattern_deepcopy(PatternObject* self, PyObject* memo) } PyDoc_STRVAR(pattern_match_doc, -"match(string[, pos[, endpos]]) --> match object or None.\n\ +"match(string[, pos[, endpos]]) -> match object or None.\n\n\ Matches zero or more characters at the beginning of the string"); PyDoc_STRVAR(pattern_search_doc, -"search(string[, pos[, endpos]]) --> match object or None.\n\ +"search(string[, pos[, endpos]]) -> match object or None.\n\n\ Scan through string looking for a match, and return a corresponding\n\ match object instance. Return None if no position in the string matches."); PyDoc_STRVAR(pattern_split_doc, -"split(string[, maxsplit = 0]) --> list.\n\ +"split(string[, maxsplit = 0]) -> list.\n\n\ Split string by the occurrences of pattern."); PyDoc_STRVAR(pattern_findall_doc, -"findall(string[, pos[, endpos]]) --> list.\n\ +"findall(string[, pos[, endpos]]) -> list.\n\n\ Return a list of all non-overlapping matches of pattern in string."); PyDoc_STRVAR(pattern_finditer_doc, -"finditer(string[, pos[, endpos]]) --> iterator.\n\ +"finditer(string[, pos[, endpos]]) -> iterator.\n\n\ Return an iterator over all non-overlapping matches for the \n\ RE pattern in string. For each match, the iterator returns a\n\ match object."); PyDoc_STRVAR(pattern_sub_doc, -"sub(repl, string[, count = 0]) --> newstring\n\ +"sub(repl, string[, count = 0]) -> newstring.\n\n\ Return the string obtained by replacing the leftmost non-overlapping\n\ occurrences of pattern in string by the replacement repl."); PyDoc_STRVAR(pattern_subn_doc, -"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ +"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\n\ Return the tuple (new_string, number_of_subs_made) found by replacing\n\ the leftmost non-overlapping occurrences of pattern with the\n\ replacement repl."); @@ -2607,15 +2626,15 @@ static PyMemberDef pattern_members[] = { {NULL} /* Sentinel */ }; -statichere PyTypeObject Pattern_Type = { - PyObject_HEAD_INIT(NULL) - 0, "_" SRE_MODULE ".SRE_Pattern", +static PyTypeObject Pattern_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_" SRE_MODULE ".SRE_Pattern", sizeof(PatternObject), sizeof(SRE_CODE), - (destructor)pattern_dealloc, /*tp_dealloc*/ - 0, /* tp_print */ - 0, /* tp_getattrn */ + (destructor)pattern_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ 0, /* tp_setattr */ - 0, /* tp_compare */ + 0, /* tp_reserved */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ @@ -2626,7 +2645,7 @@ statichere PyTypeObject Pattern_Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ pattern_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ @@ -2654,6 +2673,7 @@ _compile(PyObject* self_, PyObject* args) Py_ssize_t groups = 0; PyObject* groupindex = NULL; PyObject* indexgroup = NULL; + if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, &PyList_Type, &code, &groups, &groupindex, &indexgroup)) @@ -2668,13 +2688,13 @@ _compile(PyObject* self_, PyObject* args) self->pattern = NULL; self->groupindex = NULL; self->indexgroup = NULL; + self->view.buf = NULL; self->codesize = n; for (i = 0; i < n; i++) { PyObject *o = PyList_GET_ITEM(code, i); - unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) - : PyLong_AsUnsignedLong(o); + unsigned long value = PyLong_AsUnsignedLong(o); if (value == (unsigned long)-1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_OverflowError)) { PyErr_SetString(PyExc_OverflowError, @@ -2695,6 +2715,16 @@ _compile(PyObject* self_, PyObject* args) return NULL; } + if (pattern == Py_None) + self->charsize = -1; + else { + Py_ssize_t p_length; + if (!getstring(pattern, &p_length, &self->charsize, &self->view)) { + Py_DECREF(self); + return NULL; + } + } + Py_INCREF(pattern); self->pattern = pattern; @@ -2954,13 +2984,13 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) <INFO> <1=skip> <2=flags> <3=min> <4=max>; If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, more follows. */ - SRE_CODE flags, i; + SRE_CODE flags, min, max, i; SRE_CODE *newcode; GET_SKIP; newcode = code+skip-1; GET_ARG; flags = arg; - GET_ARG; /* min */ - GET_ARG; /* max */ + GET_ARG; min = arg; + GET_ARG; max = arg; /* Check that only valid flags are present */ if ((flags & ~(SRE_INFO_PREFIX | SRE_INFO_LITERAL | @@ -2976,9 +3006,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) FAIL; /* Validate the prefix */ if (flags & SRE_INFO_PREFIX) { - SRE_CODE prefix_len; + SRE_CODE prefix_len, prefix_skip; GET_ARG; prefix_len = arg; - GET_ARG; /* prefix skip */ + GET_ARG; prefix_skip = arg; /* Here comes the prefix string */ if (code+prefix_len < code || code+prefix_len > newcode) FAIL; @@ -3224,16 +3254,20 @@ match_getindex(MatchObject* self, PyObject* index) { Py_ssize_t i; - if (PyInt_Check(index)) - return PyInt_AsSsize_t(index); + if (index == NULL) + /* Default value */ + return 0; + + if (PyLong_Check(index)) + return PyLong_AsSsize_t(index); i = -1; if (self->pattern->groupindex) { index = PyObject_GetItem(self->pattern->groupindex, index); if (index) { - if (PyInt_Check(index) || PyLong_Check(index)) - i = PyInt_AsSsize_t(index); + if (PyLong_Check(index)) + i = PyLong_AsSsize_t(index); Py_DECREF(index); } else PyErr_Clear(); @@ -3374,7 +3408,7 @@ match_start(MatchObject* self, PyObject* args) { Py_ssize_t index; - PyObject* index_ = Py_False; /* zero */ + PyObject* index_ = NULL; if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_)) return NULL; @@ -3397,7 +3431,7 @@ match_end(MatchObject* self, PyObject* args) { Py_ssize_t index; - PyObject* index_ = Py_False; /* zero */ + PyObject* index_ = NULL; if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_)) return NULL; @@ -3425,12 +3459,12 @@ _pair(Py_ssize_t i1, Py_ssize_t i2) if (!pair) return NULL; - item = PyInt_FromSsize_t(i1); + item = PyLong_FromSsize_t(i1); if (!item) goto error; PyTuple_SET_ITEM(pair, 0, item); - item = PyInt_FromSsize_t(i2); + item = PyLong_FromSsize_t(i2); if (!item) goto error; PyTuple_SET_ITEM(pair, 1, item); @@ -3447,7 +3481,7 @@ match_span(MatchObject* self, PyObject* args) { Py_ssize_t index; - PyObject* index_ = Py_False; /* zero */ + PyObject* index_ = NULL; if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_)) return NULL; @@ -3550,36 +3584,36 @@ PyDoc_STRVAR(match_doc, Match objects always have a boolean value of True."); PyDoc_STRVAR(match_group_doc, -"group([group1, ...]) -> str or tuple.\n\ +"group([group1, ...]) -> str or tuple.\n\n\ Return subgroup(s) of the match by indices or names.\n\ For 0 returns the entire match."); PyDoc_STRVAR(match_start_doc, -"start([group=0]) -> int.\n\ +"start([group=0]) -> int.\n\n\ Return index of the start of the substring matched by group."); PyDoc_STRVAR(match_end_doc, -"end([group=0]) -> int.\n\ +"end([group=0]) -> int.\n\n\ Return index of the end of the substring matched by group."); PyDoc_STRVAR(match_span_doc, -"span([group]) -> tuple.\n\ +"span([group]) -> tuple.\n\n\ For MatchObject m, return the 2-tuple (m.start(group), m.end(group))."); PyDoc_STRVAR(match_groups_doc, -"groups([default=None]) -> tuple.\n\ +"groups([default=None]) -> tuple.\n\n\ Return a tuple containing all the subgroups of the match, from 1.\n\ The default argument is used for groups\n\ that did not participate in the match"); PyDoc_STRVAR(match_groupdict_doc, -"groupdict([default=None]) -> dict.\n\ +"groupdict([default=None]) -> dict.\n\n\ Return a dictionary containing all the named subgroups of the match,\n\ keyed by the subgroup name. The default argument is used for groups\n\ that did not participate in the match"); PyDoc_STRVAR(match_expand_doc, -"expand(template) -> str.\n\ +"expand(template) -> str.\n\n\ Return the string obtained by doing backslash substitution\n\ on the string template, as done by the sub() method."); @@ -3648,37 +3682,36 @@ static PyMemberDef match_members[] = { {NULL} }; - /* FIXME: implement setattr("string", None) as a special case (to detach the associated string, if any */ static PyTypeObject Match_Type = { - PyVarObject_HEAD_INIT(NULL, 0) + PyVarObject_HEAD_INIT(NULL,0) "_" SRE_MODULE ".SRE_Match", sizeof(MatchObject), sizeof(Py_ssize_t), - (destructor)match_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, - match_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ + (destructor)match_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + match_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ match_methods, /* tp_methods */ match_members, /* tp_members */ match_getset, /* tp_getset */ @@ -3836,11 +3869,11 @@ static PyMemberDef scanner_members[] = { {NULL} /* Sentinel */ }; -statichere PyTypeObject Scanner_Type = { - PyObject_HEAD_INIT(NULL) - 0, "_" SRE_MODULE ".SRE_Scanner", +static PyTypeObject Scanner_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_" SRE_MODULE ".SRE_Scanner", sizeof(ScannerObject), 0, - (destructor)scanner_dealloc, /*tp_dealloc*/ + (destructor)scanner_dealloc,/* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -3906,11 +3939,19 @@ static PyMethodDef _functions[] = { {NULL, NULL} }; -#if PY_VERSION_HEX < 0x02030000 -DL_EXPORT(void) init_sre(void) -#else -PyMODINIT_FUNC init_sre(void) -#endif +static struct PyModuleDef sremodule = { + PyModuleDef_HEAD_INIT, + "_" SRE_MODULE, + NULL, + -1, + _functions, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC PyInit__sre(void) { PyObject* m; PyObject* d; @@ -3919,30 +3960,31 @@ PyMODINIT_FUNC init_sre(void) /* Patch object types */ if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) || PyType_Ready(&Scanner_Type)) - return; + return NULL; - m = Py_InitModule("_" SRE_MODULE, _functions); + m = PyModule_Create(&sremodule); if (m == NULL) - return; + return NULL; d = PyModule_GetDict(m); - x = PyInt_FromLong(SRE_MAGIC); + x = PyLong_FromLong(SRE_MAGIC); if (x) { PyDict_SetItemString(d, "MAGIC", x); Py_DECREF(x); } - x = PyInt_FromLong(sizeof(SRE_CODE)); + x = PyLong_FromLong(sizeof(SRE_CODE)); if (x) { PyDict_SetItemString(d, "CODESIZE", x); Py_DECREF(x); } - x = PyString_FromString(copyright); + x = PyUnicode_FromString(copyright); if (x) { PyDict_SetItemString(d, "copyright", x); Py_DECREF(x); } + return m; } #endif /* !defined(SRE_RECURSIVE) */ |