aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_io/bytesio.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_io/bytesio.c')
-rw-r--r--Modules/_io/bytesio.c260
1 files changed, 180 insertions, 80 deletions
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index 54840bb88a2..56ad788d3f1 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -11,6 +11,10 @@ typedef struct {
PyObject *dict;
PyObject *weakreflist;
Py_ssize_t exports;
+ /** If `initvalue' != NULL, `buf' is a read-only pointer into the PyBytes
+ * referenced by `initvalue'. It must be copied prior to mutation, and
+ * released during finalization */
+ PyObject *initvalue;
} bytesio;
typedef struct {
@@ -19,11 +23,11 @@ typedef struct {
} bytesiobuf;
+/* Guard for operations that need an open object: when `self' has no buffer
+ * (buf == NULL), set ValueError and return `ret' from the enclosing function.
+ * `ret' lets the macro be used in functions returning either a pointer
+ * (pass NULL) or an int status (pass -1). */
-#define CHECK_CLOSED(self) \
+#define CHECK_CLOSED(self, ret) \
if ((self)->buf == NULL) { \
PyErr_SetString(PyExc_ValueError, \
"I/O operation on closed file."); \
- return NULL; \
+ return ret; \
}
#define CHECK_EXPORTS(self) \
@@ -33,36 +37,74 @@ typedef struct {
return NULL; \
}
+/* Make the buffer privately owned and writable. While `initvalue' is set,
+ * `buf' merely points into that bytes object; this helper replaces it with a
+ * heap copy and drops the reference to `initvalue'.
+ *
+ * `preferred_size' is the capacity to allocate for the copy, chosen by the
+ * caller so that the mutation that follows needs no second reallocation.
+ * Unless `truncate' is non-zero, the capacity is raised to at least
+ * self->string_size so that no data is lost; with `truncate' set, any data
+ * beyond `preferred_size' is discarded.
+ *
+ * Does nothing when the buffer is not shared. Returns 0 on success; on
+ * failure sets an exception, returns -1 and leaves the object unchanged.
+ */
+static int
+unshare(bytesio *self, size_t preferred_size, int truncate)
+{
+    char *private_buf;
+    size_t keep;
+
+    if (self->initvalue == NULL) {
+        return 0;
+    }
+
+    if (!truncate && (size_t)self->string_size > preferred_size) {
+        preferred_size = (size_t)self->string_size;
+    }
+
+    /* PyMem_Malloc() returns NULL if preferred_size is bigger
+       than PY_SSIZE_T_MAX */
+    private_buf = (char *)PyMem_Malloc(preferred_size);
+    if (private_buf == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* Number of existing bytes that fit into the new buffer. */
+    keep = (size_t)self->string_size;
+    if (keep > preferred_size) {
+        keep = preferred_size;
+    }
+    memcpy(private_buf, self->buf, keep);
+
+    /* Only now let go of the shared bytes object. */
+    Py_CLEAR(self->initvalue);
+    self->buf = private_buf;
+    self->buf_size = preferred_size;
+    self->string_size = (Py_ssize_t)keep;
+    return 0;
+}
/* Internal routine to get a line from the buffer of a BytesIO
object. Returns the length between the current position to the
next newline character. */
static Py_ssize_t
-get_line(bytesio *self, char **output)
+scan_eol(bytesio *self, Py_ssize_t len)
{
- char *n;
- const char *str_end;
- Py_ssize_t len;
+    const char *start, *n;
+    Py_ssize_t maxlen;
assert(self->buf != NULL);
+
+    /* `len' caps how many bytes are scanned; a negative value means "up to
+       the end of the data". The position is NOT advanced here: callers add
+       the returned length to self->pos themselves. */
+
+    /* The position may sit at or beyond the end of the data (e.g. after
+       seeking past the end). There is nothing to scan then, and letting
+       `maxlen' go negative below would hand memchr() a huge size_t. */
+    if (self->pos >= self->string_size)
+        return 0;
+
/* Move to the end of the line, up to the end of the string, s. */
- str_end = self->buf + self->string_size;
- for (n = self->buf + self->pos;
- n < str_end && *n != '\n';
- n++);
-
- /* Skip the newline character */
- if (n < str_end)
- n++;
-
- /* Get the length from the current position to the end of the line. */
- len = n - (self->buf + self->pos);
- *output = self->buf + self->pos;
-
+    start = self->buf + self->pos;
+    maxlen = self->string_size - self->pos;
+    if (len < 0 || len > maxlen)
+        len = maxlen;
+
+    if (len) {
+        n = memchr(start, '\n', len);
+        if (n)
+            /* Get the length from the current position to the end of
+               the line. */
+            len = n - start + 1;
+    }
assert(len >= 0);
assert(self->pos < PY_SSIZE_T_MAX - len);
- self->pos += len;
return len;
}
@@ -125,11 +167,18 @@ resize_buffer(bytesio *self, size_t size)
static Py_ssize_t
write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
{
+ size_t desired;
+
assert(self->buf != NULL);
assert(self->pos >= 0);
assert(len >= 0);
- if ((size_t)self->pos + len > self->buf_size) {
+ desired = (size_t)self->pos + len;
+ if (unshare(self, desired, 0) < 0) {
+ return -1;
+ }
+
+ if (desired > self->buf_size) {
if (resize_buffer(self, (size_t)self->pos + len) < 0)
return -1;
}
@@ -160,6 +209,74 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
return len;
}
+/* Drop whatever buffer the BytesIO currently holds -- releasing the
+ * reference to `initvalue' when the buffer is borrowed from it, or freeing
+ * the heap buffer otherwise -- and leave the object in the closed state
+ * (buf == NULL) with zero size and position. */
+static void
+reset(bytesio *self)
+{
+    if (self->initvalue != NULL) {
+        /* `buf' points into the bytes object, so it must not be freed. */
+        Py_CLEAR(self->initvalue);
+    }
+    else if (self->buf != NULL) {
+        PyMem_Free(self->buf);
+    }
+
+    self->pos = 0;
+    self->string_size = 0;
+    self->buf = NULL;
+}
+
+/* Replace the current buffer with a freshly allocated private one of `size'
+ * bytes, leaving the object empty (string_size and pos are zeroed by
+ * reset()). The allocation is attempted before the old buffer is released,
+ * so on failure (exception set, -1 returned) the existing state is intact.
+ * Returns 0 on success. */
+static int
+reinit_private(bytesio *self, Py_ssize_t size)
+{
+    char *fresh = (char *)PyMem_Malloc(size);
+
+    if (fresh != NULL) {
+        reset(self);
+        self->buf = fresh;
+        self->buf_size = size;
+        return 0;
+    }
+
+    PyErr_NoMemory();
+    return -1;
+}
+
+/* Internal version of BytesIO.__init__: discards the current contents and
+ * repopulates the object from `initvalue':
+ *   - NULL or None: start over with an empty private buffer;
+ *   - an exact bytes object: keep a reference to it and point `buf' at its
+ *     data without copying (unshare() makes a private copy before any
+ *     mutation);
+ *   - anything else: must support the buffer protocol; its contents are
+ *     copied into a private buffer.
+ *
+ * Fails with ValueError if the object is closed and with BufferError if
+ * buffer exports are outstanding. Returns 0 on success, or sets an
+ * exception and returns -1 on failure. */
+static int
+reinit(bytesio *self, PyObject *initvalue)
+{
+    CHECK_CLOSED(self, -1);
+
+    /* Every branch below ends up in reset(), which frees or releases the
+       current buffer. Refuse to do that while `exports' is non-zero, as any
+       exported view would be left pointing at freed memory. CHECK_EXPORTS
+       cannot be used here because it returns NULL rather than -1. */
+    if (self->exports > 0) {
+        PyErr_SetString(PyExc_BufferError,
+                        "Existing exports of data: object cannot be re-sized");
+        return -1;
+    }
+
+    if (initvalue == NULL || initvalue == Py_None) {
+        if (reinit_private(self, 0) < 0) {
+            return -1;
+        }
+    } else if (PyBytes_CheckExact(initvalue)) {
+        /* Share the bytes object's storage; `initvalue' keeps it alive. */
+        reset(self);
+        Py_INCREF(initvalue);
+        self->initvalue = initvalue;
+        self->buf = PyBytes_AS_STRING(initvalue);
+        self->buf_size = PyBytes_GET_SIZE(initvalue);
+        self->string_size = PyBytes_GET_SIZE(initvalue);
+    } else {
+        Py_buffer buf;
+        /* Raises the appropriate TypeError for non-buffer objects. */
+        if (PyObject_GetBuffer(initvalue, &buf, PyBUF_CONTIG_RO) < 0) {
+            return -1;
+        }
+        if (reinit_private(self, buf.len) < 0) {
+            PyBuffer_Release(&buf);
+            return -1;
+        }
+        memcpy(self->buf, buf.buf, buf.len);
+        self->string_size = buf.len;
+        PyBuffer_Release(&buf);
+    }
+    return 0;
+}
+
static PyObject *
bytesio_get_closed(bytesio *self)
{
@@ -184,7 +301,7 @@ PyDoc_STRVAR(seekable_doc,
static PyObject *
return_not_closed(bytesio *self)
{
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
Py_RETURN_TRUE;
}
@@ -194,7 +311,7 @@ PyDoc_STRVAR(flush_doc,
static PyObject *
bytesio_flush(bytesio *self)
{
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
Py_RETURN_NONE;
}
@@ -210,7 +327,7 @@ bytesio_getbuffer(bytesio *self)
bytesiobuf *buf;
PyObject *view;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
buf = (bytesiobuf *) type->tp_alloc(type, 0);
if (buf == NULL)
@@ -230,7 +347,7 @@ PyDoc_STRVAR(getval_doc,
static PyObject *
bytesio_getvalue(bytesio *self)
{
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
return PyBytes_FromStringAndSize(self->buf, self->string_size);
}
@@ -243,7 +360,7 @@ PyDoc_STRVAR(isatty_doc,
static PyObject *
bytesio_isatty(bytesio *self)
{
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
Py_RETURN_FALSE;
}
@@ -253,7 +370,7 @@ PyDoc_STRVAR(tell_doc,
static PyObject *
bytesio_tell(bytesio *self)
{
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
return PyLong_FromSsize_t(self->pos);
}
@@ -270,7 +387,7 @@ bytesio_read(bytesio *self, PyObject *args)
char *output;
PyObject *arg = Py_None;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
if (!PyArg_ParseTuple(args, "|O:read", &arg))
return NULL;
@@ -339,7 +456,7 @@ bytesio_readline(bytesio *self, PyObject *args)
char *output;
PyObject *arg = Py_None;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
if (!PyArg_ParseTuple(args, "|O:readline", &arg))
return NULL;
@@ -359,14 +476,10 @@ bytesio_readline(bytesio *self, PyObject *args)
return NULL;
}
- n = get_line(self, &output);
-
- if (size >= 0 && size < n) {
- size = n - size;
- n -= size;
- self->pos -= size;
- }
+ n = scan_eol(self, size);
+ output = self->buf + self->pos;
+ self->pos += n;
return PyBytes_FromStringAndSize(output, n);
}
@@ -385,7 +498,7 @@ bytesio_readlines(bytesio *self, PyObject *args)
char *output;
PyObject *arg = Py_None;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
return NULL;
@@ -410,7 +523,9 @@ bytesio_readlines(bytesio *self, PyObject *args)
if (!result)
return NULL;
- while ((n = get_line(self, &output)) != 0) {
+ output = self->buf + self->pos;
+ while ((n = scan_eol(self, -1)) != 0) {
+ self->pos += n;
line = PyBytes_FromStringAndSize(output, n);
if (!line)
goto on_error;
@@ -422,6 +537,7 @@ bytesio_readlines(bytesio *self, PyObject *args)
size += n;
if (maxsize > 0 && size >= maxsize)
break;
+ output += n;
}
return result;
@@ -442,7 +558,7 @@ bytesio_readinto(bytesio *self, PyObject *buffer)
void *raw_buffer;
Py_ssize_t len, n;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1)
return NULL;
@@ -475,7 +591,7 @@ bytesio_truncate(bytesio *self, PyObject *args)
Py_ssize_t size;
PyObject *arg = Py_None;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
CHECK_EXPORTS(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
@@ -502,6 +618,10 @@ bytesio_truncate(bytesio *self, PyObject *args)
return NULL;
}
+ if (unshare(self, size, 1) < 0) {
+ return NULL;
+ }
+
if (size < self->string_size) {
self->string_size = size;
if (resize_buffer(self, size) < 0)
@@ -514,16 +634,18 @@ bytesio_truncate(bytesio *self, PyObject *args)
static PyObject *
bytesio_iternext(bytesio *self)
{
- char *next;
+ const char *next;
Py_ssize_t n;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
- n = get_line(self, &next);
+ n = scan_eol(self, -1);
- if (!next || n == 0)
+ if (n == 0)
return NULL;
+ next = self->buf + self->pos;
+ self->pos += n;
return PyBytes_FromStringAndSize(next, n);
}
@@ -542,7 +664,7 @@ bytesio_seek(bytesio *self, PyObject *args)
Py_ssize_t pos;
int mode = 0;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
return NULL;
@@ -597,7 +719,7 @@ bytesio_write(bytesio *self, PyObject *obj)
Py_buffer buf;
PyObject *result = NULL;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
CHECK_EXPORTS(self);
if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
@@ -625,7 +747,7 @@ bytesio_writelines(bytesio *self, PyObject *v)
PyObject *it, *item;
PyObject *ret;
- CHECK_CLOSED(self);
+ CHECK_CLOSED(self, NULL);
it = PyObject_GetIter(v);
if (it == NULL)
@@ -655,10 +777,7 @@ PyDoc_STRVAR(close_doc,
static PyObject *
bytesio_close(bytesio *self)
{
- if (self->buf != NULL) {
- PyMem_Free(self->buf);
- self->buf = NULL;
- }
+ reset(self);
Py_RETURN_NONE;
}
@@ -706,11 +825,11 @@ bytesio_getstate(bytesio *self)
static PyObject *
bytesio_setstate(bytesio *self, PyObject *state)
{
- PyObject *result;
PyObject *position_obj;
PyObject *dict;
Py_ssize_t pos;
+ CHECK_EXPORTS(self);
assert(state != NULL);
/* We allow the state tuple to be longer than 3, because we may need
@@ -722,18 +841,13 @@ bytesio_setstate(bytesio *self, PyObject *state)
Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
return NULL;
}
- CHECK_EXPORTS(self);
- /* Reset the object to its default state. This is only needed to handle
- the case of repeated calls to __setstate__. */
- self->string_size = 0;
- self->pos = 0;
- /* Set the value of the internal buffer. If state[0] does not support the
- buffer protocol, bytesio_write will raise the appropriate TypeError. */
- result = bytesio_write(self, PyTuple_GET_ITEM(state, 0));
- if (result == NULL)
+ /* Reset the object to its default state and set the value of the internal
+ * buffer. If state[0] does not support the buffer protocol, reinit() will
+ * raise the appropriate TypeError. */
+ if (reinit(self, PyTuple_GET_ITEM(state, 0)) < 0) {
return NULL;
- Py_DECREF(result);
+ }
/* Set carefully the position value. Alternatively, we could use the seek
method instead of modifying self->pos directly to better protect the
@@ -788,10 +902,9 @@ bytesio_dealloc(bytesio *self)
"deallocated BytesIO object has exported buffers");
PyErr_Print();
}
- if (self->buf != NULL) {
- PyMem_Free(self->buf);
- self->buf = NULL;
- }
+
+ reset(self);
+
Py_CLEAR(self->dict);
if (self->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) self);
@@ -830,20 +943,7 @@ bytesio_init(bytesio *self, PyObject *args, PyObject *kwds)
&initvalue))
return -1;
- /* In case, __init__ is called multiple times. */
- self->string_size = 0;
- self->pos = 0;
-
- if (initvalue && initvalue != Py_None) {
- PyObject *res;
- res = bytesio_write(self, initvalue);
- if (res == NULL)
- return -1;
- Py_DECREF(res);
- self->pos = 0;
- }
-
- return 0;
+ return reinit(self, initvalue);
}
static PyObject *