aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_zstd/decompressor.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_zstd/decompressor.c')
-rw-r--r--Modules/_zstd/decompressor.c332
1 files changed, 154 insertions, 178 deletions
diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c
index dc3e90a22d8..26e568cf433 100644
--- a/Modules/_zstd/decompressor.c
+++ b/Modules/_zstd/decompressor.c
@@ -1,7 +1,4 @@
-/*
-Low level interface to Meta's zstd library for use in the compression.zstd
-Python module.
-*/
+/* Low level interface to the Zstandard algorthm & the zstd library. */
/* ZstdDecompressor class definition */
@@ -20,6 +17,7 @@ class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec"
#include "_zstdmodule.h"
#include "buffer.h"
#include "zstddict.h"
+#include "internal/pycore_lock.h" // PyMutex_IsLocked
#include <stdbool.h> // bool
#include <stddef.h> // offsetof()
@@ -49,8 +47,8 @@ typedef struct {
1 means the end of the first frame has been reached. */
bool eof;
- /* __init__ has been called, 0 or 1. */
- bool initialized;
+ /* Lock to protect the decompression context */
+ PyMutex lock;
} ZstdDecompressor;
#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op)
@@ -60,6 +58,7 @@ typedef struct {
static inline ZSTD_DDict *
_get_DDict(ZstdDict *self)
{
+ assert(PyMutex_IsLocked(&self->lock));
ZSTD_DDict *ret;
/* Already created */
@@ -67,98 +66,122 @@ _get_DDict(ZstdDict *self)
return self->d_dict;
}
- Py_BEGIN_CRITICAL_SECTION(self);
if (self->d_dict == NULL) {
/* Create ZSTD_DDict instance from dictionary content */
- char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
- Py_ssize_t dict_len = Py_SIZE(self->dict_content);
Py_BEGIN_ALLOW_THREADS
- self->d_dict = ZSTD_createDDict(dict_buffer,
- dict_len);
+ ret = ZSTD_createDDict(self->dict_buffer, self->dict_len);
Py_END_ALLOW_THREADS
+ self->d_dict = ret;
if (self->d_dict == NULL) {
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state != NULL) {
PyErr_SetString(mod_state->ZstdError,
- "Failed to create ZSTD_DDict instance from zstd "
- "dictionary content. Maybe the content is corrupted.");
+ "Failed to create a ZSTD_DDict instance from "
+ "Zstandard dictionary content.");
}
}
}
- /* Don't lose any exception */
- ret = self->d_dict;
- Py_END_CRITICAL_SECTION();
-
- return ret;
+ return self->d_dict;
}
-/* Set decompression parameters to decompression context */
static int
_zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
{
- size_t zstd_ret;
- PyObject *key, *value;
- Py_ssize_t pos;
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state == NULL) {
return -1;
}
if (!PyDict_Check(options)) {
- PyErr_SetString(PyExc_TypeError,
- "options argument should be dict object.");
+ PyErr_Format(PyExc_TypeError,
+ "ZstdDecompressor() argument 'options' must be dict, not %T",
+ options);
return -1;
}
- pos = 0;
+ Py_ssize_t pos = 0;
+ PyObject *key, *value;
while (PyDict_Next(options, &pos, &key, &value)) {
/* Check key type */
if (Py_TYPE(key) == mod_state->CParameter_type) {
PyErr_SetString(PyExc_TypeError,
- "Key of decompression options dict should "
- "NOT be CompressionParameter.");
+ "compression options dictionary key must not be a "
+ "CompressionParameter attribute");
return -1;
}
- /* Both key & value should be 32-bit signed int */
+ Py_INCREF(key);
+ Py_INCREF(value);
int key_v = PyLong_AsInt(key);
+ Py_DECREF(key);
if (key_v == -1 && PyErr_Occurred()) {
- PyErr_SetString(PyExc_ValueError,
- "Key of options dict should be a DecompressionParameter attribute.");
return -1;
}
- // TODO(emmatyping): check bounds when there is a value error here for better
- // error message?
int value_v = PyLong_AsInt(value);
+ Py_DECREF(value);
if (value_v == -1 && PyErr_Occurred()) {
- PyErr_SetString(PyExc_ValueError,
- "Value of options dict should be an int.");
return -1;
}
/* Set parameter to compression context */
- Py_BEGIN_CRITICAL_SECTION(self);
- zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v);
- Py_END_CRITICAL_SECTION();
+ size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v);
/* Check error */
if (ZSTD_isError(zstd_ret)) {
- set_parameter_error(mod_state, 0, key_v, value_v);
+ set_parameter_error(0, key_v, value_v);
return -1;
}
}
return 0;
}
+static int
+_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd,
+ _zstd_state *mod_state, int type)
+{
+ size_t zstd_ret;
+ if (type == DICT_TYPE_DIGESTED) {
+ /* Get ZSTD_DDict */
+ ZSTD_DDict *d_dict = _get_DDict(zd);
+ if (d_dict == NULL) {
+ return -1;
+ }
+ /* Reference a prepared dictionary */
+ zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict);
+ }
+ else if (type == DICT_TYPE_UNDIGESTED) {
+ /* Load a dictionary */
+ zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer,
+ zd->dict_len);
+ }
+ else if (type == DICT_TYPE_PREFIX) {
+ /* Load a prefix */
+ zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer,
+ zd->dict_len);
+ }
+ else {
+ /* Impossible code path */
+ PyErr_SetString(PyExc_SystemError,
+ "load_d_dict() impossible code path");
+ return -1;
+ }
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret);
+ return -1;
+ }
+ return 0;
+}
+
/* Load dictionary or prefix to decompression context */
static int
_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
{
- size_t zstd_ret;
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state == NULL) {
return -1;
}
@@ -174,7 +197,10 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
/* When decompressing, use digested dictionary by default. */
zd = (ZstdDict*)dict;
type = DICT_TYPE_DIGESTED;
- goto load;
+ PyMutex_Lock(&zd->lock);
+ ret = _zstd_load_impl(self, zd, mod_state, type);
+ PyMutex_Unlock(&zd->lock);
+ return ret;
}
/* Check (ZstdDict, type) */
@@ -188,13 +214,16 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
else if (ret > 0) {
/* type == -1 may indicate an error. */
type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1));
- if (type == DICT_TYPE_DIGESTED ||
- type == DICT_TYPE_UNDIGESTED ||
- type == DICT_TYPE_PREFIX)
+ if (type == DICT_TYPE_DIGESTED
+ || type == DICT_TYPE_UNDIGESTED
+ || type == DICT_TYPE_PREFIX)
{
assert(type >= 0);
zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0);
- goto load;
+ PyMutex_Lock(&zd->lock);
+ ret = _zstd_load_impl(self, zd, mod_state, type);
+ PyMutex_Unlock(&zd->lock);
+ return ret;
}
}
}
@@ -203,50 +232,6 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
PyErr_SetString(PyExc_TypeError,
"zstd_dict argument should be ZstdDict object.");
return -1;
-
-load:
- if (type == DICT_TYPE_DIGESTED) {
- /* Get ZSTD_DDict */
- ZSTD_DDict *d_dict = _get_DDict(zd);
- if (d_dict == NULL) {
- return -1;
- }
- /* Reference a prepared dictionary */
- Py_BEGIN_CRITICAL_SECTION(self);
- zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict);
- Py_END_CRITICAL_SECTION();
- }
- else if (type == DICT_TYPE_UNDIGESTED) {
- /* Load a dictionary */
- Py_BEGIN_CRITICAL_SECTION2(self, zd);
- zstd_ret = ZSTD_DCtx_loadDictionary(
- self->dctx,
- PyBytes_AS_STRING(zd->dict_content),
- Py_SIZE(zd->dict_content));
- Py_END_CRITICAL_SECTION2();
- }
- else if (type == DICT_TYPE_PREFIX) {
- /* Load a prefix */
- Py_BEGIN_CRITICAL_SECTION2(self, zd);
- zstd_ret = ZSTD_DCtx_refPrefix(
- self->dctx,
- PyBytes_AS_STRING(zd->dict_content),
- Py_SIZE(zd->dict_content));
- Py_END_CRITICAL_SECTION2();
- }
- else {
- /* Impossible code path */
- PyErr_SetString(PyExc_SystemError,
- "load_d_dict() impossible code path");
- return -1;
- }
-
- /* Check error */
- if (ZSTD_isError(zstd_ret)) {
- set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret);
- return -1;
- }
- return 0;
}
/*
@@ -265,8 +250,8 @@ load:
finish
ZSTD_decompressStream()'s size_t return value:
- - 0 when a frame is completely decoded and fully flushed, zstd's internal
- buffer has no data.
+ - 0 when a frame is completely decoded and fully flushed,
+ zstd's internal buffer has no data.
- An error code, which can be tested using ZSTD_isError().
- Or any other value > 0, which means there is still some decoding or
flushing to do to complete current frame.
@@ -274,8 +259,8 @@ load:
Note, decompressing "an empty input" in any case will make it > 0.
*/
static PyObject *
-decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
- Py_ssize_t max_length)
+decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in,
+ Py_ssize_t max_length)
{
size_t zstd_ret;
ZSTD_outBuffer out;
@@ -296,7 +281,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
/* Check error */
if (ZSTD_isError(zstd_ret)) {
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state != NULL) {
set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret);
}
@@ -311,7 +296,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
}
/* Need to check out before in. Maybe zstd's internal buffer still has
- a few bytes can be output, grow the buffer and continue. */
+ a few bytes that can be output, grow the buffer and continue. */
if (out.pos == out.size) {
/* Output buffer exhausted */
@@ -345,10 +330,9 @@ error:
}
static void
-decompressor_reset_session(ZstdDecompressor *self)
+decompressor_reset_session_lock_held(ZstdDecompressor *self)
{
- // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
- // and ensure lock is always held
+ assert(PyMutex_IsLocked(&self->lock));
/* Reset variables */
self->in_begin = 0;
@@ -365,15 +349,18 @@ decompressor_reset_session(ZstdDecompressor *self)
}
static PyObject *
-stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length)
+stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data,
+ Py_ssize_t max_length)
{
+ assert(PyMutex_IsLocked(&self->lock));
ZSTD_inBuffer in;
PyObject *ret = NULL;
int use_input_buffer;
/* Check .eof flag */
if (self->eof) {
- PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
+ PyErr_SetString(PyExc_EOFError,
+ "Already at the end of a Zstandard frame.");
assert(ret == NULL);
return NULL;
}
@@ -462,7 +449,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
assert(in.pos == 0);
/* Decompress */
- ret = decompress_impl(self, &in, max_length);
+ ret = decompress_lock_held(self, &in, max_length);
if (ret == NULL) {
goto error;
}
@@ -490,8 +477,8 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
if (!use_input_buffer) {
/* Discard buffer if it's too small
(resizing it may needlessly copy the current contents) */
- if (self->input_buffer != NULL &&
- self->input_buffer_size < data_size)
+ if (self->input_buffer != NULL
+ && self->input_buffer_size < data_size)
{
PyMem_Free(self->input_buffer);
self->input_buffer = NULL;
@@ -523,30 +510,45 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
error:
/* Reset decompressor's states/session */
- decompressor_reset_session(self);
+ decompressor_reset_session_lock_held(self);
Py_CLEAR(ret);
return NULL;
}
+/*[clinic input]
+@classmethod
+_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained Zstandard dictionary.
+ options: object = None
+ A dict object that contains advanced decompression parameters.
+
+Create a decompressor object for decompressing data incrementally.
+
+Thread-safe at method level. For one-shot decompression, use the decompress()
+function instead.
+[clinic start generated code]*/
+
static PyObject *
-_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict,
+ PyObject *options)
+/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/
{
- ZstdDecompressor *self;
- self = PyObject_GC_New(ZstdDecompressor, type);
+ ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type);
if (self == NULL) {
goto error;
}
- self->initialized = 0;
- self->dict = NULL;
self->input_buffer = NULL;
self->input_buffer_size = 0;
self->in_begin = -1;
self->in_end = -1;
self->unused_data = NULL;
self->eof = 0;
+ self->dict = NULL;
+ self->lock = (PyMutex){0};
/* needs_input flag */
self->needs_input = 1;
@@ -554,7 +556,7 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
/* Decompression context */
self->dctx = ZSTD_createDCtx();
if (self->dctx == NULL) {
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state != NULL) {
PyErr_SetString(mod_state->ZstdError,
"Unable to create ZSTD_DCtx instance.");
@@ -562,12 +564,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
goto error;
}
+ /* Load Zstandard dictionary to decompression context */
+ if (zstd_dict != Py_None) {
+ if (_zstd_load_d_dict(self, zstd_dict) < 0) {
+ goto error;
+ }
+ Py_INCREF(zstd_dict);
+ self->dict = zstd_dict;
+ }
+
+ /* Set options dictionary */
+ if (options != Py_None) {
+ if (_zstd_set_d_parameters(self, options) < 0) {
+ goto error;
+ }
+ }
+
+ // We can only start GC tracking once self->dict is set.
+ PyObject_GC_Track(self);
+
return (PyObject*)self;
error:
- if (self != NULL) {
- PyObject_GC_Del(self);
- }
+ Py_XDECREF(self);
return NULL;
}
@@ -579,7 +598,11 @@ ZstdDecompressor_dealloc(PyObject *ob)
PyObject_GC_UnTrack(self);
/* Free decompression context */
- ZSTD_freeDCtx(self->dctx);
+ if (self->dctx) {
+ ZSTD_freeDCtx(self->dctx);
+ }
+
+ assert(!PyMutex_IsLocked(&self->lock));
/* Py_CLEAR the dict after free decompression context */
Py_CLEAR(self->dict);
@@ -596,56 +619,6 @@ ZstdDecompressor_dealloc(PyObject *ob)
}
/*[clinic input]
-_zstd.ZstdDecompressor.__init__
-
- zstd_dict: object = None
- A ZstdDict object, a pre-trained zstd dictionary.
- options: object = None
- A dict object that contains advanced decompression parameters.
-
-Create a decompressor object for decompressing data incrementally.
-
-Thread-safe at method level. For one-shot decompression, use the decompress()
-function instead.
-[clinic start generated code]*/
-
-static int
-_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
- PyObject *zstd_dict, PyObject *options)
-/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
-{
- /* Only called once */
- if (self->initialized) {
- PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported");
- return -1;
- }
- self->initialized = 1;
-
- /* Load dictionary to decompression context */
- if (zstd_dict != Py_None) {
- if (_zstd_load_d_dict(self, zstd_dict) < 0) {
- return -1;
- }
-
- /* Py_INCREF the dict */
- Py_INCREF(zstd_dict);
- self->dict = zstd_dict;
- }
-
- /* Set option to decompression context */
- if (options != Py_None) {
- if (_zstd_set_d_parameters(self, options) < 0) {
- return -1;
- }
- }
-
- // We can only start tracking self with the GC once self->dict is set.
- PyObject_GC_Track(self);
- return 0;
-}
-
-/*[clinic input]
-@critical_section
@getter
_zstd.ZstdDecompressor.unused_data
@@ -657,11 +630,14 @@ decompressed, unused input data after the frame. Otherwise this will be b''.
static PyObject *
_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
-/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/
+/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/
{
PyObject *ret;
+ PyMutex_Lock(&self->lock);
+
if (!self->eof) {
+ PyMutex_Unlock(&self->lock);
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
}
else {
@@ -678,6 +654,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
}
}
+ PyMutex_Unlock(&self->lock);
return ret;
}
@@ -685,7 +662,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
_zstd.ZstdDecompressor.decompress
data: Py_buffer
- A bytes-like object, zstd data to be decompressed.
+ A bytes-like object, Zstandard data to be decompressed.
max_length: Py_ssize_t = -1
Maximum size of returned data. When it is negative, the size of
output buffer is unlimited. When it is nonnegative, returns at
@@ -711,14 +688,13 @@ static PyObject *
_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
Py_buffer *data,
Py_ssize_t max_length)
-/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/
+/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/
{
PyObject *ret;
/* Thread-safe code */
- Py_BEGIN_CRITICAL_SECTION(self);
-
- ret = stream_decompress(self, data, max_length);
- Py_END_CRITICAL_SECTION();
+ PyMutex_Lock(&self->lock);
+ ret = stream_decompress_lock_held(self, data, max_length);
+ PyMutex_Unlock(&self->lock);
return ret;
}
@@ -732,9 +708,10 @@ PyDoc_STRVAR(ZstdDecompressor_eof_doc,
"after that, an EOFError exception will be raised.");
PyDoc_STRVAR(ZstdDecompressor_needs_input_doc,
-"If the max_length output limit in .decompress() method has been reached, and\n"
-"the decompressor has (or may has) unconsumed input data, it will be set to\n"
-"False. In this case, pass b'' to .decompress() method may output further data.");
+"If the max_length output limit in .decompress() method has been reached,\n"
+"and the decompressor has (or may has) unconsumed input data, it will be set\n"
+"to False. In this case, passing b'' to the .decompress() method may output\n"
+"further data.");
static PyMemberDef ZstdDecompressor_members[] = {
{"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof),
@@ -769,11 +746,10 @@ ZstdDecompressor_clear(PyObject *ob)
static PyType_Slot ZstdDecompressor_slots[] = {
{Py_tp_new, _zstd_ZstdDecompressor_new},
{Py_tp_dealloc, ZstdDecompressor_dealloc},
- {Py_tp_init, _zstd_ZstdDecompressor___init__},
{Py_tp_methods, ZstdDecompressor_methods},
{Py_tp_members, ZstdDecompressor_members},
{Py_tp_getset, ZstdDecompressor_getset},
- {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__},
+ {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__},
{Py_tp_traverse, ZstdDecompressor_traverse},
{Py_tp_clear, ZstdDecompressor_clear},
{0, 0}