aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_zstd/decompressor.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_zstd/decompressor.c')
-rw-r--r--Modules/_zstd/decompressor.c308
1 files changed, 83 insertions, 225 deletions
diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c
index 2ed88cd3f23..58f9c9f804e 100644
--- a/Modules/_zstd/decompressor.c
+++ b/Modules/_zstd/decompressor.c
@@ -1,7 +1,4 @@
-/*
-Low level interface to Meta's zstd library for use in the compression.zstd
-Python module.
-*/
+/* Low level interface to the Zstandard algorthm & the zstd library. */
/* ZstdDecompressor class definition */
@@ -43,23 +40,11 @@ typedef struct {
PyObject *unused_data;
/* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */
- char needs_input;
-
- /* For decompress(), 0 or 1.
- 1 when both input and output streams are at a frame edge, means a
- frame is completely decoded and fully flushed, or the decompressor
- just be initialized. */
- char at_frame_edge;
+ bool needs_input;
/* For ZstdDecompressor, 0 or 1.
1 means the end of the first frame has been reached. */
- char eof;
-
- /* Used for fast reset above three variables */
- char _unused_char_for_align;
-
- /* __init__ has been called, 0 or 1. */
- bool initialized;
+ bool eof;
} ZstdDecompressor;
#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op)
@@ -90,8 +75,8 @@ _get_DDict(ZstdDict *self)
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state != NULL) {
PyErr_SetString(mod_state->ZstdError,
- "Failed to create ZSTD_DDict instance from zstd "
- "dictionary content. Maybe the content is corrupted.");
+ "Failed to create a ZSTD_DDict instance from "
+ "Zstandard dictionary content.");
}
}
}
@@ -258,19 +243,13 @@ load:
return 0;
}
-typedef enum {
- TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
- TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
-} decompress_type;
-
/*
- Given the two types of decompressors (defined above),
- decompress implementation for <D>, <E>, pseudo code:
+ Decompress implementation in pseudo code:
initialize_output_buffer
while True:
decompress_data
- set_object_flag # .eof for <D>, .at_frame_edge for <E>.
+ set_object_flag # .eof
if output_buffer_exhausted:
if output_buffer_reached_max_length:
@@ -280,70 +259,26 @@ typedef enum {
finish
ZSTD_decompressStream()'s size_t return value:
- - 0 when a frame is completely decoded and fully flushed, zstd's internal
- buffer has no data.
+ - 0 when a frame is completely decoded and fully flushed,
+ zstd's internal buffer has no data.
- An error code, which can be tested using ZSTD_isError().
- Or any other value > 0, which means there is still some decoding or
flushing to do to complete current frame.
Note, decompressing "an empty input" in any case will make it > 0.
-
- <E> supports multiple frames, has an .at_frame_edge flag, it means both the
- input and output streams are at a frame edge. The flag can be set by this
- statement:
-
- .at_frame_edge = (zstd_ret == 0) ? 1 : 0
-
- But if decompressing "an empty input" at "a frame edge", zstd_ret will be
- non-zero, then .at_frame_edge will be wrongly set to false. To solve this
- problem, two AFE checks are needed to ensure that: when at "a frame edge",
- empty input will not be decompressed.
-
- // AFE check
- if (self->at_frame_edge && in->pos == in->size) {
- finish
- }
-
- In <E>, if .at_frame_edge is eventually set to true, but input stream has
- unconsumed data (in->pos < in->size), then the outer function
- stream_decompress() will set .at_frame_edge to false. In this case,
- although the output stream is at a frame edge, for the caller, the input
- stream is not at a frame edge, see below diagram. This behavior does not
- affect the next AFE check, since (in->pos < in->size).
-
- input stream: --------------|---
- ^
- output stream: ====================|
- ^
*/
static PyObject *
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
- Py_ssize_t max_length,
- Py_ssize_t initial_size,
- decompress_type type)
+ Py_ssize_t max_length)
{
size_t zstd_ret;
ZSTD_outBuffer out;
_BlocksOutputBuffer buffer = {.list = NULL};
PyObject *ret;
- /* The first AFE check for setting .at_frame_edge flag */
- if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- if (self->at_frame_edge && in->pos == in->size) {
- return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
- }
- }
-
/* Initialize the output buffer */
- if (initial_size >= 0) {
- if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
- goto error;
- }
- }
- else {
- if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
- goto error;
- }
+ if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
+ goto error;
}
assert(out.pos == 0);
@@ -362,26 +297,15 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
goto error;
}
- /* Set .eof/.af_frame_edge flag */
- if (type == TYPE_DECOMPRESSOR) {
- /* ZstdDecompressor class stops when a frame is decompressed */
- if (zstd_ret == 0) {
- self->eof = 1;
- break;
- }
- }
- else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- /* decompress() function supports multiple frames */
- self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
-
- /* The second AFE check for setting .at_frame_edge flag */
- if (self->at_frame_edge && in->pos == in->size) {
- break;
- }
+ /* Set .eof flag */
+ if (zstd_ret == 0) {
+ /* Stop when a frame is decompressed */
+ self->eof = 1;
+ break;
}
/* Need to check out before in. Maybe zstd's internal buffer still has
- a few bytes can be output, grow the buffer and continue. */
+ a few bytes that can be output, grow the buffer and continue. */
if (out.pos == out.size) {
/* Output buffer exhausted */
@@ -415,8 +339,7 @@ error:
}
static void
-decompressor_reset_session(ZstdDecompressor *self,
- decompress_type type)
+decompressor_reset_session(ZstdDecompressor *self)
{
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
// and ensure lock is always held
@@ -425,56 +348,28 @@ decompressor_reset_session(ZstdDecompressor *self,
self->in_begin = 0;
self->in_end = 0;
- if (type == TYPE_DECOMPRESSOR) {
- Py_CLEAR(self->unused_data);
- }
+ Py_CLEAR(self->unused_data);
/* Reset variables in one operation */
self->needs_input = 1;
- self->at_frame_edge = 1;
self->eof = 0;
- self->_unused_char_for_align = 0;
- /* Resetting session never fail */
+ /* Resetting session is guaranteed to never fail */
ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
}
static PyObject *
-stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
- decompress_type type)
+stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length)
{
- Py_ssize_t initial_buffer_size = -1;
ZSTD_inBuffer in;
PyObject *ret = NULL;
int use_input_buffer;
- if (type == TYPE_DECOMPRESSOR) {
- /* Check .eof flag */
- if (self->eof) {
- PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
- assert(ret == NULL);
- goto success;
- }
- }
- else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- /* Fast path for the first frame */
- if (self->at_frame_edge && self->in_begin == self->in_end) {
- /* Read decompressed size */
- uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
-
- /* These two zstd constants always > PY_SSIZE_T_MAX:
- ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
- ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
-
- Use ZSTD_findFrameCompressedSize() to check complete frame,
- prevent allocating too much memory for small input chunk. */
-
- if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
- !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
- {
- initial_buffer_size = (Py_ssize_t) decompressed_size;
- }
- }
+ /* Check .eof flag */
+ if (self->eof) {
+ PyErr_SetString(PyExc_EOFError, "Already at the end of a Zstandard frame.");
+ assert(ret == NULL);
+ return NULL;
}
/* Prepare input buffer w/wo unconsumed data */
@@ -561,30 +456,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
assert(in.pos == 0);
/* Decompress */
- ret = decompress_impl(self, &in,
- max_length, initial_buffer_size,
- type);
+ ret = decompress_impl(self, &in, max_length);
if (ret == NULL) {
goto error;
}
/* Unconsumed input data */
if (in.pos == in.size) {
- if (type == TYPE_DECOMPRESSOR) {
- if (Py_SIZE(ret) == max_length || self->eof) {
- self->needs_input = 0;
- }
- else {
- self->needs_input = 1;
- }
+ if (Py_SIZE(ret) == max_length || self->eof) {
+ self->needs_input = 0;
}
- else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
- self->needs_input = 0;
- }
- else {
- self->needs_input = 1;
- }
+ else {
+ self->needs_input = 1;
}
if (use_input_buffer) {
@@ -598,10 +481,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
self->needs_input = 0;
- if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- self->at_frame_edge = 0;
- }
-
if (!use_input_buffer) {
/* Discard buffer if it's too small
(resizing it may needlessly copy the current contents) */
@@ -634,43 +513,52 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
}
}
- goto success;
+ return ret;
error:
/* Reset decompressor's states/session */
- decompressor_reset_session(self, type);
+ decompressor_reset_session(self);
Py_CLEAR(ret);
-success:
-
- return ret;
+ return NULL;
}
+/*[clinic input]
+@classmethod
+_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained Zstandard dictionary.
+ options: object = None
+ A dict object that contains advanced decompression parameters.
+
+Create a decompressor object for decompressing data incrementally.
+
+Thread-safe at method level. For one-shot decompression, use the decompress()
+function instead.
+[clinic start generated code]*/
+
static PyObject *
-_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict,
+ PyObject *options)
+/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/
{
- ZstdDecompressor *self;
- self = PyObject_GC_New(ZstdDecompressor, type);
+ ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type);
if (self == NULL) {
goto error;
}
- self->initialized = 0;
- self->dict = NULL;
self->input_buffer = NULL;
self->input_buffer_size = 0;
self->in_begin = -1;
self->in_end = -1;
self->unused_data = NULL;
self->eof = 0;
+ self->dict = NULL;
/* needs_input flag */
self->needs_input = 1;
- /* at_frame_edge flag */
- self->at_frame_edge = 1;
-
/* Decompression context */
self->dctx = ZSTD_createDCtx();
if (self->dctx == NULL) {
@@ -682,12 +570,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
goto error;
}
+ /* Load Zstandard dictionary to decompression context */
+ if (zstd_dict != Py_None) {
+ if (_zstd_load_d_dict(self, zstd_dict) < 0) {
+ goto error;
+ }
+ Py_INCREF(zstd_dict);
+ self->dict = zstd_dict;
+ }
+
+ /* Set option to decompression context */
+ if (options != Py_None) {
+ if (_zstd_set_d_parameters(self, options) < 0) {
+ goto error;
+ }
+ }
+
+ // We can only start GC tracking once self->dict is set.
+ PyObject_GC_Track(self);
+
return (PyObject*)self;
error:
- if (self != NULL) {
- PyObject_GC_Del(self);
- }
+ Py_XDECREF(self);
return NULL;
}
@@ -699,7 +604,9 @@ ZstdDecompressor_dealloc(PyObject *ob)
PyObject_GC_UnTrack(self);
/* Free decompression context */
- ZSTD_freeDCtx(self->dctx);
+ if (self->dctx) {
+ ZSTD_freeDCtx(self->dctx);
+ }
/* Py_CLEAR the dict after free decompression context */
Py_CLEAR(self->dict);
@@ -716,55 +623,6 @@ ZstdDecompressor_dealloc(PyObject *ob)
}
/*[clinic input]
-_zstd.ZstdDecompressor.__init__
-
- zstd_dict: object = None
- A ZstdDict object, a pre-trained zstd dictionary.
- options: object = None
- A dict object that contains advanced decompression parameters.
-
-Create a decompressor object for decompressing data incrementally.
-
-Thread-safe at method level. For one-shot decompression, use the decompress()
-function instead.
-[clinic start generated code]*/
-
-static int
-_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
- PyObject *zstd_dict, PyObject *options)
-/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
-{
- /* Only called once */
- if (self->initialized) {
- PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported");
- return -1;
- }
- self->initialized = 1;
-
- /* Load dictionary to decompression context */
- if (zstd_dict != Py_None) {
- if (_zstd_load_d_dict(self, zstd_dict) < 0) {
- return -1;
- }
-
- /* Py_INCREF the dict */
- Py_INCREF(zstd_dict);
- self->dict = zstd_dict;
- }
-
- /* Set option to decompression context */
- if (options != Py_None) {
- if (_zstd_set_d_parameters(self, options) < 0) {
- return -1;
- }
- }
-
- // We can only start tracking self with the GC once self->dict is set.
- PyObject_GC_Track(self);
- return 0;
-}
-
-/*[clinic input]
@critical_section
@getter
_zstd.ZstdDecompressor.unused_data
@@ -805,7 +663,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
_zstd.ZstdDecompressor.decompress
data: Py_buffer
- A bytes-like object, zstd data to be decompressed.
+ A bytes-like object, Zstandard data to be decompressed.
max_length: Py_ssize_t = -1
Maximum size of returned data. When it is negative, the size of
output buffer is unlimited. When it is nonnegative, returns at
@@ -831,13 +689,13 @@ static PyObject *
_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
Py_buffer *data,
Py_ssize_t max_length)
-/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/
+/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/
{
PyObject *ret;
/* Thread-safe code */
Py_BEGIN_CRITICAL_SECTION(self);
- ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
+ ret = stream_decompress(self, data, max_length);
Py_END_CRITICAL_SECTION();
return ret;
}
@@ -889,11 +747,10 @@ ZstdDecompressor_clear(PyObject *ob)
static PyType_Slot ZstdDecompressor_slots[] = {
{Py_tp_new, _zstd_ZstdDecompressor_new},
{Py_tp_dealloc, ZstdDecompressor_dealloc},
- {Py_tp_init, _zstd_ZstdDecompressor___init__},
{Py_tp_methods, ZstdDecompressor_methods},
{Py_tp_members, ZstdDecompressor_members},
{Py_tp_getset, ZstdDecompressor_getset},
- {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__},
+ {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__},
{Py_tp_traverse, ZstdDecompressor_traverse},
{Py_tp_clear, ZstdDecompressor_clear},
{0, 0}
@@ -902,6 +759,7 @@ static PyType_Slot ZstdDecompressor_slots[] = {
PyType_Spec zstd_decompressor_type_spec = {
.name = "compression.zstd.ZstdDecompressor",
.basicsize = sizeof(ZstdDecompressor),
- .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE
+ | Py_TPFLAGS_HAVE_GC,
.slots = ZstdDecompressor_slots,
};