aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_zstd/decompressor.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_zstd/decompressor.c')
-rw-r--r--Modules/_zstd/decompressor.c376
1 files changed, 125 insertions, 251 deletions
diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c
index 4ac28d9c987..58f9c9f804e 100644
--- a/Modules/_zstd/decompressor.c
+++ b/Modules/_zstd/decompressor.c
@@ -1,28 +1,56 @@
-/*
-Low level interface to Meta's zstd library for use in the compression.zstd
-Python module.
-*/
+/* Low level interface to the Zstandard algorthm & the zstd library. */
/* ZstdDecompressor class definition */
/*[clinic input]
module _zstd
-class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type"
+class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec"
[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
-#include "_zstdmodule.h"
+#include "Python.h"
+#include "_zstdmodule.h"
#include "buffer.h"
+#include "zstddict.h"
+#include <stdbool.h> // bool
#include <stddef.h> // offsetof()
+#include <zstd.h> // ZSTD_*()
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Decompression context */
+ ZSTD_DCtx *dctx;
+
+ /* ZstdDict object in use */
+ PyObject *dict;
+
+ /* Unconsumed input data */
+ char *input_buffer;
+ size_t input_buffer_size;
+ size_t in_begin, in_end;
+
+ /* Unused data */
+ PyObject *unused_data;
+
+ /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */
+ bool needs_input;
+
+ /* For ZstdDecompressor, 0 or 1.
+ 1 means the end of the first frame has been reached. */
+ bool eof;
+} ZstdDecompressor;
#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op)
+#include "clinic/decompressor.c.h"
+
static inline ZSTD_DDict *
_get_DDict(ZstdDict *self)
{
@@ -47,8 +75,8 @@ _get_DDict(ZstdDict *self)
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state != NULL) {
PyErr_SetString(mod_state->ZstdError,
- "Failed to create ZSTD_DDict instance from zstd "
- "dictionary content. Maybe the content is corrupted.");
+ "Failed to create a ZSTD_DDict instance from "
+ "Zstandard dictionary content.");
}
}
}
@@ -215,22 +243,13 @@ load:
return 0;
}
-
-
/*
- Given the two types of decompressors (defined in _zstdmodule.h):
-
- typedef enum {
- TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
- TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
- } decompress_type;
-
- Decompress implementation for <D>, <E>, pseudo code:
+ Decompress implementation in pseudo code:
initialize_output_buffer
while True:
decompress_data
- set_object_flag # .eof for <D>, .at_frame_edge for <E>.
+ set_object_flag # .eof
if output_buffer_exhausted:
if output_buffer_reached_max_length:
@@ -240,76 +259,26 @@ load:
finish
ZSTD_decompressStream()'s size_t return value:
- - 0 when a frame is completely decoded and fully flushed, zstd's internal
- buffer has no data.
+ - 0 when a frame is completely decoded and fully flushed,
+ zstd's internal buffer has no data.
- An error code, which can be tested using ZSTD_isError().
- Or any other value > 0, which means there is still some decoding or
flushing to do to complete current frame.
Note, decompressing "an empty input" in any case will make it > 0.
-
- <E> supports multiple frames, has an .at_frame_edge flag, it means both the
- input and output streams are at a frame edge. The flag can be set by this
- statement:
-
- .at_frame_edge = (zstd_ret == 0) ? 1 : 0
-
- But if decompressing "an empty input" at "a frame edge", zstd_ret will be
- non-zero, then .at_frame_edge will be wrongly set to false. To solve this
- problem, two AFE checks are needed to ensure that: when at "a frame edge",
- empty input will not be decompressed.
-
- // AFE check
- if (self->at_frame_edge && in->pos == in->size) {
- finish
- }
-
- In <E>, if .at_frame_edge is eventually set to true, but input stream has
- unconsumed data (in->pos < in->size), then the outer function
- stream_decompress() will set .at_frame_edge to false. In this case,
- although the output stream is at a frame edge, for the caller, the input
- stream is not at a frame edge, see below diagram. This behavior does not
- affect the next AFE check, since (in->pos < in->size).
-
- input stream: --------------|---
- ^
- output stream: ====================|
- ^
*/
-PyObject *
+static PyObject *
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
- Py_ssize_t max_length,
- Py_ssize_t initial_size,
- decompress_type type)
+ Py_ssize_t max_length)
{
size_t zstd_ret;
ZSTD_outBuffer out;
_BlocksOutputBuffer buffer = {.list = NULL};
PyObject *ret;
- /* The first AFE check for setting .at_frame_edge flag */
- if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- if (self->at_frame_edge && in->pos == in->size) {
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
- if (mod_state == NULL) {
- return NULL;
- }
- ret = mod_state->empty_bytes;
- Py_INCREF(ret);
- return ret;
- }
- }
-
/* Initialize the output buffer */
- if (initial_size >= 0) {
- if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
- goto error;
- }
- }
- else {
- if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
- goto error;
- }
+ if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
+ goto error;
}
assert(out.pos == 0);
@@ -328,26 +297,15 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
goto error;
}
- /* Set .eof/.af_frame_edge flag */
- if (type == TYPE_DECOMPRESSOR) {
- /* ZstdDecompressor class stops when a frame is decompressed */
- if (zstd_ret == 0) {
- self->eof = 1;
- break;
- }
- }
- else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- /* decompress() function supports multiple frames */
- self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
-
- /* The second AFE check for setting .at_frame_edge flag */
- if (self->at_frame_edge && in->pos == in->size) {
- break;
- }
+ /* Set .eof flag */
+ if (zstd_ret == 0) {
+ /* Stop when a frame is decompressed */
+ self->eof = 1;
+ break;
}
/* Need to check out before in. Maybe zstd's internal buffer still has
- a few bytes can be output, grow the buffer and continue. */
+ a few bytes that can be output, grow the buffer and continue. */
if (out.pos == out.size) {
/* Output buffer exhausted */
@@ -380,9 +338,8 @@ error:
return NULL;
}
-void
-decompressor_reset_session(ZstdDecompressor *self,
- decompress_type type)
+static void
+decompressor_reset_session(ZstdDecompressor *self)
{
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
// and ensure lock is always held
@@ -391,56 +348,28 @@ decompressor_reset_session(ZstdDecompressor *self,
self->in_begin = 0;
self->in_end = 0;
- if (type == TYPE_DECOMPRESSOR) {
- Py_CLEAR(self->unused_data);
- }
+ Py_CLEAR(self->unused_data);
/* Reset variables in one operation */
self->needs_input = 1;
- self->at_frame_edge = 1;
self->eof = 0;
- self->_unused_char_for_align = 0;
- /* Resetting session never fail */
+ /* Resetting session is guaranteed to never fail */
ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
}
-PyObject *
-stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
- decompress_type type)
+static PyObject *
+stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length)
{
- Py_ssize_t initial_buffer_size = -1;
ZSTD_inBuffer in;
PyObject *ret = NULL;
int use_input_buffer;
- if (type == TYPE_DECOMPRESSOR) {
- /* Check .eof flag */
- if (self->eof) {
- PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
- assert(ret == NULL);
- goto success;
- }
- }
- else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- /* Fast path for the first frame */
- if (self->at_frame_edge && self->in_begin == self->in_end) {
- /* Read decompressed size */
- uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
-
- /* These two zstd constants always > PY_SSIZE_T_MAX:
- ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
- ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
-
- Use ZSTD_findFrameCompressedSize() to check complete frame,
- prevent allocating too much memory for small input chunk. */
-
- if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
- !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
- {
- initial_buffer_size = (Py_ssize_t) decompressed_size;
- }
- }
+ /* Check .eof flag */
+ if (self->eof) {
+ PyErr_SetString(PyExc_EOFError, "Already at the end of a Zstandard frame.");
+ assert(ret == NULL);
+ return NULL;
}
/* Prepare input buffer w/wo unconsumed data */
@@ -527,30 +456,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
assert(in.pos == 0);
/* Decompress */
- ret = decompress_impl(self, &in,
- max_length, initial_buffer_size,
- type);
+ ret = decompress_impl(self, &in, max_length);
if (ret == NULL) {
goto error;
}
/* Unconsumed input data */
if (in.pos == in.size) {
- if (type == TYPE_DECOMPRESSOR) {
- if (Py_SIZE(ret) == max_length || self->eof) {
- self->needs_input = 0;
- }
- else {
- self->needs_input = 1;
- }
+ if (Py_SIZE(ret) == max_length || self->eof) {
+ self->needs_input = 0;
}
- else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
- self->needs_input = 0;
- }
- else {
- self->needs_input = 1;
- }
+ else {
+ self->needs_input = 1;
}
if (use_input_buffer) {
@@ -564,10 +481,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
self->needs_input = 0;
- if (type == TYPE_ENDLESS_DECOMPRESSOR) {
- self->at_frame_edge = 0;
- }
-
if (!use_input_buffer) {
/* Discard buffer if it's too small
(resizing it may needlessly copy the current contents) */
@@ -600,43 +513,52 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
}
}
- goto success;
+ return ret;
error:
/* Reset decompressor's states/session */
- decompressor_reset_session(self, type);
+ decompressor_reset_session(self);
Py_CLEAR(ret);
-success:
-
- return ret;
+ return NULL;
}
+/*[clinic input]
+@classmethod
+_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained Zstandard dictionary.
+ options: object = None
+ A dict object that contains advanced decompression parameters.
+
+Create a decompressor object for decompressing data incrementally.
+
+Thread-safe at method level. For one-shot decompression, use the decompress()
+function instead.
+[clinic start generated code]*/
+
static PyObject *
-_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict,
+ PyObject *options)
+/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/
{
- ZstdDecompressor *self;
- self = PyObject_GC_New(ZstdDecompressor, type);
+ ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type);
if (self == NULL) {
goto error;
}
- self->inited = 0;
- self->dict = NULL;
self->input_buffer = NULL;
self->input_buffer_size = 0;
self->in_begin = -1;
self->in_end = -1;
self->unused_data = NULL;
self->eof = 0;
+ self->dict = NULL;
/* needs_input flag */
self->needs_input = 1;
- /* at_frame_edge flag */
- self->at_frame_edge = 1;
-
/* Decompression context */
self->dctx = ZSTD_createDCtx();
if (self->dctx == NULL) {
@@ -648,12 +570,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
goto error;
}
+ /* Load Zstandard dictionary to decompression context */
+ if (zstd_dict != Py_None) {
+ if (_zstd_load_d_dict(self, zstd_dict) < 0) {
+ goto error;
+ }
+ Py_INCREF(zstd_dict);
+ self->dict = zstd_dict;
+ }
+
+ /* Set option to decompression context */
+ if (options != Py_None) {
+ if (_zstd_set_d_parameters(self, options) < 0) {
+ goto error;
+ }
+ }
+
+ // We can only start GC tracking once self->dict is set.
+ PyObject_GC_Track(self);
+
return (PyObject*)self;
error:
- if (self != NULL) {
- PyObject_GC_Del(self);
- }
+ Py_XDECREF(self);
return NULL;
}
@@ -665,7 +604,9 @@ ZstdDecompressor_dealloc(PyObject *ob)
PyObject_GC_UnTrack(self);
/* Free decompression context */
- ZSTD_freeDCtx(self->dctx);
+ if (self->dctx) {
+ ZSTD_freeDCtx(self->dctx);
+ }
/* Py_CLEAR the dict after free decompression context */
Py_CLEAR(self->dict);
@@ -682,55 +623,6 @@ ZstdDecompressor_dealloc(PyObject *ob)
}
/*[clinic input]
-_zstd.ZstdDecompressor.__init__
-
- zstd_dict: object = None
- A ZstdDict object, a pre-trained zstd dictionary.
- options: object = None
- A dict object that contains advanced decompression parameters.
-
-Create a decompressor object for decompressing data incrementally.
-
-Thread-safe at method level. For one-shot decompression, use the decompress()
-function instead.
-[clinic start generated code]*/
-
-static int
-_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
- PyObject *zstd_dict, PyObject *options)
-/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
-{
- /* Only called once */
- if (self->inited) {
- PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
- return -1;
- }
- self->inited = 1;
-
- /* Load dictionary to decompression context */
- if (zstd_dict != Py_None) {
- if (_zstd_load_d_dict(self, zstd_dict) < 0) {
- return -1;
- }
-
- /* Py_INCREF the dict */
- Py_INCREF(zstd_dict);
- self->dict = zstd_dict;
- }
-
- /* Set option to decompression context */
- if (options != Py_None) {
- if (_zstd_set_d_parameters(self, options) < 0) {
- return -1;
- }
- }
-
- // We can only start tracking self with the GC once self->dict is set.
- PyObject_GC_Track(self);
- return 0;
-}
-
-/*[clinic input]
@critical_section
@getter
_zstd.ZstdDecompressor.unused_data
@@ -747,16 +639,8 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
{
PyObject *ret;
- /* Thread-safe code */
- Py_BEGIN_CRITICAL_SECTION(self);
-
if (!self->eof) {
- _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
- if (mod_state == NULL) {
- return NULL;
- }
- ret = mod_state->empty_bytes;
- Py_INCREF(ret);
+ return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
}
else {
if (self->unused_data == NULL) {
@@ -772,8 +656,6 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
}
}
- Py_END_CRITICAL_SECTION();
-
return ret;
}
@@ -781,7 +663,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
_zstd.ZstdDecompressor.decompress
data: Py_buffer
- A bytes-like object, zstd data to be decompressed.
+ A bytes-like object, Zstandard data to be decompressed.
max_length: Py_ssize_t = -1
Maximum size of returned data. When it is negative, the size of
output buffer is unlimited. When it is nonnegative, returns at
@@ -807,25 +689,20 @@ static PyObject *
_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
Py_buffer *data,
Py_ssize_t max_length)
-/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/
+/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/
{
PyObject *ret;
/* Thread-safe code */
Py_BEGIN_CRITICAL_SECTION(self);
- ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
+ ret = stream_decompress(self, data, max_length);
Py_END_CRITICAL_SECTION();
return ret;
}
-#define clinic_state() (get_zstd_state_from_type(type))
-#include "clinic/decompressor.c.h"
-#undef clinic_state
-
static PyMethodDef ZstdDecompressor_methods[] = {
_ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF
-
- {0}
+ {NULL, NULL}
};
PyDoc_STRVAR(ZstdDecompressor_eof_doc,
@@ -840,17 +717,14 @@ PyDoc_STRVAR(ZstdDecompressor_needs_input_doc,
static PyMemberDef ZstdDecompressor_members[] = {
{"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof),
Py_READONLY, ZstdDecompressor_eof_doc},
-
{"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input),
Py_READONLY, ZstdDecompressor_needs_input_doc},
-
- {0}
+ {NULL}
};
static PyGetSetDef ZstdDecompressor_getset[] = {
_ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
-
- {0}
+ {NULL}
};
static int
@@ -873,19 +747,19 @@ ZstdDecompressor_clear(PyObject *ob)
static PyType_Slot ZstdDecompressor_slots[] = {
{Py_tp_new, _zstd_ZstdDecompressor_new},
{Py_tp_dealloc, ZstdDecompressor_dealloc},
- {Py_tp_init, _zstd_ZstdDecompressor___init__},
{Py_tp_methods, ZstdDecompressor_methods},
{Py_tp_members, ZstdDecompressor_members},
{Py_tp_getset, ZstdDecompressor_getset},
- {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__},
+ {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__},
{Py_tp_traverse, ZstdDecompressor_traverse},
{Py_tp_clear, ZstdDecompressor_clear},
- {0}
+ {0, 0}
};
-PyType_Spec zstddecompressor_type_spec = {
- .name = "_zstd.ZstdDecompressor",
+PyType_Spec zstd_decompressor_type_spec = {
+ .name = "compression.zstd.ZstdDecompressor",
.basicsize = sizeof(ZstdDecompressor),
- .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE
+ | Py_TPFLAGS_HAVE_GC,
.slots = ZstdDecompressor_slots,
};