diff options
Diffstat (limited to 'Modules/_zstd')
-rw-r--r-- | Modules/_zstd/_zstdmodule.c | 370 | ||||
-rw-r--r-- | Modules/_zstd/_zstdmodule.h | 19 | ||||
-rw-r--r-- | Modules/_zstd/buffer.h | 14 | ||||
-rw-r--r-- | Modules/_zstd/clinic/_zstdmodule.c.h | 22 | ||||
-rw-r--r-- | Modules/_zstd/clinic/compressor.c.h | 67 | ||||
-rw-r--r-- | Modules/_zstd/clinic/decompressor.c.h | 31 | ||||
-rw-r--r-- | Modules/_zstd/clinic/zstddict.c.h | 122 | ||||
-rw-r--r-- | Modules/_zstd/compressor.c | 596 | ||||
-rw-r--r-- | Modules/_zstd/decompressor.c | 327 | ||||
-rw-r--r-- | Modules/_zstd/zstddict.c | 217 | ||||
-rw-r--r-- | Modules/_zstd/zstddict.h | 16 |
11 files changed, 919 insertions, 882 deletions
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index c3852fe8973..d75c0779474 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -1,7 +1,4 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -10,7 +7,6 @@ Python module. #include "Python.h" #include "_zstdmodule.h" -#include "zstddict.h" #include <zstd.h> // ZSTD_*() #include <zdict.h> // ZDICT_*() @@ -23,49 +19,91 @@ module _zstd #include "clinic/_zstdmodule.c.h" +ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype) +{ + if (state == NULL) { + return NULL; + } + + /* Check ZstdDict */ + if (PyObject_TypeCheck(dict, state->ZstdDict_type)) { + return (ZstdDict*)dict; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2 + && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type) + && PyLong_Check(PyTuple_GET_ITEM(dict, 1))) + { + int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == -1 && PyErr_Occurred()) { + return NULL; + } + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) + { + *ptype = type; + return (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be a ZstdDict object."); + return NULL; +} + /* Format error message and set ZstdError. */ void -set_zstd_error(const _zstd_state* const state, - error_type type, size_t zstd_ret) +set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret) { - char *msg; + const char *msg; assert(ZSTD_isError(zstd_ret)); - switch (type) - { - case ERR_DECOMPRESS: - msg = "Unable to decompress zstd data: %s"; - break; - case ERR_COMPRESS: - msg = "Unable to compress zstd data: %s"; - break; - - case ERR_LOAD_D_DICT: - msg = "Unable to load zstd dictionary or prefix for decompression: %s"; - break; - case ERR_LOAD_C_DICT: - msg = "Unable to load zstd dictionary or prefix for compression: %s"; - break; - - case ERR_GET_C_BOUNDS: - msg = "Unable to get zstd compression parameter bounds: %s"; - break; - case ERR_GET_D_BOUNDS: - msg = "Unable to get zstd decompression parameter bounds: %s"; - break; - case ERR_SET_C_LEVEL: - msg = "Unable to set zstd compression level: %s"; - break; - - case ERR_TRAIN_DICT: - msg = "Unable to train zstd dictionary: %s"; - break; - case ERR_FINALIZE_DICT: - msg = "Unable to finalize zstd dictionary: %s"; - break; - - default: - Py_UNREACHABLE(); + if (state == NULL) { + return; + } + switch (type) { + case ERR_DECOMPRESS: + msg = "Unable to decompress Zstandard data: %s"; + break; + case ERR_COMPRESS: + msg = "Unable to compress Zstandard data: %s"; + break; + case ERR_SET_PLEDGED_INPUT_SIZE: + msg = "Unable to set pledged uncompressed content size: %s"; + break; + + case ERR_LOAD_D_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "decompression: %s"; + break; + case ERR_LOAD_C_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "compression: %s"; + break; + + case ERR_GET_C_BOUNDS: + msg = "Unable to get zstd compression parameter bounds: %s"; + break; + case ERR_GET_D_BOUNDS: + msg = "Unable to get zstd decompression parameter bounds: %s"; + break; + case ERR_SET_C_LEVEL: + msg = "Unable to set zstd compression level: %s"; + break; + + case ERR_TRAIN_DICT: + msg = "Unable to train the Zstandard dictionary: %s"; + break; + case ERR_FINALIZE_DICT: + msg = "Unable to finalize the Zstandard dictionary: %s"; + break; + + default: + Py_UNREACHABLE(); } PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret)); } @@ -105,16 +143,13 @@ static const ParameterInfo dp_list[] = { }; void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v) +set_parameter_error(int is_compress, int key_v, int value_v) { ParameterInfo const *list; int list_size; - char const *name; char *type; ZSTD_bounds bounds; - int i; - char pos_msg[128]; + char pos_msg[64]; if (is_compress) { list = cp_list; @@ -128,8 +163,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress, } /* Find parameter's name */ - name = NULL; - for (i = 0; i < list_size; i++) { + char const *name = NULL; + for (int i = 0; i < list_size; i++) { if (key_v == (list+i)->parameter) { name = (list+i)->parameter_name; break; @@ -151,20 +186,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress, bounds = ZSTD_dParam_getBounds(key_v); } if (ZSTD_isError(bounds.error)) { - PyErr_Format(state->ZstdError, - "Zstd %s parameter \"%s\" is invalid.", + PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'", type, name); return; } /* Error message */ - PyErr_Format(state->ZstdError, - "Error when setting zstd %s parameter \"%s\", it " - "should %d <= value <= %d, provided value is %d. " - "(%d-bit build)", - type, name, - bounds.lowerBound, bounds.upperBound, value_v, - 8*(int)sizeof(Py_ssize_t)); + PyErr_Format(PyExc_ValueError, + "%s parameter '%s' received an illegal value %d; " + "the valid range is [%d, %d]", + type, name, value_v, bounds.lowerBound, bounds.upperBound); } static inline _zstd_state* @@ -175,6 +206,54 @@ get_zstd_state(PyObject *module) return (_zstd_state *)state; } +static Py_ssize_t +calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, + size_t **chunk_sizes) +{ + Py_ssize_t chunks_number; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + chunks_number = PyTuple_GET_SIZE(samples_sizes); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return -1; + } + + /* Prepare chunk_sizes */ + *chunk_sizes = PyMem_New(size_t, chunks_number); + if (*chunk_sizes == NULL) { + PyErr_NoMemory(); + return -1; + } + + sizes_sum = PyBytes_GET_SIZE(samples_bytes); + for (i = 0; i < chunks_number; i++) { + size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i)); + (*chunk_sizes)[i] = size; + if (size == (size_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto sum_error; + } + return -1; + } + if ((size_t)sizes_sum < size) { + goto sum_error; + } + sizes_sum -= size; + } + + if (sizes_sum != 0) { +sum_error: + PyErr_SetString(PyExc_ValueError, + "The samples size tuple doesn't match the " + "concatenation's size."); + return -1; + } + return chunks_number; +} + /*[clinic input] _zstd.train_dict @@ -187,59 +266,30 @@ _zstd.train_dict The size of the dictionary. / -Internal function, train a zstd dictionary on sample data. +Train a Zstandard dictionary on sample data. [clinic start generated code]*/ static PyObject * _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, PyObject *samples_sizes, Py_ssize_t dict_size) -/*[clinic end generated code: output=8e87fe43935e8f77 input=70fcd8937f2528b6]*/ +/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/ { - // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict - // are pretty similar. We should see if we can refactor them to share that code. - Py_ssize_t chunks_number; - size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; + size_t *chunk_sizes = NULL; + Py_ssize_t chunks_number; size_t zstd_ret; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { goto error; } @@ -251,16 +301,16 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, /* Train the dictionary */ char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes); - char *samples_buffer = PyBytes_AS_STRING(samples_bytes); + const char *samples_buffer = PyBytes_AS_STRING(samples_bytes); Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size, samples_buffer, chunk_sizes, (uint32_t)chunks_number); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); goto error; } @@ -292,10 +342,10 @@ _zstd.finalize_dict dict_size: Py_ssize_t The size of the dictionary. compression_level: int - Optimize for a specific zstd compression level, 0 means default. + Optimize for a specific Zstandard compression level, 0 means default. / -Internal function, finalize a zstd dictionary. +Finalize a Zstandard dictionary. [clinic start generated code]*/ static PyObject * @@ -303,52 +353,25 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, PyBytesObject *samples_bytes, PyObject *samples_sizes, Py_ssize_t dict_size, int compression_level) -/*[clinic end generated code: output=f91821ba5ae85bda input=130d1508adb55ba1]*/ +/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/ { Py_ssize_t chunks_number; size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; size_t zstd_ret; ZDICT_params_t params; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { goto error; } @@ -360,7 +383,7 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /* Parameters */ - /* Optimize for a specific zstd compression level, 0 means default. */ + /* Optimize for a specific Zstandard compression level, 0 means default. */ params.compressionLevel = compression_level; /* Write log to stderr, 0 = none. */ params.notificationLevel = 0; @@ -371,14 +394,15 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_finalizeDictionary( PyBytes_AS_STRING(dst_dict_bytes), dict_size, - PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes), + PyBytes_AS_STRING(custom_dict_bytes), + Py_SIZE(custom_dict_bytes), PyBytes_AS_STRING(samples_bytes), chunk_sizes, (uint32_t)chunks_number, params); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); goto error; } @@ -407,18 +431,18 @@ _zstd.get_param_bounds is_compress: bool True for CompressionParameter, False for DecompressionParameter. -Internal function, get CompressionParameter/DecompressionParameter bounds. +Get CompressionParameter/DecompressionParameter bounds. [clinic start generated code]*/ static PyObject * _zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) -/*[clinic end generated code: output=4acf5a876f0620ca input=84e669591e487008]*/ +/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/ { ZSTD_bounds bound; if (is_compress) { bound = ZSTD_cParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error); return NULL; } @@ -426,7 +450,7 @@ _zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) else { bound = ZSTD_dParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error); return NULL; } @@ -442,24 +466,23 @@ _zstd.get_frame_size A bytes-like object, it should start from the beginning of a frame, and contains at least one complete frame. -Get the size of a zstd frame, including frame header and 4-byte checksum if it has one. - -It will iterate all blocks' headers within a frame, to accumulate the frame size. +Get the size of a Zstandard frame, including the header and optional checksum. [clinic start generated code]*/ static PyObject * _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/ +/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/ { size_t frame_size; - frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len); + frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, + frame_buffer->len); if (ZSTD_isError(frame_size)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_Format(mod_state->ZstdError, - "Error when finding the compressed size of a zstd frame. " - "Make sure the frame_buffer argument starts from the " - "beginning of a frame, and its length not less than this " + "Error when finding the compressed size of a Zstandard frame. " + "Ensure the frame_buffer argument starts from the " + "beginning of a frame, and its length is not less than this " "complete frame. Zstd error message: %s.", ZSTD_getErrorName(frame_size)); return NULL; @@ -472,14 +495,14 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) _zstd.get_frame_info frame_buffer: Py_buffer - A bytes-like object, containing the header of a zstd frame. + A bytes-like object, containing the header of a Zstandard frame. -Internal function, get zstd frame infomation from a frame header. +Get Zstandard frame infomation from a frame header. [clinic start generated code]*/ static PyObject * _zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=56e033cf48001929 input=1816f14656b6aa22]*/ +/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/ { uint64_t decompressed_size; uint32_t dict_id; @@ -491,12 +514,12 @@ _zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_SetString(mod_state->ZstdError, "Error when getting information from the header of " - "a zstd frame. Make sure the frame_buffer argument " + "a Zstandard frame. Ensure the frame_buffer argument " "starts from the beginning of a frame, and its length " - "not less than the frame header (6~18 bytes)."); + "is not less than the frame header (6~18 bytes)."); return NULL; } @@ -518,30 +541,20 @@ _zstd.set_parameter_types d_parameter_type: object(subclass_of='&PyType_Type') DecompressionParameter IntEnum type object -Internal function, set CompressionParameter/DecompressionParameter types for validity check. +Set CompressionParameter and DecompressionParameter types for validity check. [clinic start generated code]*/ static PyObject * _zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, PyObject *d_parameter_type) -/*[clinic end generated code: output=f3313b1294f19502 input=30402523871b8280]*/ +/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/ { - _zstd_state* const mod_state = get_zstd_state(module); - - if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { - PyErr_SetString(PyExc_ValueError, - "The two arguments should be CompressionParameter and " - "DecompressionParameter types."); - return NULL; - } + _zstd_state* mod_state = get_zstd_state(module); - Py_XDECREF(mod_state->CParameter_type); Py_INCREF(c_parameter_type); - mod_state->CParameter_type = (PyTypeObject*)c_parameter_type; - - Py_XDECREF(mod_state->DParameter_type); + Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type); Py_INCREF(d_parameter_type); - mod_state->DParameter_type = (PyTypeObject*)d_parameter_type; + Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type); Py_RETURN_NONE; } @@ -586,7 +599,7 @@ do { \ Py_DECREF(v); \ } while (0) - _zstd_state* const mod_state = get_zstd_state(m); + _zstd_state* mod_state = get_zstd_state(m); /* Reusable objects & variables */ mod_state->CParameter_type = NULL; @@ -604,7 +617,6 @@ do { \ return -1; } if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) { - Py_DECREF(mod_state->ZstdError); return -1; } @@ -692,7 +704,7 @@ do { \ static int _zstd_traverse(PyObject *module, visitproc visit, void *arg) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); Py_VISIT(mod_state->ZstdDict_type); Py_VISIT(mod_state->ZstdCompressor_type); @@ -709,7 +721,7 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg) static int _zstd_clear(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); Py_CLEAR(mod_state->ZstdDict_type); Py_CLEAR(mod_state->ZstdCompressor_type); diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index 00e0d2177f3..4e8f708f223 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -1,13 +1,12 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* Declarations shared between different parts of the _zstd module*/ #ifndef ZSTD_MODULE_H #define ZSTD_MODULE_H +#include "zstddict.h" + /* Type specs */ extern PyType_Spec zstd_dict_type_spec; extern PyType_Spec zstd_compressor_type_spec; @@ -28,6 +27,7 @@ typedef struct { typedef enum { ERR_DECOMPRESS, ERR_COMPRESS, + ERR_SET_PLEDGED_INPUT_SIZE, ERR_LOAD_D_DICT, ERR_LOAD_C_DICT, @@ -46,13 +46,16 @@ typedef enum { DICT_TYPE_PREFIX = 2 } dictionary_type; +extern ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, + PyObject *dict, int *type); + /* Format error message and set ZstdError. */ extern void -set_zstd_error(const _zstd_state* const state, - const error_type type, size_t zstd_ret); +set_zstd_error(const _zstd_state *state, + error_type type, size_t zstd_ret); extern void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v); +set_parameter_error(int is_compress, int key_v, int value_v); #endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h index c902eef4f8e..4c885fa0d72 100644 --- a/Modules/_zstd/buffer.h +++ b/Modules/_zstd/buffer.h @@ -1,7 +1,4 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ #ifndef ZSTD_BUFFER_H #define ZSTD_BUFFER_H @@ -22,7 +19,8 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, /* Ensure .list was set to NULL */ assert(buffer->list == NULL); - Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, + &ob->dst); if (res < 0) { return -1; } @@ -37,8 +35,7 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, Return -1 on failure */ static inline int _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, - Py_ssize_t max_length, - Py_ssize_t init_size) + Py_ssize_t max_length, Py_ssize_t init_size) { Py_ssize_t block_size; @@ -53,7 +50,8 @@ _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, block_size = init_size; } - Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, + &ob->dst); if (res < 0) { return -1; } diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h index fc9f49813df..766e1cfa776 100644 --- a/Modules/_zstd/clinic/_zstdmodule.c.h +++ b/Modules/_zstd/clinic/_zstdmodule.c.h @@ -13,7 +13,7 @@ PyDoc_STRVAR(_zstd_train_dict__doc__, "train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" "--\n" "\n" -"Internal function, train a zstd dictionary on sample data.\n" +"Train a Zstandard dictionary on sample data.\n" "\n" " samples_bytes\n" " Concatenation of samples.\n" @@ -73,7 +73,7 @@ PyDoc_STRVAR(_zstd_finalize_dict__doc__, " dict_size, compression_level, /)\n" "--\n" "\n" -"Internal function, finalize a zstd dictionary.\n" +"Finalize a Zstandard dictionary.\n" "\n" " custom_dict_bytes\n" " Custom dictionary content.\n" @@ -84,7 +84,7 @@ PyDoc_STRVAR(_zstd_finalize_dict__doc__, " dict_size\n" " The size of the dictionary.\n" " compression_level\n" -" Optimize for a specific zstd compression level, 0 means default."); +" Optimize for a specific Zstandard compression level, 0 means default."); #define _ZSTD_FINALIZE_DICT_METHODDEF \ {"finalize_dict", _PyCFunction_CAST(_zstd_finalize_dict), METH_FASTCALL, _zstd_finalize_dict__doc__}, @@ -149,7 +149,7 @@ PyDoc_STRVAR(_zstd_get_param_bounds__doc__, "get_param_bounds($module, /, parameter, is_compress)\n" "--\n" "\n" -"Internal function, get CompressionParameter/DecompressionParameter bounds.\n" +"Get CompressionParameter/DecompressionParameter bounds.\n" "\n" " parameter\n" " The parameter to get bounds.\n" @@ -220,13 +220,11 @@ PyDoc_STRVAR(_zstd_get_frame_size__doc__, "get_frame_size($module, /, frame_buffer)\n" "--\n" "\n" -"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n" +"Get the size of a Zstandard frame, including the header and optional checksum.\n" "\n" " frame_buffer\n" " A bytes-like object, it should start from the beginning of a frame,\n" -" and contains at least one complete frame.\n" -"\n" -"It will iterate all blocks\' headers within a frame, to accumulate the frame size."); +" and contains at least one complete frame."); #define _ZSTD_GET_FRAME_SIZE_METHODDEF \ {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__}, @@ -291,10 +289,10 @@ PyDoc_STRVAR(_zstd_get_frame_info__doc__, "get_frame_info($module, /, frame_buffer)\n" "--\n" "\n" -"Internal function, get zstd frame infomation from a frame header.\n" +"Get Zstandard frame infomation from a frame header.\n" "\n" " frame_buffer\n" -" A bytes-like object, containing the header of a zstd frame."); +" A bytes-like object, containing the header of a Zstandard frame."); #define _ZSTD_GET_FRAME_INFO_METHODDEF \ {"get_frame_info", _PyCFunction_CAST(_zstd_get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_info__doc__}, @@ -359,7 +357,7 @@ PyDoc_STRVAR(_zstd_set_parameter_types__doc__, "set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" "--\n" "\n" -"Internal function, set CompressionParameter/DecompressionParameter types for validity check.\n" +"Set CompressionParameter and DecompressionParameter types for validity check.\n" "\n" " c_parameter_type\n" " CompressionParameter IntEnum type object\n" @@ -428,4 +426,4 @@ _zstd_set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t na exit: return return_value; } -/*[clinic end generated code: output=8445b658dcdcbb9c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=437b084f149e68e5 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index d7909cdf89f..4f8d93fd9e8 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -8,30 +8,30 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdCompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, "ZstdCompressor(level=None, options=None, zstd_dict=None)\n" "--\n" "\n" "Create a compressor object for compressing data incrementally.\n" "\n" " level\n" -" The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.\n" +" The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.\n" " options\n" " A dict object that contains advanced compression parameters.\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" "\n" "Thread-safe at method level. For one-shot compression, use the compress()\n" "function instead."); -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict); +static PyObject * +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict); -static int -_zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 3 @@ -89,7 +89,7 @@ _zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) } zstd_dict = fastargs[2]; skip_optional_pos: - return_value = _zstd_ZstdCompressor___init___impl((ZstdCompressor *)self, level, options, zstd_dict); + return_value = _zstd_ZstdCompressor_new_impl(type, level, options, zstd_dict); exit: return return_value; @@ -189,9 +189,9 @@ PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__, " Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n" " ZstdCompressor.FLUSH_BLOCK\n" "\n" -"Flush any remaining data left in internal buffers. Since zstd data consists\n" -"of one or more independent frames, the compressor object can still be used\n" -"after this method is called."); +"Flush any remaining data left in internal buffers. Since Zstandard data\n" +"consists of one or more independent frames, the compressor object can still\n" +"be used after this method is called."); #define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \ {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__}, @@ -252,4 +252,43 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=ef69eab155be39f6 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_zstd_ZstdCompressor_set_pledged_input_size__doc__, +"set_pledged_input_size($self, size, /)\n" +"--\n" +"\n" +"Set the uncompressed content size to be written into the frame header.\n" +"\n" +" size\n" +" The size of the uncompressed data to be provided to the compressor.\n" +"\n" +"This method can be used to ensure the header of the frame about to be written\n" +"includes the size of the data, unless the CompressionParameter.content_size_flag\n" +"is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.\n" +"\n" +"It is important to ensure that the pledged data size matches the actual data\n" +"size. If they do not match the compressed output data may be corrupted and the\n" +"final chunk written may be lost."); + +#define _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF \ + {"set_pledged_input_size", (PyCFunction)_zstd_ZstdCompressor_set_pledged_input_size, METH_O, _zstd_ZstdCompressor_set_pledged_input_size__doc__}, + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size); + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + unsigned long long size; + + if (!zstd_contentsize_converter(arg, &size)) { + goto exit; + } + return_value = _zstd_ZstdCompressor_set_pledged_input_size_impl((ZstdCompressor *)self, size); + +exit: + return return_value; +} +/*[clinic end generated code: output=c1d5c2cf06a8becd input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h index 9359c637203..c6fdae74ab0 100644 --- a/Modules/_zstd/clinic/decompressor.c.h +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -7,31 +7,30 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_abstract.h" // _PyNumber_Index() -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDecompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__, "ZstdDecompressor(zstd_dict=None, options=None)\n" "--\n" "\n" "Create a decompressor object for decompressing data incrementally.\n" "\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" " options\n" " A dict object that contains advanced decompression parameters.\n" "\n" "Thread-safe at method level. For one-shot decompression, use the decompress()\n" "function instead."); -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options); +static PyObject * +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options); -static int -_zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 2 @@ -82,7 +81,7 @@ _zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs } options = fastargs[1]; skip_optional_pos: - return_value = _zstd_ZstdDecompressor___init___impl((ZstdDecompressor *)self, zstd_dict, options); + return_value = _zstd_ZstdDecompressor_new_impl(type, zstd_dict, options); exit: return return_value; @@ -114,13 +113,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self); static PyObject * _zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); } PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, @@ -130,7 +123,7 @@ PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, "Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n" "\n" " data\n" -" A bytes-like object, zstd data to be decompressed.\n" +" A bytes-like object, Zstandard data to be decompressed.\n" " max_length\n" " Maximum size of returned data. When it is negative, the size of\n" " output buffer is unlimited. When it is nonnegative, returns at\n" @@ -227,4 +220,4 @@ exit: return return_value; } -/*[clinic end generated code: output=ae703f0465a2906d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=30c12ef047027ede input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 4e0f7b64172..79db85405d6 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -6,38 +6,35 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDict___init____doc__, -"ZstdDict(dict_content, is_raw=False)\n" +PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, +"ZstdDict(dict_content, /, *, is_raw=False)\n" "--\n" "\n" -"Represents a zstd dictionary, which can be used for compression/decompression.\n" +"Represents a Zstandard dictionary.\n" "\n" " dict_content\n" -" A bytes-like object, dictionary\'s content.\n" +" The content of a Zstandard dictionary as a bytes-like object.\n" " is_raw\n" -" This parameter is for advanced user. True means dict_content\n" -" argument is a \"raw content\" dictionary, free of any format\n" -" restriction. False means dict_content argument is an ordinary\n" -" zstd dictionary, was created by zstd functions, follow a\n" -" specified format.\n" +" If true, perform no checks on *dict_content*, useful for some\n" +" advanced cases. Otherwise, check that the content represents\n" +" a Zstandard dictionary created by the zstd library or CLI.\n" "\n" -"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n" -"ZstdDecompressor objects."); +"The dictionary can be used for compression or decompression, and can be shared\n" +"by multiple ZstdCompressor or ZstdDecompressor objects."); -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw); +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw); -static int -_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 1 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -46,7 +43,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), }, + .ob_item = { &_Py_ID(is_raw), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -55,7 +52,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"dict_content", "is_raw", NULL}; + static const char * const _keywords[] = {"", "is_raw", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "ZstdDict", @@ -66,33 +63,68 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; - PyObject *dict_content; + Py_buffer dict_content = {NULL, NULL}; int is_raw = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, - /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - dict_content = fastargs[0]; + if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) { + goto exit; + } if (!noptargs) { - goto skip_optional_pos; + goto skip_optional_kwonly; } is_raw = PyObject_IsTrue(fastargs[1]); if (is_raw < 0) { goto exit; } -skip_optional_pos: - return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw); +skip_optional_kwonly: + return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw); exit: + /* Cleanup for dict_content */ + if (dict_content.obj) { + PyBuffer_Release(&dict_content); + } + return return_value; } +PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__, +"The content of a Zstandard dictionary, as a bytes object."); +#if defined(_zstd_ZstdDict_dict_content_DOCSTR) +# undef _zstd_ZstdDict_dict_content_DOCSTR +#endif +#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__ + +#if !defined(_zstd_ZstdDict_dict_content_DOCSTR) +# define _zstd_ZstdDict_dict_content_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self); +} + PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, "Load as a digested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_digested_dict)\n" +"\n" "1. Some advanced compression parameters of compressor may be overridden\n" " by parameters of digested dictionary.\n" "2. ZstdDict has a digested dictionaries cache for each compression level.\n" @@ -120,19 +152,15 @@ _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, "Load as an undigested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"\n" "1. The advanced compression parameters of compressor will not be overridden.\n" "2. Loading an undigested dictionary is costly. If load an undigested dictionary\n" " multiple times, consider reusing a compressor object.\n" @@ -158,19 +186,15 @@ _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, "Load as a prefix to compressor/decompressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_prefix)\n" +"\n" "1. Prefix is compatible with long distance matching, while dictionary is not.\n" "2. It only works for the first frame, then the compressor/decompressor will\n" " return to no prefix state.\n" @@ -196,12 +220,6 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); } -/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4696cbc722e5fdfc input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index 355a27d2734..bc9e6eff89a 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -1,7 +1,4 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdCompressor class definitions */ @@ -19,9 +16,8 @@ class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" #include "_zstdmodule.h" #include "buffer.h" -#include "zstddict.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked -#include <stdbool.h> // bool #include <stddef.h> // offsetof() #include <zstd.h> // ZSTD_*() @@ -43,107 +39,158 @@ typedef struct { /* Compression level */ int compression_level; - /* __init__ has been called, 0 or 1. */ - bool initialized; + /* Lock to protect the compression context */ + PyMutex lock; } ZstdCompressor; #define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) +/*[python input] + +class zstd_contentsize_converter(CConverter): + type = 'unsigned long long' + converter = 'zstd_contentsize_converter' + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=0932c350d633c7de]*/ + + +static int +zstd_contentsize_converter(PyObject *size, unsigned long long *p) +{ + // None means the user indicates the size is unknown. + if (size == Py_None) { + *p = ZSTD_CONTENTSIZE_UNKNOWN; + } + else { + /* ZSTD_CONTENTSIZE_UNKNOWN is 0ULL - 1 + ZSTD_CONTENTSIZE_ERROR is 0ULL - 2 + Users should only pass values < ZSTD_CONTENTSIZE_ERROR */ + unsigned long long pledged_size = PyLong_AsUnsignedLongLong(size); + /* Here we check for (unsigned long long)-1 as a sign of an error in + PyLong_AsUnsignedLongLong */ + if (pledged_size == (unsigned long long)-1 && PyErr_Occurred()) { + *p = ZSTD_CONTENTSIZE_ERROR; + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + return 0; + } + if (pledged_size >= ZSTD_CONTENTSIZE_ERROR) { + *p = ZSTD_CONTENTSIZE_ERROR; + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + *p = pledged_size; + } + return 1; +} + #include "clinic/compressor.c.h" static int -_zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char* arg_type) +_zstd_set_c_level(ZstdCompressor *self, int level) { - size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { + /* Set integer compression level */ + int min_level = ZSTD_minCLevel(); + int max_level = ZSTD_maxCLevel(); + if (level < min_level || level > max_level) { + PyErr_Format(PyExc_ValueError, + "illegal compression level %d; the valid range is [%d, %d]", + level, min_level, max_level); return -1; } - /* Integer compression level */ - if (PyLong_Check(level_or_options)) { - int level = PyLong_AsInt(level_or_options); - if (level == -1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Compression level should be an int value between %d and %d.", - ZSTD_minCLevel(), ZSTD_maxCLevel()); - return -1; - } + /* Save for generating ZSTD_CDICT */ + self->compression_level = level; - /* Save for generating ZSTD_CDICT */ - self->compression_level = level; + /* Set compressionLevel to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, level); - /* Set compressionLevel to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, - ZSTD_c_compressionLevel, - level); + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + return -1; + } + return 0; +} - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); +static int +_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, + "ZstdCompressor() argument 'options' must be dict, not %T", + options); + return -1; + } + + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "compression options dictionary key must not be a " + "DecompressionParameter attribute"); return -1; } - return 0; - } - /* Options dict */ - if (PyDict_Check(level_or_options)) { - PyObject *key, *value; - Py_ssize_t pos = 0; + Py_INCREF(key); + Py_INCREF(value); + int key_v = PyLong_AsInt(key); + Py_DECREF(key); + if (key_v == -1 && PyErr_Occurred()) { + Py_DECREF(value); + return -1; + } - while (PyDict_Next(level_or_options, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == mod_state->DParameter_type) { - PyErr_SetString(PyExc_TypeError, - "Key of compression option dict should " - "NOT be DecompressionParameter."); - return -1; - } + int value_v = PyLong_AsInt(value); + Py_DECREF(value); + if (value_v == -1 && PyErr_Occurred()) { + return -1; + } - int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a CompressionParameter attribute."); + if (key_v == ZSTD_c_compressionLevel) { + if (_zstd_set_c_level(self, value_v) < 0) { return -1; } - - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? - int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of option dict should be an int."); - return -1; + continue; + } + if (key_v == ZSTD_c_nbWorkers) { + /* From the zstd library docs: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; } + } - if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; - } - else if (key_v == ZSTD_c_nbWorkers) { - /* From zstd library doc: - 1. When nbWorkers >= 1, triggers asynchronous mode when - used with ZSTD_compressStream2(). - 2, Default value is `0`, aka "single-threaded mode" : no - worker is spawned, compression is performed inside - caller's thread, all invocations are blocking. */ - if (value_v != 0) { - self->use_multithread = 1; - } - } + /* Set parameter to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - /* Set parameter to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 1, key_v, value_v); - return -1; - } + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(1, key_v, value_v); + return -1; } - return 0; } - PyErr_Format(PyExc_TypeError, "Invalid type for %s. Expected %s", arg_name, arg_type); - return -1; + return 0; } static void @@ -156,12 +203,12 @@ capsule_free_cdict(PyObject *capsule) ZSTD_CDict * _get_CDict(ZstdDict *self, int compressionLevel) { + assert(PyMutex_IsLocked(&self->lock)); PyObject *level = NULL; - PyObject *capsule; + PyObject *capsule = NULL; ZSTD_CDict *cdict; + int ret; - // TODO(emmatyping): refactor critical section code into a lock_held function - Py_BEGIN_CRITICAL_SECTION(self); /* int level object */ level = PyLong_FromLong(compressionLevel); @@ -170,27 +217,23 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Get PyCapsule object from self->c_dicts */ - capsule = PyDict_GetItemWithError(self->c_dicts, level); + ret = PyDict_GetItemRef(self->c_dicts, level, &capsule); + if (ret < 0) { + goto error; + } if (capsule == NULL) { - if (PyErr_Occurred()) { - goto error; - } - /* Create ZSTD_CDict instance */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - cdict = ZSTD_createCDict(dict_buffer, - dict_len, + cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len, compressionLevel); Py_END_ALLOW_THREADS if (cdict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_CDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_CDict instance from " + "Zstandard dictionary content."); } goto error; } @@ -203,11 +246,10 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Add PyCapsule object to self->c_dicts */ - if (PyDict_SetItem(self->c_dicts, level, capsule) < 0) { - Py_DECREF(capsule); + ret = PyDict_SetItem(self->c_dicts, level, capsule); + if (ret < 0) { goto error; } - Py_DECREF(capsule); } else { /* ZSTD_CDict instance already exists */ @@ -219,62 +261,15 @@ error: cdict = NULL; success: Py_XDECREF(level); - Py_END_CRITICAL_SECTION(); + Py_XDECREF(capsule); return cdict; } static int -_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +_zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) { - size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* When compressing, use undigested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_UNDIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: if (type == DICT_TYPE_DIGESTED) { /* Get ZSTD_CDict */ ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); @@ -283,28 +278,18 @@ load: } /* Reference a prepared dictionary. It overrides some compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); - Py_END_CRITICAL_SECTION(); } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary. It doesn't override compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer, + zd->dict_len); } else { Py_UNREACHABLE(); @@ -318,24 +303,57 @@ load: return 0; } +static int +_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When compressing, use undigested dictionary by default. */ + int type = DICT_TYPE_UNDIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} + +/*[clinic input] +@classmethod +_zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new + level: object = None + The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. + options: object = None + A dict object that contains advanced compression parameters. + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + +Create a compressor object for compressing data incrementally. + +Thread-safe at method level. For one-shot compression, use the compress() +function instead. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict) +/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ { - ZstdCompressor *self; - self = PyObject_GC_New(ZstdCompressor, type); + ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); if (self == NULL) { goto error; } - self->initialized = 0; - self->dict = NULL; self->use_multithread = 0; - + self->dict = NULL; + self->lock = (PyMutex){0}; /* Compression context */ self->cctx = ZSTD_createCCtx(); if (self->cctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_CCtx instance."); @@ -346,12 +364,56 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject /* Last mode */ self->last_mode = ZSTD_e_end; + if (level != Py_None && options != Py_None) { + PyErr_SetString(PyExc_TypeError, + "Only one of level or options should be used."); + goto error; + } + + /* Set compression level */ + if (level != Py_None) { + if (!PyLong_Check(level)) { + PyErr_SetString(PyExc_TypeError, + "invalid type for level, expected int"); + goto error; + } + int level_v = PyLong_AsInt(level); + if (level_v == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "illegal compression level; the valid range is [%d, %d]", + ZSTD_minCLevel(), ZSTD_maxCLevel()); + } + goto error; + } + if (_zstd_set_c_level(self, level_v) < 0) { + goto error; + } + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_c_parameters(self, options) < 0) { + goto error; + } + } + + /* Load Zstandard dictionary to compression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_c_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -363,7 +425,11 @@ ZstdCompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free compression context */ - ZSTD_freeCCtx(self->cctx); + if (self->cctx) { + ZSTD_freeCCtx(self->cctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); /* Py_XDECREF the dict after free the compression context */ Py_CLEAR(self->dict); @@ -373,71 +439,11 @@ ZstdCompressor_dealloc(PyObject *ob) Py_DECREF(tp); } -/*[clinic input] -_zstd.ZstdCompressor.__init__ - - level: object = None - The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT. - options: object = None - A dict object that contains advanced compression parameters. - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - -Create a compressor object for compressing data incrementally. - -Thread-safe at method level. For one-shot compression, use the compress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict) -/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/ -{ - if (self->initialized) { - PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported"); - return -1; - } - self->initialized = 1; - - if (level != Py_None && options != Py_None) { - PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); - return -1; - } - - /* Set compressLevel/options to compression context */ - if (level != Py_None) { - if (_zstd_set_c_parameters(self, level, "level", "int") < 0) { - return -1; - } - } - - if (options != Py_None) { - if (_zstd_set_c_parameters(self, options, "options", "dict") < 0) { - return -1; - } - } - - /* Load dictionary to compression context */ - if (zstd_dict != Py_None) { - if (_zstd_load_c_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - static PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive) +compress_lock_held(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -464,12 +470,12 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, } if (_OutputBuffer_InitWithSize(&buffer, &out, -1, - (Py_ssize_t) output_buffer_size) < 0) { + (Py_ssize_t) output_buffer_size) < 0) { goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); @@ -477,10 +483,8 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } @@ -509,15 +513,18 @@ error: return NULL; } +#ifndef NDEBUG static inline int mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { return in->size == in->pos && out->size != out->pos; } +#endif static PyObject * -compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) +compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -533,24 +540,25 @@ compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS do { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, + ZSTD_e_continue); + } while (out.pos != out.size + && in.pos != in.size + && !ZSTD_isError(zstd_ret)); Py_END_ALLOW_THREADS /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } - /* Like compress_impl(), output as much as possible. */ + /* Like compress_lock_held(), output as much as possible. */ if (out.pos == out.size) { if (_OutputBuffer_Grow(&buffer, &out) < 0) { goto error; @@ -609,14 +617,14 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); + PyMutex_Lock(&self->lock); /* Compress */ if (self->use_multithread && mode == ZSTD_e_continue) { - ret = compress_mt_continue_impl(self, data); + ret = compress_mt_continue_lock_held(self, data); } else { - ret = compress_impl(self, data, mode); + ret = compress_lock_held(self, data, mode); } if (ret) { @@ -628,7 +636,7 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } @@ -642,14 +650,14 @@ _zstd.ZstdCompressor.flush Finish the compression process. -Flush any remaining data left in internal buffers. Since zstd data consists -of one or more independent frames, the compressor object can still be used -after this method is called. +Flush any remaining data left in internal buffers. Since Zstandard data +consists of one or more independent frames, the compressor object can still +be used after this method is called. [clinic start generated code]*/ static PyObject * _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) -/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=a766870301932b85]*/ +/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=0ab19627f323cdbc]*/ { PyObject *ret; @@ -663,8 +671,9 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - ret = compress_impl(self, NULL, mode); + PyMutex_Lock(&self->lock); + + ret = compress_lock_held(self, NULL, mode); if (ret) { self->last_mode = mode; @@ -675,26 +684,78 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } + +/*[clinic input] +_zstd.ZstdCompressor.set_pledged_input_size + + size: zstd_contentsize + The size of the uncompressed data to be provided to the compressor. + / + +Set the uncompressed content size to be written into the frame header. + +This method can be used to ensure the header of the frame about to be written +includes the size of the data, unless the CompressionParameter.content_size_flag +is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised. + +It is important to ensure that the pledged data size matches the actual data +size. If they do not match the compressed output data may be corrupted and the +final chunk written may be lost. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size) +/*[clinic end generated code: output=3a09e55cc0e3b4f9 input=afd8a7d78cff2eb5]*/ +{ + // Error occured while converting argument, should be unreachable + assert(size != ZSTD_CONTENTSIZE_ERROR); + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + /* Check the current mode */ + if (self->last_mode != ZSTD_e_end) { + PyErr_SetString(PyExc_ValueError, + "set_pledged_input_size() method must be called " + "when last_mode == FLUSH_FRAME"); + PyMutex_Unlock(&self->lock); + return NULL; + } + + /* Set pledged content size */ + size_t zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, size); + PyMutex_Unlock(&self->lock); + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret); + return NULL; + } + + Py_RETURN_NONE; +} + static PyMethodDef ZstdCompressor_methods[] = { _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF + _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF {NULL, NULL} }; PyDoc_STRVAR(ZstdCompressor_last_mode_doc, "The last mode used to this compressor object, its value can be .CONTINUE,\n" ".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n" -"It can be used to get the current state of a compressor, such as, data flushed,\n" -"a frame ended."); +"It can be used to get the current state of a compressor, such as, data\n" +"flushed, or a frame ended."); static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), - Py_READONLY, ZstdCompressor_last_mode_doc}, + Py_READONLY, ZstdCompressor_last_mode_doc}, {NULL} }; @@ -717,10 +778,9 @@ ZstdCompressor_clear(PyObject *ob) static PyType_Slot zstdcompressor_slots[] = { {Py_tp_new, _zstd_ZstdCompressor_new}, {Py_tp_dealloc, ZstdCompressor_dealloc}, - {Py_tp_init, _zstd_ZstdCompressor___init__}, {Py_tp_methods, ZstdCompressor_methods}, {Py_tp_members, ZstdCompressor_members}, - {Py_tp_doc, (char*)_zstd_ZstdCompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdCompressor_new__doc__}, {Py_tp_traverse, ZstdCompressor_traverse}, {Py_tp_clear, ZstdCompressor_clear}, {0, 0} diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index dc3e90a22d8..c53d6e4cb05 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -1,7 +1,4 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdDecompressor class definition */ @@ -19,7 +16,7 @@ class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" #include "_zstdmodule.h" #include "buffer.h" -#include "zstddict.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include <stdbool.h> // bool #include <stddef.h> // offsetof() @@ -49,8 +46,8 @@ typedef struct { 1 means the end of the first frame has been reached. */ bool eof; - /* __init__ has been called, 0 or 1. */ - bool initialized; + /* Lock to protect the decompression context */ + PyMutex lock; } ZstdDecompressor; #define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) @@ -60,151 +57,86 @@ typedef struct { static inline ZSTD_DDict * _get_DDict(ZstdDict *self) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_DDict *ret; - /* Already created */ - if (self->d_dict != NULL) { - return self->d_dict; - } - - Py_BEGIN_CRITICAL_SECTION(self); if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - self->d_dict = ZSTD_createDDict(dict_buffer, - dict_len); + ret = ZSTD_createDDict(self->dict_buffer, self->dict_len); Py_END_ALLOW_THREADS + self->d_dict = ret; if (self->d_dict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_DDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_DDict instance from " + "Zstandard dictionary content."); } } } - /* Don't lose any exception */ - ret = self->d_dict; - Py_END_CRITICAL_SECTION(); - - return ret; + return self->d_dict; } -/* Set decompression parameters to decompression context */ static int _zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) { - size_t zstd_ret; - PyObject *key, *value; - Py_ssize_t pos; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } if (!PyDict_Check(options)) { - PyErr_SetString(PyExc_TypeError, - "options argument should be dict object."); + PyErr_Format(PyExc_TypeError, + "ZstdDecompressor() argument 'options' must be dict, not %T", + options); return -1; } - pos = 0; + Py_ssize_t pos = 0; + PyObject *key, *value; while (PyDict_Next(options, &pos, &key, &value)) { /* Check key type */ if (Py_TYPE(key) == mod_state->CParameter_type) { PyErr_SetString(PyExc_TypeError, - "Key of decompression options dict should " - "NOT be CompressionParameter."); + "compression options dictionary key must not be a " + "CompressionParameter attribute"); return -1; } - /* Both key & value should be 32-bit signed int */ + Py_INCREF(key); + Py_INCREF(value); int key_v = PyLong_AsInt(key); + Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a DecompressionParameter attribute."); return -1; } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? int value_v = PyLong_AsInt(value); + Py_DECREF(value); if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of options dict should be an int."); return -1; } /* Set parameter to compression context */ - Py_BEGIN_CRITICAL_SECTION(self); - zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); - Py_END_CRITICAL_SECTION(); + size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); /* Check error */ if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 0, key_v, value_v); + set_parameter_error(0, key_v, value_v); return -1; } } return 0; } -/* Load dictionary or prefix to decompression context */ static int -_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) { size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* When decompressing, use digested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_DIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: if (type == DICT_TYPE_DIGESTED) { /* Get ZSTD_DDict */ ZSTD_DDict *d_dict = _get_DDict(zd); @@ -212,27 +144,17 @@ load: return -1; } /* Reference a prepared dictionary */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); - Py_END_CRITICAL_SECTION(); } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer, + zd->dict_len); } else { /* Impossible code path */ @@ -249,6 +171,24 @@ load: return 0; } +/* Load dictionary or prefix to decompression context */ +static int +_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When decompressing, use digested dictionary by default. */ + int type = DICT_TYPE_DIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} + /* Decompress implementation in pseudo code: @@ -265,8 +205,8 @@ load: finish ZSTD_decompressStream()'s size_t return value: - - 0 when a frame is completely decoded and fully flushed, zstd's internal - buffer has no data. + - 0 when a frame is completely decoded and fully flushed, + zstd's internal buffer has no data. - An error code, which can be tested using ZSTD_isError(). - Or any other value > 0, which means there is still some decoding or flushing to do to complete current frame. @@ -274,8 +214,8 @@ load: Note, decompressing "an empty input" in any case will make it > 0. */ static PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length) +decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length) { size_t zstd_ret; ZSTD_outBuffer out; @@ -296,10 +236,8 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); goto error; } @@ -311,7 +249,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, } /* Need to check out before in. Maybe zstd's internal buffer still has - a few bytes can be output, grow the buffer and continue. */ + a few bytes that can be output, grow the buffer and continue. */ if (out.pos == out.size) { /* Output buffer exhausted */ @@ -345,10 +283,9 @@ error: } static void -decompressor_reset_session(ZstdDecompressor *self) +decompressor_reset_session_lock_held(ZstdDecompressor *self) { - // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here - // and ensure lock is always held + assert(PyMutex_IsLocked(&self->lock)); /* Reset variables */ self->in_begin = 0; @@ -365,15 +302,18 @@ decompressor_reset_session(ZstdDecompressor *self) } static PyObject * -stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) +stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, + Py_ssize_t max_length) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; /* Check .eof flag */ if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); + PyErr_SetString(PyExc_EOFError, + "Already at the end of a Zstandard frame."); assert(ret == NULL); return NULL; } @@ -462,7 +402,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, max_length); + ret = decompress_lock_held(self, &in, max_length); if (ret == NULL) { goto error; } @@ -490,8 +430,8 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length if (!use_input_buffer) { /* Discard buffer if it's too small (resizing it may needlessly copy the current contents) */ - if (self->input_buffer != NULL && - self->input_buffer_size < data_size) + if (self->input_buffer != NULL + && self->input_buffer_size < data_size) { PyMem_Free(self->input_buffer); self->input_buffer = NULL; @@ -523,30 +463,45 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length error: /* Reset decompressor's states/session */ - decompressor_reset_session(self); + decompressor_reset_session_lock_held(self); Py_CLEAR(ret); return NULL; } +/*[clinic input] +@classmethod +_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + options: object = None + A dict object that contains advanced decompression parameters. + +Create a decompressor object for decompressing data incrementally. + +Thread-safe at method level. For one-shot decompression, use the decompress() +function instead. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options) +/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/ { - ZstdDecompressor *self; - self = PyObject_GC_New(ZstdDecompressor, type); + ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type); if (self == NULL) { goto error; } - self->initialized = 0; - self->dict = NULL; self->input_buffer = NULL; self->input_buffer_size = 0; self->in_begin = -1; self->in_end = -1; self->unused_data = NULL; self->eof = 0; + self->dict = NULL; + self->lock = (PyMutex){0}; /* needs_input flag */ self->needs_input = 1; @@ -554,7 +509,7 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* Decompression context */ self->dctx = ZSTD_createDCtx(); if (self->dctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_DCtx instance."); @@ -562,12 +517,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + /* Load Zstandard dictionary to decompression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_d_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_d_parameters(self, options) < 0) { + goto error; + } + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -579,7 +551,11 @@ ZstdDecompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free decompression context */ - ZSTD_freeDCtx(self->dctx); + if (self->dctx) { + ZSTD_freeDCtx(self->dctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); /* Py_CLEAR the dict after free decompression context */ Py_CLEAR(self->dict); @@ -596,56 +572,6 @@ ZstdDecompressor_dealloc(PyObject *ob) } /*[clinic input] -_zstd.ZstdDecompressor.__init__ - - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - options: object = None - A dict object that contains advanced decompression parameters. - -Create a decompressor object for decompressing data incrementally. - -Thread-safe at method level. For one-shot decompression, use the decompress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options) -/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/ -{ - /* Only called once */ - if (self->initialized) { - PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported"); - return -1; - } - self->initialized = 1; - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (_zstd_load_d_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - /* Set option to decompression context */ - if (options != Py_None) { - if (_zstd_set_d_parameters(self, options) < 0) { - return -1; - } - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - -/*[clinic input] -@critical_section @getter _zstd.ZstdDecompressor.unused_data @@ -657,11 +583,14 @@ decompressed, unused input data after the frame. Otherwise this will be b''. static PyObject * _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) -/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/ +/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/ { PyObject *ret; + PyMutex_Lock(&self->lock); + if (!self->eof) { + PyMutex_Unlock(&self->lock); return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else { @@ -678,6 +607,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) } } + PyMutex_Unlock(&self->lock); return ret; } @@ -685,7 +615,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) _zstd.ZstdDecompressor.decompress data: Py_buffer - A bytes-like object, zstd data to be decompressed. + A bytes-like object, Zstandard data to be decompressed. max_length: Py_ssize_t = -1 Maximum size of returned data. When it is negative, the size of output buffer is unlimited. When it is nonnegative, returns at @@ -711,14 +641,13 @@ static PyObject * _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) -/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/ +/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/ { PyObject *ret; /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - - ret = stream_decompress(self, data, max_length); - Py_END_CRITICAL_SECTION(); + PyMutex_Lock(&self->lock); + ret = stream_decompress_lock_held(self, data, max_length); + PyMutex_Unlock(&self->lock); return ret; } @@ -732,9 +661,10 @@ PyDoc_STRVAR(ZstdDecompressor_eof_doc, "after that, an EOFError exception will be raised."); PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, -"If the max_length output limit in .decompress() method has been reached, and\n" -"the decompressor has (or may has) unconsumed input data, it will be set to\n" -"False. In this case, pass b'' to .decompress() method may output further data."); +"If the max_length output limit in .decompress() method has been reached,\n" +"and the decompressor has (or may has) unconsumed input data, it will be set\n" +"to False. In this case, passing b'' to the .decompress() method may output\n" +"further data."); static PyMemberDef ZstdDecompressor_members[] = { {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof), @@ -769,11 +699,10 @@ ZstdDecompressor_clear(PyObject *ob) static PyType_Slot ZstdDecompressor_slots[] = { {Py_tp_new, _zstd_ZstdDecompressor_new}, {Py_tp_dealloc, ZstdDecompressor_dealloc}, - {Py_tp_init, _zstd_ZstdDecompressor___init__}, {Py_tp_methods, ZstdDecompressor_methods}, {Py_tp_members, ZstdDecompressor_members}, {Py_tp_getset, ZstdDecompressor_getset}, - {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__}, {Py_tp_traverse, ZstdDecompressor_traverse}, {Py_tp_clear, ZstdDecompressor_clear}, {0, 0} diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 47bc8a84ca2..14f74aaed46 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -1,7 +1,4 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdDict class definitions */ @@ -18,25 +15,53 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" #include "Python.h" #include "_zstdmodule.h" -#include "zstddict.h" #include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() #define ZstdDict_CAST(op) ((ZstdDict *)op) +/*[clinic input] +@classmethod +_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new + dict_content: Py_buffer + The content of a Zstandard dictionary as a bytes-like object. + / + * + is_raw: bool = False + If true, perform no checks on *dict_content*, useful for some + advanced cases. Otherwise, check that the content represents + a Zstandard dictionary created by the zstd library or CLI. + +Represents a Zstandard dictionary. + +The dictionary can be used for compression or decompression, and can be shared +by multiple ZstdCompressor or ZstdDecompressor objects. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw) +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ { - ZstdDict *self; - self = PyObject_GC_New(ZstdDict, type); + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + + ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { - goto error; + return NULL; } - self->dict_content = NULL; - self->initialized = 0; self->d_dict = NULL; + self->dict_buffer = NULL; + self->dict_id = 0; + self->lock = (PyMutex){0}; /* ZSTD_CDict dict */ self->c_dicts = PyDict_New(); @@ -44,12 +69,29 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_U goto error; } - return (PyObject*)self; + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); + goto error; + } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; + + /* Get dict_id, 0 means "raw content" dictionary. */ + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); -error: - if (self != NULL) { - PyObject_GC_Del(self); + /* Check validity for ordinary dictionary */ + if (!is_raw && self->dict_id == 0) { + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); + goto error; } + + PyObject_GC_Track(self); + + return (PyObject *)self; + +error: + Py_XDECREF(self); return NULL; } @@ -61,117 +103,64 @@ ZstdDict_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free ZSTD_DDict instance */ - ZSTD_freeDDict(self->d_dict); + if (self->d_dict) { + ZSTD_freeDDict(self->d_dict); + } - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); + assert(!PyMutex_IsLocked(&self->lock)); + + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); Py_CLEAR(self->c_dicts); PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); + tp->tp_free(self); Py_DECREF(tp); } -/*[clinic input] -_zstd.ZstdDict.__init__ - - dict_content: object - A bytes-like object, dictionary's content. - is_raw: bool = False - This parameter is for advanced user. True means dict_content - argument is a "raw content" dictionary, free of any format - restriction. False means dict_content argument is an ordinary - zstd dictionary, was created by zstd functions, follow a - specified format. - -Represents a zstd dictionary, which can be used for compression/decompression. - -It's thread-safe, and can be shared by multiple ZstdCompressor / -ZstdDecompressor objects. -[clinic start generated code]*/ - -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw) -/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/ -{ - /* Only called once */ - if (self->initialized) { - PyErr_SetString(PyExc_RuntimeError, "reinitialization not supported"); - return -1; - } - self->initialized = 1; - - /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { - PyErr_SetString(PyExc_TypeError, - "dict_content argument should be bytes-like object."); - return -1; - } - - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { - PyErr_SetString(PyExc_ValueError, - "Zstd dictionary content should at least 8 bytes."); - return -1; - } - - /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); - - /* Check validity for ordinary dictionary */ - if (!is_raw && self->dict_id == 0) { - char *msg = "The dict_content argument is not a valid zstd " - "dictionary. The first 4 bytes of a valid zstd dictionary " - "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n" - "If you are an advanced user, and can be sure that " - "dict_content argument is a \"raw content\" zstd " - "dictionary, set is_raw parameter to True."; - PyErr_SetString(PyExc_ValueError, msg); - return -1; - } - - // Can only track self once self->dict_content is included - PyObject_GC_Track(self); - return 0; -} - PyDoc_STRVAR(ZstdDict_dictid_doc, -"ID of zstd dictionary, a 32-bit unsigned int value.\n\n" -"Non-zero means ordinary dictionary, was created by zstd functions, follow\n" -"a specified format.\n\n" -"0 means a \"raw content\" dictionary, free of any format restriction, used\n" -"for advanced user."); - -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of zstd dictionary, a bytes object, it's the same as dict_content\n" -"argument in ZstdDict.__init__() method. It can be used with other programs."); +"the Zstandard dictionary, an int between 0 and 2**32.\n\n" +"A non-zero value represents an ordinary Zstandard dictionary, " +"conforming to the standardised format.\n\n" +"The special value '0' means a 'raw content' dictionary," +"without any restrictions on format or content."); static PyObject * -ZstdDict_str(PyObject *ob) +ZstdDict_repr(PyObject *ob) { ZstdDict *dict = ZstdDict_CAST(ob); return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>", - dict->dict_id, Py_SIZE(dict->dict_content)); + (unsigned int)dict->dict_id, dict->dict_len); } static PyMemberDef ZstdDict_members[] = { {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, {NULL} }; /*[clinic input] -@critical_section +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + +/*[clinic input] @getter _zstd.ZstdDict.as_digested_dict Load as a digested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + 1. Some advanced compression parameters of compressor may be overridden by parameters of digested dictionary. 2. ZstdDict has a digested dictionaries cache for each compression level. @@ -182,19 +171,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digeste static PyObject * _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/ +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_undigested_dict Load as an undigested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + 1. The advanced compression parameters of compressor will not be overridden. 2. Loading an undigested dictionary is costly. If load an undigested dictionary multiple times, consider reusing a compressor object. @@ -203,19 +193,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undiges static PyObject * _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/ +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_prefix Load as a prefix to compressor/decompressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + 1. Prefix is compatible with long distance matching, while dictionary is not. 2. It only works for the first frame, then the compressor/decompressor will return to no prefix state. @@ -224,12 +215,13 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) static PyObject * _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) -/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/ +/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); } static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF @@ -240,8 +232,7 @@ static Py_ssize_t ZstdDict_length(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); + return self->dict_len; } static int @@ -249,7 +240,6 @@ ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) { ZstdDict *self = ZstdDict_CAST(ob); Py_VISIT(self->c_dicts); - Py_VISIT(self->dict_content); return 0; } @@ -257,7 +247,7 @@ static int ZstdDict_clear(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - Py_CLEAR(self->dict_content); + Py_CLEAR(self->c_dicts); return 0; } @@ -266,9 +256,8 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_getset, ZstdDict_getset}, {Py_tp_new, _zstd_ZstdDict_new}, {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_init, _zstd_ZstdDict___init__}, - {Py_tp_str, ZstdDict_str}, - {Py_tp_doc, (char*)_zstd_ZstdDict___init____doc__}, + {Py_tp_repr, ZstdDict_repr}, + {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, {Py_tp_clear, ZstdDict_clear}, diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h index e82498f5dd1..4a403416dbd 100644 --- a/Modules/_zstd/zstddict.h +++ b/Modules/_zstd/zstddict.h @@ -1,12 +1,8 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ #ifndef ZSTD_DICT_H #define ZSTD_DICT_H -#include <stdbool.h> // bool #include <zstd.h> // ZSTD_DDict typedef struct { @@ -19,13 +15,15 @@ typedef struct { ZSTD_DDict *d_dict; PyObject *c_dicts; - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; + /* Dictionary content. */ + char *dict_buffer; + Py_ssize_t dict_len; + /* Dictionary id */ uint32_t dict_id; - /* __init__ has been called, 0 or 1. */ - bool initialized; + /* Lock to protect the digested dictionaries */ + PyMutex lock; } ZstdDict; #endif // !ZSTD_DICT_H |