diff options
Diffstat (limited to 'Modules/_zstd')
-rw-r--r-- | Modules/_zstd/_zstdmodule.c | 767 | ||||
-rw-r--r-- | Modules/_zstd/_zstdmodule.h | 61 | ||||
-rw-r--r-- | Modules/_zstd/buffer.h | 108 | ||||
-rw-r--r-- | Modules/_zstd/clinic/_zstdmodule.c.h | 429 | ||||
-rw-r--r-- | Modules/_zstd/clinic/compressor.c.h | 294 | ||||
-rw-r--r-- | Modules/_zstd/clinic/decompressor.c.h | 223 | ||||
-rw-r--r-- | Modules/_zstd/clinic/zstddict.c.h | 225 | ||||
-rw-r--r-- | Modules/_zstd/compressor.c | 797 | ||||
-rw-r--r-- | Modules/_zstd/decompressor.c | 717 | ||||
-rw-r--r-- | Modules/_zstd/zstddict.c | 273 | ||||
-rw-r--r-- | Modules/_zstd/zstddict.h | 29 |
11 files changed, 3923 insertions, 0 deletions
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c new file mode 100644 index 00000000000..d75c0779474 --- /dev/null +++ b/Modules/_zstd/_zstdmodule.c @@ -0,0 +1,767 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "Python.h" + +#include "_zstdmodule.h" + +#include <zstd.h> // ZSTD_*() +#include <zdict.h> // ZDICT_*() + +/*[clinic input] +module _zstd + +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b5f5587aac15c14]*/ +#include "clinic/_zstdmodule.c.h" + + +ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype) +{ + if (state == NULL) { + return NULL; + } + + /* Check ZstdDict */ + if (PyObject_TypeCheck(dict, state->ZstdDict_type)) { + return (ZstdDict*)dict; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2 + && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type) + && PyLong_Check(PyTuple_GET_ITEM(dict, 1))) + { + int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == -1 && PyErr_Occurred()) { + return NULL; + } + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) + { + *ptype = type; + return (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be a ZstdDict object."); + return NULL; +} + +/* Format error message and set ZstdError. */ +void +set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret) +{ + const char *msg; + assert(ZSTD_isError(zstd_ret)); + + if (state == NULL) { + return; + } + switch (type) { + case ERR_DECOMPRESS: + msg = "Unable to decompress Zstandard data: %s"; + break; + case ERR_COMPRESS: + msg = "Unable to compress Zstandard data: %s"; + break; + case ERR_SET_PLEDGED_INPUT_SIZE: + msg = "Unable to set pledged uncompressed content size: %s"; + break; + + case ERR_LOAD_D_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "decompression: %s"; + break; + case ERR_LOAD_C_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "compression: %s"; + break; + + case ERR_GET_C_BOUNDS: + msg = "Unable to get zstd compression parameter bounds: %s"; + break; + case ERR_GET_D_BOUNDS: + msg = "Unable to get zstd decompression parameter bounds: %s"; + break; + case ERR_SET_C_LEVEL: + msg = "Unable to set zstd compression level: %s"; + break; + + case ERR_TRAIN_DICT: + msg = "Unable to train the Zstandard dictionary: %s"; + break; + case ERR_FINALIZE_DICT: + msg = "Unable to finalize the Zstandard dictionary: %s"; + break; + + default: + Py_UNREACHABLE(); + } + PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret)); +} + +typedef struct { + int parameter; + char parameter_name[32]; +} ParameterInfo; + +static const ParameterInfo cp_list[] = { + {ZSTD_c_compressionLevel, "compression_level"}, + {ZSTD_c_windowLog, "window_log"}, + {ZSTD_c_hashLog, "hash_log"}, + {ZSTD_c_chainLog, "chain_log"}, + {ZSTD_c_searchLog, "search_log"}, + {ZSTD_c_minMatch, "min_match"}, + {ZSTD_c_targetLength, "target_length"}, + {ZSTD_c_strategy, "strategy"}, + + {ZSTD_c_enableLongDistanceMatching, "enable_long_distance_matching"}, + {ZSTD_c_ldmHashLog, "ldm_hash_log"}, + {ZSTD_c_ldmMinMatch, "ldm_min_match"}, + {ZSTD_c_ldmBucketSizeLog, "ldm_bucket_size_log"}, + {ZSTD_c_ldmHashRateLog, "ldm_hash_rate_log"}, + + {ZSTD_c_contentSizeFlag, "content_size_flag"}, + {ZSTD_c_checksumFlag, "checksum_flag"}, + {ZSTD_c_dictIDFlag, "dict_id_flag"}, + + {ZSTD_c_nbWorkers, "nb_workers"}, + {ZSTD_c_jobSize, "job_size"}, + {ZSTD_c_overlapLog, "overlap_log"} +}; + +static const ParameterInfo dp_list[] = { + {ZSTD_d_windowLogMax, "window_log_max"} +}; + +void +set_parameter_error(int is_compress, int key_v, int value_v) +{ + ParameterInfo const *list; + int list_size; + char *type; + ZSTD_bounds bounds; + char pos_msg[64]; + + if (is_compress) { + list = cp_list; + list_size = Py_ARRAY_LENGTH(cp_list); + type = "compression"; + } + else { + list = dp_list; + list_size = Py_ARRAY_LENGTH(dp_list); + type = "decompression"; + } + + /* Find parameter's name */ + char const *name = NULL; + for (int i = 0; i < list_size; i++) { + if (key_v == (list+i)->parameter) { + name = (list+i)->parameter_name; + break; + } + } + + /* Unknown parameter */ + if (name == NULL) { + PyOS_snprintf(pos_msg, sizeof(pos_msg), + "unknown parameter (key %d)", key_v); + name = pos_msg; + } + + /* Get parameter bounds */ + if (is_compress) { + bounds = ZSTD_cParam_getBounds(key_v); + } + else { + bounds = ZSTD_dParam_getBounds(key_v); + } + if (ZSTD_isError(bounds.error)) { + PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'", + type, name); + return; + } + + /* Error message */ + PyErr_Format(PyExc_ValueError, + "%s parameter '%s' received an illegal value %d; " + "the valid range is [%d, %d]", + type, name, value_v, bounds.lowerBound, bounds.upperBound); +} + +static inline _zstd_state* +get_zstd_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (_zstd_state *)state; +} + +static Py_ssize_t +calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, + size_t **chunk_sizes) +{ + Py_ssize_t chunks_number; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + chunks_number = PyTuple_GET_SIZE(samples_sizes); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return -1; + } + + /* Prepare chunk_sizes */ + *chunk_sizes = PyMem_New(size_t, chunks_number); + if (*chunk_sizes == NULL) { + PyErr_NoMemory(); + return -1; + } + + sizes_sum = PyBytes_GET_SIZE(samples_bytes); + for (i = 0; i < chunks_number; i++) { + size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i)); + (*chunk_sizes)[i] = size; + if (size == (size_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto sum_error; + } + return -1; + } + if ((size_t)sizes_sum < size) { + goto sum_error; + } + sizes_sum -= size; + } + + if (sizes_sum != 0) { +sum_error: + PyErr_SetString(PyExc_ValueError, + "The samples size tuple doesn't match the " + "concatenation's size."); + return -1; + } + return chunks_number; +} + + +/*[clinic input] +_zstd.train_dict + + samples_bytes: PyBytesObject + Concatenation of samples. + samples_sizes: object(subclass_of='&PyTuple_Type') + Tuple of samples' sizes. + dict_size: Py_ssize_t + The size of the dictionary. + / + +Train a Zstandard dictionary on sample data. +[clinic start generated code]*/ + +static PyObject * +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size) +/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/ +{ + PyObject *dst_dict_bytes = NULL; + size_t *chunk_sizes = NULL; + Py_ssize_t chunks_number; + size_t zstd_ret; + + /* Check arguments */ + if (dict_size <= 0) { + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); + return NULL; + } + + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { + goto error; + } + + /* Allocate dict buffer */ + dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); + if (dst_dict_bytes == NULL) { + goto error; + } + + /* Train the dictionary */ + char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes); + const char *samples_buffer = PyBytes_AS_STRING(samples_bytes); + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size, + samples_buffer, + chunk_sizes, (uint32_t)chunks_number); + Py_END_ALLOW_THREADS + + /* Check Zstandard dict error */ + if (ZDICT_isError(zstd_ret)) { + _zstd_state* mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); + goto error; + } + + /* Resize dict_buffer */ + if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) { + goto error; + } + + goto success; + +error: + Py_CLEAR(dst_dict_bytes); + +success: + PyMem_Free(chunk_sizes); + return dst_dict_bytes; +} + +/*[clinic input] +_zstd.finalize_dict + + custom_dict_bytes: PyBytesObject + Custom dictionary content. + samples_bytes: PyBytesObject + Concatenation of samples. + samples_sizes: object(subclass_of='&PyTuple_Type') + Tuple of samples' sizes. + dict_size: Py_ssize_t + The size of the dictionary. + compression_level: int + Optimize for a specific Zstandard compression level, 0 means default. + / + +Finalize a Zstandard dictionary. +[clinic start generated code]*/ + +static PyObject * +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level) +/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/ +{ + Py_ssize_t chunks_number; + size_t *chunk_sizes = NULL; + PyObject *dst_dict_bytes = NULL; + size_t zstd_ret; + ZDICT_params_t params; + + /* Check arguments */ + if (dict_size <= 0) { + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); + return NULL; + } + + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { + goto error; + } + + /* Allocate dict buffer */ + dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); + if (dst_dict_bytes == NULL) { + goto error; + } + + /* Parameters */ + + /* Optimize for a specific Zstandard compression level, 0 means default. */ + params.compressionLevel = compression_level; + /* Write log to stderr, 0 = none. */ + params.notificationLevel = 0; + /* Force dictID value, 0 means auto mode (32-bits random value). */ + params.dictID = 0; + + /* Finalize the dictionary */ + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZDICT_finalizeDictionary( + PyBytes_AS_STRING(dst_dict_bytes), dict_size, + PyBytes_AS_STRING(custom_dict_bytes), + Py_SIZE(custom_dict_bytes), + PyBytes_AS_STRING(samples_bytes), chunk_sizes, + (uint32_t)chunks_number, params); + Py_END_ALLOW_THREADS + + /* Check Zstandard dict error */ + if (ZDICT_isError(zstd_ret)) { + _zstd_state* mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); + goto error; + } + + /* Resize dict_buffer */ + if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) { + goto error; + } + + goto success; + +error: + Py_CLEAR(dst_dict_bytes); + +success: + PyMem_Free(chunk_sizes); + return dst_dict_bytes; +} + + +/*[clinic input] +_zstd.get_param_bounds + + parameter: int + The parameter to get bounds. + is_compress: bool + True for CompressionParameter, False for DecompressionParameter. + +Get CompressionParameter/DecompressionParameter bounds. +[clinic start generated code]*/ + +static PyObject * +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) +/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/ +{ + ZSTD_bounds bound; + if (is_compress) { + bound = ZSTD_cParam_getBounds(parameter); + if (ZSTD_isError(bound.error)) { + _zstd_state* mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error); + return NULL; + } + } + else { + bound = ZSTD_dParam_getBounds(parameter); + if (ZSTD_isError(bound.error)) { + _zstd_state* mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error); + return NULL; + } + } + + return Py_BuildValue("ii", bound.lowerBound, bound.upperBound); +} + +/*[clinic input] +_zstd.get_frame_size + + frame_buffer: Py_buffer + A bytes-like object, it should start from the beginning of a frame, + and contains at least one complete frame. + +Get the size of a Zstandard frame, including the header and optional checksum. +[clinic start generated code]*/ + +static PyObject * +_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/ +{ + size_t frame_size; + + frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, + frame_buffer->len); + if (ZSTD_isError(frame_size)) { + _zstd_state* mod_state = get_zstd_state(module); + PyErr_Format(mod_state->ZstdError, + "Error when finding the compressed size of a Zstandard frame. " + "Ensure the frame_buffer argument starts from the " + "beginning of a frame, and its length is not less than this " + "complete frame. Zstd error message: %s.", + ZSTD_getErrorName(frame_size)); + return NULL; + } + + return PyLong_FromSize_t(frame_size); +} + +/*[clinic input] +_zstd.get_frame_info + + frame_buffer: Py_buffer + A bytes-like object, containing the header of a Zstandard frame. + +Get Zstandard frame infomation from a frame header. +[clinic start generated code]*/ + +static PyObject * +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/ +{ + uint64_t decompressed_size; + uint32_t dict_id; + + /* ZSTD_getFrameContentSize */ + decompressed_size = ZSTD_getFrameContentSize(frame_buffer->buf, + frame_buffer->len); + + /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) + #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ + if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { + _zstd_state* mod_state = get_zstd_state(module); + PyErr_SetString(mod_state->ZstdError, + "Error when getting information from the header of " + "a Zstandard frame. Ensure the frame_buffer argument " + "starts from the beginning of a frame, and its length " + "is not less than the frame header (6~18 bytes)."); + return NULL; + } + + /* ZSTD_getDictID_fromFrame */ + dict_id = ZSTD_getDictID_fromFrame(frame_buffer->buf, frame_buffer->len); + + /* Build tuple */ + if (decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { + return Py_BuildValue("OI", Py_None, dict_id); + } + return Py_BuildValue("KI", decompressed_size, dict_id); +} + +/*[clinic input] +_zstd.set_parameter_types + + c_parameter_type: object(subclass_of='&PyType_Type') + CompressionParameter IntEnum type object + d_parameter_type: object(subclass_of='&PyType_Type') + DecompressionParameter IntEnum type object + +Set CompressionParameter and DecompressionParameter types for validity check. +[clinic start generated code]*/ + +static PyObject * +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type) +/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/ +{ + _zstd_state* mod_state = get_zstd_state(module); + + Py_INCREF(c_parameter_type); + Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type); + Py_INCREF(d_parameter_type); + Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type); + + Py_RETURN_NONE; +} + +static PyMethodDef _zstd_methods[] = { + _ZSTD_TRAIN_DICT_METHODDEF + _ZSTD_FINALIZE_DICT_METHODDEF + _ZSTD_GET_PARAM_BOUNDS_METHODDEF + _ZSTD_GET_FRAME_SIZE_METHODDEF + _ZSTD_GET_FRAME_INFO_METHODDEF + _ZSTD_SET_PARAMETER_TYPES_METHODDEF + {NULL, NULL} +}; + +static int +_zstd_exec(PyObject *m) +{ +#define ADD_TYPE(TYPE, SPEC) \ +do { \ + TYPE = (PyTypeObject *)PyType_FromModuleAndSpec(m, &(SPEC), NULL); \ + if (TYPE == NULL) { \ + return -1; \ + } \ + if (PyModule_AddType(m, TYPE) < 0) { \ + return -1; \ + } \ +} while (0) + +#define ADD_INT_MACRO(MACRO) \ + if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \ + return -1; \ + } + +#define ADD_INT_CONST_TO_TYPE(TYPE, NAME, VALUE) \ +do { \ + PyObject *v = PyLong_FromLong((VALUE)); \ + if (v == NULL || PyObject_SetAttrString((PyObject *)(TYPE), \ + (NAME), v) < 0) { \ + Py_XDECREF(v); \ + return -1; \ + } \ + Py_DECREF(v); \ +} while (0) + + _zstd_state* mod_state = get_zstd_state(m); + + /* Reusable objects & variables */ + mod_state->CParameter_type = NULL; + mod_state->DParameter_type = NULL; + + /* Create and add heap types */ + ADD_TYPE(mod_state->ZstdDict_type, zstd_dict_type_spec); + ADD_TYPE(mod_state->ZstdCompressor_type, zstd_compressor_type_spec); + ADD_TYPE(mod_state->ZstdDecompressor_type, zstd_decompressor_type_spec); + mod_state->ZstdError = PyErr_NewExceptionWithDoc( + "compression.zstd.ZstdError", + "An error occurred in the zstd library.", + NULL, NULL); + if (mod_state->ZstdError == NULL) { + return -1; + } + if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) { + return -1; + } + + /* Add constants */ + if (PyModule_AddIntConstant(m, "zstd_version_number", + ZSTD_versionNumber()) < 0) { + return -1; + } + + if (PyModule_AddStringConstant(m, "zstd_version", + ZSTD_versionString()) < 0) { + return -1; + } + +#if ZSTD_VERSION_NUMBER >= 10500 + if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT", + ZSTD_defaultCLevel()) < 0) { + return -1; + } +#else + ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT); +#endif + + if (PyModule_Add(m, "ZSTD_DStreamOutSize", + PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) { + return -1; + } + + /* Add zstd compression parameters. All should also be in cp_list. */ + ADD_INT_MACRO(ZSTD_c_compressionLevel); + ADD_INT_MACRO(ZSTD_c_windowLog); + ADD_INT_MACRO(ZSTD_c_hashLog); + ADD_INT_MACRO(ZSTD_c_chainLog); + ADD_INT_MACRO(ZSTD_c_searchLog); + ADD_INT_MACRO(ZSTD_c_minMatch); + ADD_INT_MACRO(ZSTD_c_targetLength); + ADD_INT_MACRO(ZSTD_c_strategy); + + ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching); + ADD_INT_MACRO(ZSTD_c_ldmHashLog); + ADD_INT_MACRO(ZSTD_c_ldmMinMatch); + ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog); + ADD_INT_MACRO(ZSTD_c_ldmHashRateLog); + + ADD_INT_MACRO(ZSTD_c_contentSizeFlag); + ADD_INT_MACRO(ZSTD_c_checksumFlag); + ADD_INT_MACRO(ZSTD_c_dictIDFlag); + + ADD_INT_MACRO(ZSTD_c_nbWorkers); + ADD_INT_MACRO(ZSTD_c_jobSize); + ADD_INT_MACRO(ZSTD_c_overlapLog); + + /* Add zstd decompression parameters. All should also be in dp_list. */ + ADD_INT_MACRO(ZSTD_d_windowLogMax); + + /* Add ZSTD_strategy enum members */ + ADD_INT_MACRO(ZSTD_fast); + ADD_INT_MACRO(ZSTD_dfast); + ADD_INT_MACRO(ZSTD_greedy); + ADD_INT_MACRO(ZSTD_lazy); + ADD_INT_MACRO(ZSTD_lazy2); + ADD_INT_MACRO(ZSTD_btlazy2); + ADD_INT_MACRO(ZSTD_btopt); + ADD_INT_MACRO(ZSTD_btultra); + ADD_INT_MACRO(ZSTD_btultra2); + + /* Add ZSTD_EndDirective enum members to ZstdCompressor */ + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "CONTINUE", ZSTD_e_continue); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_BLOCK", ZSTD_e_flush); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_FRAME", ZSTD_e_end); + + /* Make ZstdCompressor immutable (set Py_TPFLAGS_IMMUTABLETYPE) */ + PyType_Freeze(mod_state->ZstdCompressor_type); + +#undef ADD_TYPE +#undef ADD_INT_MACRO +#undef ADD_ZSTD_COMPRESSOR_INT_CONST + + return 0; +} + +static int +_zstd_traverse(PyObject *module, visitproc visit, void *arg) +{ + _zstd_state* mod_state = get_zstd_state(module); + + Py_VISIT(mod_state->ZstdDict_type); + Py_VISIT(mod_state->ZstdCompressor_type); + + Py_VISIT(mod_state->ZstdDecompressor_type); + + Py_VISIT(mod_state->ZstdError); + + Py_VISIT(mod_state->CParameter_type); + Py_VISIT(mod_state->DParameter_type); + return 0; +} + +static int +_zstd_clear(PyObject *module) +{ + _zstd_state* mod_state = get_zstd_state(module); + + Py_CLEAR(mod_state->ZstdDict_type); + Py_CLEAR(mod_state->ZstdCompressor_type); + + Py_CLEAR(mod_state->ZstdDecompressor_type); + + Py_CLEAR(mod_state->ZstdError); + + Py_CLEAR(mod_state->CParameter_type); + Py_CLEAR(mod_state->DParameter_type); + return 0; +} + +static void +_zstd_free(void *module) +{ + (void)_zstd_clear((PyObject *)module); +} + +static struct PyModuleDef_Slot _zstd_slots[] = { + {Py_mod_exec, _zstd_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static struct PyModuleDef _zstdmodule = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_zstd", + .m_doc = "Implementation module for Zstandard compression.", + .m_size = sizeof(_zstd_state), + .m_slots = _zstd_slots, + .m_methods = _zstd_methods, + .m_traverse = _zstd_traverse, + .m_clear = _zstd_clear, + .m_free = _zstd_free, +}; + +PyMODINIT_FUNC +PyInit__zstd(void) +{ + return PyModuleDef_Init(&_zstdmodule); +} diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h new file mode 100644 index 00000000000..4e8f708f223 --- /dev/null +++ b/Modules/_zstd/_zstdmodule.h @@ -0,0 +1,61 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +/* Declarations shared between different parts of the _zstd module*/ + +#ifndef ZSTD_MODULE_H +#define ZSTD_MODULE_H + +#include "zstddict.h" + +/* Type specs */ +extern PyType_Spec zstd_dict_type_spec; +extern PyType_Spec zstd_compressor_type_spec; +extern PyType_Spec zstd_decompressor_type_spec; + +typedef struct { + /* Module heap types. */ + PyTypeObject *ZstdDict_type; + PyTypeObject *ZstdCompressor_type; + PyTypeObject *ZstdDecompressor_type; + PyObject *ZstdError; + + /* enum types set by set_parameter_types. */ + PyTypeObject *CParameter_type; + PyTypeObject *DParameter_type; +} _zstd_state; + +typedef enum { + ERR_DECOMPRESS, + ERR_COMPRESS, + ERR_SET_PLEDGED_INPUT_SIZE, + + ERR_LOAD_D_DICT, + ERR_LOAD_C_DICT, + + ERR_GET_C_BOUNDS, + ERR_GET_D_BOUNDS, + ERR_SET_C_LEVEL, + + ERR_TRAIN_DICT, + ERR_FINALIZE_DICT, +} error_type; + +typedef enum { + DICT_TYPE_DIGESTED = 0, + DICT_TYPE_UNDIGESTED = 1, + DICT_TYPE_PREFIX = 2 +} dictionary_type; + +extern ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, + PyObject *dict, int *type); + +/* Format error message and set ZstdError. */ +extern void +set_zstd_error(const _zstd_state *state, + error_type type, size_t zstd_ret); + +extern void +set_parameter_error(int is_compress, int key_v, int value_v); + +#endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h new file mode 100644 index 00000000000..4c885fa0d72 --- /dev/null +++ b/Modules/_zstd/buffer.h @@ -0,0 +1,108 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef ZSTD_BUFFER_H +#define ZSTD_BUFFER_H + +#include "pycore_blocks_output_buffer.h" + +#include <zstd.h> // ZSTD_outBuffer + +/* Blocks output buffer wrapper code */ + +/* Initialize the buffer, and grow the buffer. + Return 0 on success + Return -1 on failure */ +static inline int +_OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, + Py_ssize_t max_length) +{ + /* Ensure .list was set to NULL */ + assert(buffer->list == NULL); + + Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, + &ob->dst); + if (res < 0) { + return -1; + } + ob->size = (size_t) res; + ob->pos = 0; + return 0; +} + +/* Initialize the buffer, with an initial size. + init_size: the initial size. + Return 0 on success + Return -1 on failure */ +static inline int +_OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, + Py_ssize_t max_length, Py_ssize_t init_size) +{ + Py_ssize_t block_size; + + /* Ensure .list was set to NULL */ + assert(buffer->list == NULL); + + /* Get block size */ + if (0 <= max_length && max_length < init_size) { + block_size = max_length; + } + else { + block_size = init_size; + } + + Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, + &ob->dst); + if (res < 0) { + return -1; + } + // Set max_length, InitWithSize doesn't do this + buffer->max_length = max_length; + ob->size = (size_t) res; + ob->pos = 0; + return 0; +} + +/* Grow the buffer. + Return 0 on success + Return -1 on failure */ +static inline int +_OutputBuffer_Grow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) +{ + assert(ob->pos == ob->size); + Py_ssize_t res = _BlocksOutputBuffer_Grow(buffer, &ob->dst, 0); + if (res < 0) { + return -1; + } + ob->size = (size_t) res; + ob->pos = 0; + return 0; +} + +/* Finish the buffer. + Return a bytes object on success + Return NULL on failure */ +static inline PyObject * +_OutputBuffer_Finish(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) +{ + return _BlocksOutputBuffer_Finish(buffer, ob->size - ob->pos); +} + +/* Clean up the buffer */ +static inline void +_OutputBuffer_OnError(_BlocksOutputBuffer *buffer) +{ + _BlocksOutputBuffer_OnError(buffer); +} + +/* Whether the output data has reached max_length. +The avail_out must be 0, please check it before calling. */ +static inline int +_OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) +{ + /* Ensure (data size == allocated size) */ + assert(ob->pos == ob->size); + + return buffer->allocated == buffer->max_length; +} + +#endif // !ZSTD_BUFFER_H diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h new file mode 100644 index 00000000000..766e1cfa776 --- /dev/null +++ b/Modules/_zstd/clinic/_zstdmodule.c.h @@ -0,0 +1,429 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + +PyDoc_STRVAR(_zstd_train_dict__doc__, +"train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" +"--\n" +"\n" +"Train a Zstandard dictionary on sample data.\n" +"\n" +" samples_bytes\n" +" Concatenation of samples.\n" +" samples_sizes\n" +" Tuple of samples\' sizes.\n" +" dict_size\n" +" The size of the dictionary."); + +#define _ZSTD_TRAIN_DICT_METHODDEF \ + {"train_dict", _PyCFunction_CAST(_zstd_train_dict), METH_FASTCALL, _zstd_train_dict__doc__}, + +static PyObject * +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size); + +static PyObject * +_zstd_train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyBytesObject *samples_bytes; + PyObject *samples_sizes; + Py_ssize_t dict_size; + + if (!_PyArg_CheckPositional("train_dict", nargs, 3, 3)) { + goto exit; + } + if (!PyBytes_Check(args[0])) { + _PyArg_BadArgument("train_dict", "argument 1", "bytes", args[0]); + goto exit; + } + samples_bytes = (PyBytesObject *)args[0]; + if (!PyTuple_Check(args[1])) { + _PyArg_BadArgument("train_dict", "argument 2", "tuple", args[1]); + goto exit; + } + samples_sizes = args[1]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + dict_size = ival; + } + return_value = _zstd_train_dict_impl(module, samples_bytes, samples_sizes, dict_size); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_finalize_dict__doc__, +"finalize_dict($module, custom_dict_bytes, samples_bytes, samples_sizes,\n" +" dict_size, compression_level, /)\n" +"--\n" +"\n" +"Finalize a Zstandard dictionary.\n" +"\n" +" custom_dict_bytes\n" +" Custom dictionary content.\n" +" samples_bytes\n" +" Concatenation of samples.\n" +" samples_sizes\n" +" Tuple of samples\' sizes.\n" +" dict_size\n" +" The size of the dictionary.\n" +" compression_level\n" +" Optimize for a specific Zstandard compression level, 0 means default."); + +#define _ZSTD_FINALIZE_DICT_METHODDEF \ + {"finalize_dict", _PyCFunction_CAST(_zstd_finalize_dict), METH_FASTCALL, _zstd_finalize_dict__doc__}, + +static PyObject * +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level); + +static PyObject * +_zstd_finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyBytesObject *custom_dict_bytes; + PyBytesObject *samples_bytes; + PyObject *samples_sizes; + Py_ssize_t dict_size; + int compression_level; + + if (!_PyArg_CheckPositional("finalize_dict", nargs, 5, 5)) { + goto exit; + } + if (!PyBytes_Check(args[0])) { + _PyArg_BadArgument("finalize_dict", "argument 1", "bytes", args[0]); + goto exit; + } + custom_dict_bytes = (PyBytesObject *)args[0]; + if (!PyBytes_Check(args[1])) { + _PyArg_BadArgument("finalize_dict", "argument 2", "bytes", args[1]); + goto exit; + } + samples_bytes = (PyBytesObject *)args[1]; + if (!PyTuple_Check(args[2])) { + _PyArg_BadArgument("finalize_dict", "argument 3", "tuple", args[2]); + goto exit; + } + samples_sizes = args[2]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[3]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + dict_size = ival; + } + compression_level = PyLong_AsInt(args[4]); + if (compression_level == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = _zstd_finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_get_param_bounds__doc__, +"get_param_bounds($module, /, parameter, is_compress)\n" +"--\n" +"\n" +"Get CompressionParameter/DecompressionParameter bounds.\n" +"\n" +" parameter\n" +" The parameter to get bounds.\n" +" is_compress\n" +" True for CompressionParameter, False for DecompressionParameter."); + +#define _ZSTD_GET_PARAM_BOUNDS_METHODDEF \ + {"get_param_bounds", _PyCFunction_CAST(_zstd_get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd_get_param_bounds__doc__}, + +static PyObject * +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress); + +static PyObject * +_zstd_get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(parameter), &_Py_ID(is_compress), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"parameter", "is_compress", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_param_bounds", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + int parameter; + int is_compress; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + parameter = PyLong_AsInt(args[0]); + if (parameter == -1 && PyErr_Occurred()) { + goto exit; + } + is_compress = PyObject_IsTrue(args[1]); + if (is_compress < 0) { + goto exit; + } + return_value = _zstd_get_param_bounds_impl(module, parameter, is_compress); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_get_frame_size__doc__, +"get_frame_size($module, /, frame_buffer)\n" +"--\n" +"\n" +"Get the size of a Zstandard frame, including the header and optional checksum.\n" +"\n" +" frame_buffer\n" +" A bytes-like object, it should start from the beginning of a frame,\n" +" and contains at least one complete frame."); + +#define _ZSTD_GET_FRAME_SIZE_METHODDEF \ + {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__}, + +static PyObject * +_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer); + +static PyObject * +_zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(frame_buffer), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"frame_buffer", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_frame_size", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_buffer frame_buffer = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { + goto exit; + } + return_value = _zstd_get_frame_size_impl(module, &frame_buffer); + +exit: + /* Cleanup for frame_buffer */ + if (frame_buffer.obj) { + PyBuffer_Release(&frame_buffer); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd_get_frame_info__doc__, +"get_frame_info($module, /, frame_buffer)\n" +"--\n" +"\n" +"Get Zstandard frame infomation from a frame header.\n" +"\n" +" frame_buffer\n" +" A bytes-like object, containing the header of a Zstandard frame."); + +#define _ZSTD_GET_FRAME_INFO_METHODDEF \ + {"get_frame_info", _PyCFunction_CAST(_zstd_get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_info__doc__}, + +static PyObject * +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); + +static PyObject * +_zstd_get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(frame_buffer), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"frame_buffer", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_frame_info", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_buffer frame_buffer = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { + goto exit; + } + return_value = _zstd_get_frame_info_impl(module, &frame_buffer); + +exit: + /* Cleanup for frame_buffer */ + if (frame_buffer.obj) { + PyBuffer_Release(&frame_buffer); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd_set_parameter_types__doc__, +"set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" +"--\n" +"\n" +"Set CompressionParameter and DecompressionParameter types for validity check.\n" +"\n" +" c_parameter_type\n" +" CompressionParameter IntEnum type object\n" +" d_parameter_type\n" +" DecompressionParameter IntEnum type object"); + +#define _ZSTD_SET_PARAMETER_TYPES_METHODDEF \ + {"set_parameter_types", _PyCFunction_CAST(_zstd_set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd_set_parameter_types__doc__}, + +static PyObject * +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type); + +static PyObject * +_zstd_set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(c_parameter_type), &_Py_ID(d_parameter_type), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_parameter_types", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *c_parameter_type; + PyObject *d_parameter_type; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyObject_TypeCheck(args[0], &PyType_Type)) { + _PyArg_BadArgument("set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); + goto exit; + } + c_parameter_type = args[0]; + if (!PyObject_TypeCheck(args[1], &PyType_Type)) { + _PyArg_BadArgument("set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); + goto exit; + } + d_parameter_type = args[1]; + return_value = _zstd_set_parameter_types_impl(module, c_parameter_type, d_parameter_type); + +exit: + return return_value; +} +/*[clinic end generated code: output=437b084f149e68e5 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h new file mode 100644 index 00000000000..4f8d93fd9e8 --- /dev/null +++ b/Modules/_zstd/clinic/compressor.c.h @@ -0,0 +1,294 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, +"ZstdCompressor(level=None, options=None, zstd_dict=None)\n" +"--\n" +"\n" +"Create a compressor object for compressing data incrementally.\n" +"\n" +" level\n" +" The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.\n" +" options\n" +" A dict object that contains advanced compression parameters.\n" +" zstd_dict\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" +"\n" +"Thread-safe at method level. For one-shot compression, use the compress()\n" +"function instead."); + +static PyObject * +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict); + +static PyObject * +_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(level), &_Py_ID(options), &_Py_ID(zstd_dict), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"level", "options", "zstd_dict", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZstdCompressor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *level = Py_None; + PyObject *options = Py_None; + PyObject *zstd_dict = Py_None; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + level = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + options = fastargs[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + zstd_dict = fastargs[2]; +skip_optional_pos: + return_value = _zstd_ZstdCompressor_new_impl(type, level, options, zstd_dict); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdCompressor_compress__doc__, +"compress($self, /, data, mode=ZstdCompressor.CONTINUE)\n" +"--\n" +"\n" +"Provide data to the compressor object.\n" +"\n" +" mode\n" +" Can be these 3 values ZstdCompressor.CONTINUE,\n" +" ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME\n" +"\n" +"Return a chunk of compressed data if possible, or b\'\' otherwise. When you have\n" +"finished providing data to the compressor, call the flush() method to finish\n" +"the compression process."); + +#define _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF \ + {"compress", _PyCFunction_CAST(_zstd_ZstdCompressor_compress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_compress__doc__}, + +static PyObject * +_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, + int mode); + +static PyObject * +_zstd_ZstdCompressor_compress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(data), &_Py_ID(mode), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"data", "mode", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "compress", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int mode = ZSTD_e_continue; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + mode = PyLong_AsInt(args[1]); + if (mode == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = _zstd_ZstdCompressor_compress_impl((ZstdCompressor *)self, &data, mode); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__, +"flush($self, /, mode=ZstdCompressor.FLUSH_FRAME)\n" +"--\n" +"\n" +"Finish the compression process.\n" +"\n" +" mode\n" +" Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n" +" ZstdCompressor.FLUSH_BLOCK\n" +"\n" +"Flush any remaining data left in internal buffers. Since Zstandard data\n" +"consists of one or more independent frames, the compressor object can still\n" +"be used after this method is called."); + +#define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \ + {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__}, + +static PyObject * +_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode); + +static PyObject * +_zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(mode), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"mode", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "flush", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + int mode = ZSTD_e_end; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + mode = PyLong_AsInt(args[0]); + if (mode == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = _zstd_ZstdCompressor_flush_impl((ZstdCompressor *)self, mode); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdCompressor_set_pledged_input_size__doc__, +"set_pledged_input_size($self, size, /)\n" +"--\n" +"\n" +"Set the uncompressed content size to be written into the frame header.\n" +"\n" +" size\n" +" The size of the uncompressed data to be provided to the compressor.\n" +"\n" +"This method can be used to ensure the header of the frame about to be written\n" +"includes the size of the data, unless the CompressionParameter.content_size_flag\n" +"is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.\n" +"\n" +"It is important to ensure that the pledged data size matches the actual data\n" +"size. If they do not match the compressed output data may be corrupted and the\n" +"final chunk written may be lost."); + +#define _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF \ + {"set_pledged_input_size", (PyCFunction)_zstd_ZstdCompressor_set_pledged_input_size, METH_O, _zstd_ZstdCompressor_set_pledged_input_size__doc__}, + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size); + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + unsigned long long size; + + if (!zstd_contentsize_converter(arg, &size)) { + goto exit; + } + return_value = _zstd_ZstdCompressor_set_pledged_input_size_impl((ZstdCompressor *)self, size); + +exit: + return return_value; +} +/*[clinic end generated code: output=c1d5c2cf06a8becd input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h new file mode 100644 index 00000000000..c6fdae74ab0 --- /dev/null +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -0,0 +1,223 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__, +"ZstdDecompressor(zstd_dict=None, options=None)\n" +"--\n" +"\n" +"Create a decompressor object for decompressing data incrementally.\n" +"\n" +" zstd_dict\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" +" options\n" +" A dict object that contains advanced decompression parameters.\n" +"\n" +"Thread-safe at method level. For one-shot decompression, use the decompress()\n" +"function instead."); + +static PyObject * +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options); + +static PyObject * +_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(zstd_dict), &_Py_ID(options), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"zstd_dict", "options", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZstdDecompressor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *zstd_dict = Py_None; + PyObject *options = Py_None; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 0, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + zstd_dict = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + options = fastargs[1]; +skip_optional_pos: + return_value = _zstd_ZstdDecompressor_new_impl(type, zstd_dict, options); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDecompressor_unused_data__doc__, +"A bytes object of un-consumed input data.\n" +"\n" +"When ZstdDecompressor object stops after a frame is\n" +"decompressed, unused input data after the frame. Otherwise this will be b\'\'."); +#if defined(_zstd_ZstdDecompressor_unused_data_DOCSTR) +# undef _zstd_ZstdDecompressor_unused_data_DOCSTR +#endif +#define _zstd_ZstdDecompressor_unused_data_DOCSTR _zstd_ZstdDecompressor_unused_data__doc__ + +#if !defined(_zstd_ZstdDecompressor_unused_data_DOCSTR) +# define _zstd_ZstdDecompressor_unused_data_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF) +# undef _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF +# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, (setter)_zstd_ZstdDecompressor_unused_data_set, _zstd_ZstdDecompressor_unused_data_DOCSTR}, +#else +# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, NULL, _zstd_ZstdDecompressor_unused_data_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self); + +static PyObject * +_zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); +} + +PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, +"decompress($self, /, data, max_length=-1)\n" +"--\n" +"\n" +"Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n" +"\n" +" data\n" +" A bytes-like object, Zstandard data to be decompressed.\n" +" max_length\n" +" Maximum size of returned data. When it is negative, the size of\n" +" output buffer is unlimited. When it is nonnegative, returns at\n" +" most max_length bytes of decompressed data.\n" +"\n" +"If *max_length* is nonnegative, returns at most *max_length* bytes of\n" +"decompressed data. If this limit is reached and further output can be\n" +"produced, *self.needs_input* will be set to ``False``. In this case, the next\n" +"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n" +"\n" +"If all of the input data was decompressed and returned (either because this\n" +"was less than *max_length* bytes, or because *max_length* was negative),\n" +"*self.needs_input* will be set to True.\n" +"\n" +"Attempting to decompress data after the end of a frame is reached raises an\n" +"EOFError. Any data found after the end of the frame is ignored and saved in\n" +"the self.unused_data attribute."); + +#define _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF \ + {"decompress", _PyCFunction_CAST(_zstd_ZstdDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdDecompressor_decompress__doc__}, + +static PyObject * +_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, + Py_buffer *data, + Py_ssize_t max_length); + +static PyObject * +_zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(data), &_Py_ID(max_length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"data", "max_length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "decompress", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + Py_ssize_t max_length = -1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_length = ival; + } +skip_optional_pos: + return_value = _zstd_ZstdDecompressor_decompress_impl((ZstdDecompressor *)self, &data, max_length); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} +/*[clinic end generated code: output=30c12ef047027ede input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h new file mode 100644 index 00000000000..79db85405d6 --- /dev/null +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -0,0 +1,225 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, +"ZstdDict(dict_content, /, *, is_raw=False)\n" +"--\n" +"\n" +"Represents a Zstandard dictionary.\n" +"\n" +" dict_content\n" +" The content of a Zstandard dictionary as a bytes-like object.\n" +" is_raw\n" +" If true, perform no checks on *dict_content*, useful for some\n" +" advanced cases. Otherwise, check that the content represents\n" +" a Zstandard dictionary created by the zstd library or CLI.\n" +"\n" +"The dictionary can be used for compression or decompression, and can be shared\n" +"by multiple ZstdCompressor or ZstdDecompressor objects."); + +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw); + +static PyObject * +_zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(is_raw), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "is_raw", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZstdDict", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; + Py_buffer dict_content = {NULL, NULL}; + int is_raw = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + is_raw = PyObject_IsTrue(fastargs[1]); + if (is_raw < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw); + +exit: + /* Cleanup for dict_content */ + if (dict_content.obj) { + PyBuffer_Release(&dict_content); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__, +"The content of a Zstandard dictionary, as a bytes object."); +#if defined(_zstd_ZstdDict_dict_content_DOCSTR) +# undef _zstd_ZstdDict_dict_content_DOCSTR +#endif +#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__ + +#if !defined(_zstd_ZstdDict_dict_content_DOCSTR) +# define _zstd_ZstdDict_dict_content_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self); +} + +PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, +"Load as a digested dictionary to compressor.\n" +"\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_digested_dict)\n" +"\n" +"1. Some advanced compression parameters of compressor may be overridden\n" +" by parameters of digested dictionary.\n" +"2. ZstdDict has a digested dictionaries cache for each compression level.\n" +" It\'s faster when loading again a digested dictionary with the same\n" +" compression level.\n" +"3. No need to use this for decompression."); +#if defined(_zstd_ZstdDict_as_digested_dict_DOCSTR) +# undef _zstd_ZstdDict_as_digested_dict_DOCSTR +#endif +#define _zstd_ZstdDict_as_digested_dict_DOCSTR _zstd_ZstdDict_as_digested_dict__doc__ + +#if !defined(_zstd_ZstdDict_as_digested_dict_DOCSTR) +# define _zstd_ZstdDict_as_digested_dict_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF +# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, (setter)_zstd_ZstdDict_as_digested_dict_set, _zstd_ZstdDict_as_digested_dict_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, NULL, _zstd_ZstdDict_as_digested_dict_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); +} + +PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, +"Load as an undigested dictionary to compressor.\n" +"\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"\n" +"1. The advanced compression parameters of compressor will not be overridden.\n" +"2. Loading an undigested dictionary is costly. If load an undigested dictionary\n" +" multiple times, consider reusing a compressor object.\n" +"3. No need to use this for decompression."); +#if defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR) +# undef _zstd_ZstdDict_as_undigested_dict_DOCSTR +#endif +#define _zstd_ZstdDict_as_undigested_dict_DOCSTR _zstd_ZstdDict_as_undigested_dict__doc__ + +#if !defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR) +# define _zstd_ZstdDict_as_undigested_dict_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF +# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, (setter)_zstd_ZstdDict_as_undigested_dict_set, _zstd_ZstdDict_as_undigested_dict_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, NULL, _zstd_ZstdDict_as_undigested_dict_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); +} + +PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, +"Load as a prefix to compressor/decompressor.\n" +"\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_prefix)\n" +"\n" +"1. Prefix is compatible with long distance matching, while dictionary is not.\n" +"2. It only works for the first frame, then the compressor/decompressor will\n" +" return to no prefix state.\n" +"3. When decompressing, must use the same prefix as when compressing.\""); +#if defined(_zstd_ZstdDict_as_prefix_DOCSTR) +# undef _zstd_ZstdDict_as_prefix_DOCSTR +#endif +#define _zstd_ZstdDict_as_prefix_DOCSTR _zstd_ZstdDict_as_prefix__doc__ + +#if !defined(_zstd_ZstdDict_as_prefix_DOCSTR) +# define _zstd_ZstdDict_as_prefix_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF) +# undef _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF +# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, (setter)_zstd_ZstdDict_as_prefix_set, _zstd_ZstdDict_as_prefix_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, NULL, _zstd_ZstdDict_as_prefix_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); +} +/*[clinic end generated code: output=4696cbc722e5fdfc input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c new file mode 100644 index 00000000000..bc9e6eff89a --- /dev/null +++ b/Modules/_zstd/compressor.c @@ -0,0 +1,797 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +/* ZstdCompressor class definitions */ + +/*[clinic input] +module _zstd +class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7166021db1ef7df8]*/ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "Python.h" + +#include "_zstdmodule.h" +#include "buffer.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked + +#include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Compression context */ + ZSTD_CCtx *cctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Last mode, initialized to ZSTD_e_end */ + int last_mode; + + /* (nbWorker >= 1) ? 1 : 0 */ + int use_multithread; + + /* Compression level */ + int compression_level; + + /* Lock to protect the compression context */ + PyMutex lock; +} ZstdCompressor; + +#define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) + +/*[python input] + +class zstd_contentsize_converter(CConverter): + type = 'unsigned long long' + converter = 'zstd_contentsize_converter' + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=0932c350d633c7de]*/ + + +static int +zstd_contentsize_converter(PyObject *size, unsigned long long *p) +{ + // None means the user indicates the size is unknown. + if (size == Py_None) { + *p = ZSTD_CONTENTSIZE_UNKNOWN; + } + else { + /* ZSTD_CONTENTSIZE_UNKNOWN is 0ULL - 1 + ZSTD_CONTENTSIZE_ERROR is 0ULL - 2 + Users should only pass values < ZSTD_CONTENTSIZE_ERROR */ + unsigned long long pledged_size = PyLong_AsUnsignedLongLong(size); + /* Here we check for (unsigned long long)-1 as a sign of an error in + PyLong_AsUnsignedLongLong */ + if (pledged_size == (unsigned long long)-1 && PyErr_Occurred()) { + *p = ZSTD_CONTENTSIZE_ERROR; + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + return 0; + } + if (pledged_size >= ZSTD_CONTENTSIZE_ERROR) { + *p = ZSTD_CONTENTSIZE_ERROR; + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + *p = pledged_size; + } + return 1; +} + +#include "clinic/compressor.c.h" + +static int +_zstd_set_c_level(ZstdCompressor *self, int level) +{ + /* Set integer compression level */ + int min_level = ZSTD_minCLevel(); + int max_level = ZSTD_maxCLevel(); + if (level < min_level || level > max_level) { + PyErr_Format(PyExc_ValueError, + "illegal compression level %d; the valid range is [%d, %d]", + level, min_level, max_level); + return -1; + } + + /* Save for generating ZSTD_CDICT */ + self->compression_level = level; + + /* Set compressionLevel to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, level); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + return -1; + } + return 0; +} + +static int +_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, + "ZstdCompressor() argument 'options' must be dict, not %T", + options); + return -1; + } + + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "compression options dictionary key must not be a " + "DecompressionParameter attribute"); + return -1; + } + + Py_INCREF(key); + Py_INCREF(value); + int key_v = PyLong_AsInt(key); + Py_DECREF(key); + if (key_v == -1 && PyErr_Occurred()) { + Py_DECREF(value); + return -1; + } + + int value_v = PyLong_AsInt(value); + Py_DECREF(value); + if (value_v == -1 && PyErr_Occurred()) { + return -1; + } + + if (key_v == ZSTD_c_compressionLevel) { + if (_zstd_set_c_level(self, value_v) < 0) { + return -1; + } + continue; + } + if (key_v == ZSTD_c_nbWorkers) { + /* From the zstd library docs: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; + } + } + + /* Set parameter to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(1, key_v, value_v); + return -1; + } + } + return 0; +} + +static void +capsule_free_cdict(PyObject *capsule) +{ + ZSTD_CDict *cdict = PyCapsule_GetPointer(capsule, NULL); + ZSTD_freeCDict(cdict); +} + +ZSTD_CDict * +_get_CDict(ZstdDict *self, int compressionLevel) +{ + assert(PyMutex_IsLocked(&self->lock)); + PyObject *level = NULL; + PyObject *capsule = NULL; + ZSTD_CDict *cdict; + int ret; + + + /* int level object */ + level = PyLong_FromLong(compressionLevel); + if (level == NULL) { + goto error; + } + + /* Get PyCapsule object from self->c_dicts */ + ret = PyDict_GetItemRef(self->c_dicts, level, &capsule); + if (ret < 0) { + goto error; + } + if (capsule == NULL) { + /* Create ZSTD_CDict instance */ + Py_BEGIN_ALLOW_THREADS + cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len, + compressionLevel); + Py_END_ALLOW_THREADS + + if (cdict == NULL) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Failed to create a ZSTD_CDict instance from " + "Zstandard dictionary content."); + } + goto error; + } + + /* Put ZSTD_CDict instance into PyCapsule object */ + capsule = PyCapsule_New(cdict, NULL, capsule_free_cdict); + if (capsule == NULL) { + ZSTD_freeCDict(cdict); + goto error; + } + + /* Add PyCapsule object to self->c_dicts */ + ret = PyDict_SetItem(self->c_dicts, level, capsule); + if (ret < 0) { + goto error; + } + } + else { + /* ZSTD_CDict instance already exists */ + cdict = PyCapsule_GetPointer(capsule, NULL); + } + goto success; + +error: + cdict = NULL; +success: + Py_XDECREF(level); + Py_XDECREF(capsule); + return cdict; +} + +static int +_zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) +{ + size_t zstd_ret; + if (type == DICT_TYPE_DIGESTED) { + /* Get ZSTD_CDict */ + ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); + if (c_dict == NULL) { + return -1; + } + /* Reference a prepared dictionary. + It overrides some compression context's parameters. */ + zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); + } + else if (type == DICT_TYPE_UNDIGESTED) { + /* Load a dictionary. + It doesn't override compression context's parameters. */ + zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer, + zd->dict_len); + } + else if (type == DICT_TYPE_PREFIX) { + /* Load a prefix */ + zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer, + zd->dict_len); + } + else { + Py_UNREACHABLE(); + } + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_LOAD_C_DICT, zstd_ret); + return -1; + } + return 0; +} + +static int +_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When compressing, use undigested dictionary by default. */ + int type = DICT_TYPE_UNDIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} + +/*[clinic input] +@classmethod +_zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new + level: object = None + The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. + options: object = None + A dict object that contains advanced compression parameters. + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + +Create a compressor object for compressing data incrementally. + +Thread-safe at method level. For one-shot compression, use the compress() +function instead. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict) +/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ +{ + ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); + if (self == NULL) { + goto error; + } + + self->use_multithread = 0; + self->dict = NULL; + self->lock = (PyMutex){0}; + + /* Compression context */ + self->cctx = ZSTD_createCCtx(); + if (self->cctx == NULL) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Unable to create ZSTD_CCtx instance."); + } + goto error; + } + + /* Last mode */ + self->last_mode = ZSTD_e_end; + + if (level != Py_None && options != Py_None) { + PyErr_SetString(PyExc_TypeError, + "Only one of level or options should be used."); + goto error; + } + + /* Set compression level */ + if (level != Py_None) { + if (!PyLong_Check(level)) { + PyErr_SetString(PyExc_TypeError, + "invalid type for level, expected int"); + goto error; + } + int level_v = PyLong_AsInt(level); + if (level_v == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "illegal compression level; the valid range is [%d, %d]", + ZSTD_minCLevel(), ZSTD_maxCLevel()); + } + goto error; + } + if (_zstd_set_c_level(self, level_v) < 0) { + goto error; + } + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_c_parameters(self, options) < 0) { + goto error; + } + } + + /* Load Zstandard dictionary to compression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_c_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + + return (PyObject*)self; + +error: + Py_XDECREF(self); + return NULL; +} + +static void +ZstdCompressor_dealloc(PyObject *ob) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free compression context */ + if (self->cctx) { + ZSTD_freeCCtx(self->cctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); + + /* Py_XDECREF the dict after free the compression context */ + Py_CLEAR(self->dict); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_Del(ob); + Py_DECREF(tp); +} + +static PyObject * +compress_lock_held(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive) +{ + assert(PyMutex_IsLocked(&self->lock)); + ZSTD_inBuffer in; + ZSTD_outBuffer out; + _BlocksOutputBuffer buffer = {.list = NULL}; + size_t zstd_ret; + PyObject *ret; + + /* Prepare input & output buffers */ + if (data != NULL) { + in.src = data->buf; + in.size = data->len; + in.pos = 0; + } + else { + in.src = ∈ + in.size = 0; + in.pos = 0; + } + + /* Calculate output buffer's size */ + size_t output_buffer_size = ZSTD_compressBound(in.size); + if (output_buffer_size > (size_t) PY_SSIZE_T_MAX) { + PyErr_NoMemory(); + goto error; + } + + if (_OutputBuffer_InitWithSize(&buffer, &out, -1, + (Py_ssize_t) output_buffer_size) < 0) { + goto error; + } + + + /* Zstandard stream compress */ + while (1) { + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); + Py_END_ALLOW_THREADS + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); + goto error; + } + + /* Finished */ + if (zstd_ret == 0) { + break; + } + + /* Output buffer should be exhausted, grow the buffer. */ + assert(out.pos == out.size); + if (out.pos == out.size) { + if (_OutputBuffer_Grow(&buffer, &out) < 0) { + goto error; + } + } + } + + /* Return a bytes object */ + ret = _OutputBuffer_Finish(&buffer, &out); + if (ret != NULL) { + return ret; + } + +error: + _OutputBuffer_OnError(&buffer); + return NULL; +} + +#ifndef NDEBUG +static inline int +mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) +{ + return in->size == in->pos && out->size != out->pos; +} +#endif + +static PyObject * +compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) +{ + assert(PyMutex_IsLocked(&self->lock)); + ZSTD_inBuffer in; + ZSTD_outBuffer out; + _BlocksOutputBuffer buffer = {.list = NULL}; + size_t zstd_ret; + PyObject *ret; + + /* Prepare input & output buffers */ + in.src = data->buf; + in.size = data->len; + in.pos = 0; + + if (_OutputBuffer_InitAndGrow(&buffer, &out, -1) < 0) { + goto error; + } + + /* Zstandard stream compress */ + while (1) { + Py_BEGIN_ALLOW_THREADS + do { + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, + ZSTD_e_continue); + } while (out.pos != out.size + && in.pos != in.size + && !ZSTD_isError(zstd_ret)); + Py_END_ALLOW_THREADS + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); + goto error; + } + + /* Like compress_lock_held(), output as much as possible. */ + if (out.pos == out.size) { + if (_OutputBuffer_Grow(&buffer, &out) < 0) { + goto error; + } + } + else if (in.pos == in.size) { + /* Finished */ + assert(mt_continue_should_break(&in, &out)); + break; + } + } + + /* Return a bytes object */ + ret = _OutputBuffer_Finish(&buffer, &out); + if (ret != NULL) { + return ret; + } + +error: + _OutputBuffer_OnError(&buffer); + return NULL; +} + +/*[clinic input] +_zstd.ZstdCompressor.compress + + data: Py_buffer + mode: int(c_default="ZSTD_e_continue") = ZstdCompressor.CONTINUE + Can be these 3 values ZstdCompressor.CONTINUE, + ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME + +Provide data to the compressor object. + +Return a chunk of compressed data if possible, or b'' otherwise. When you have +finished providing data to the compressor, call the flush() method to finish +the compression process. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, + int mode) +/*[clinic end generated code: output=ed7982d1cf7b4f98 input=ac2c21d180f579ea]*/ +{ + PyObject *ret; + + /* Check mode value */ + if (mode != ZSTD_e_continue && + mode != ZSTD_e_flush && + mode != ZSTD_e_end) + { + PyErr_SetString(PyExc_ValueError, + "mode argument wrong value, it should be one of " + "ZstdCompressor.CONTINUE, ZstdCompressor.FLUSH_BLOCK, " + "ZstdCompressor.FLUSH_FRAME."); + return NULL; + } + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + /* Compress */ + if (self->use_multithread && mode == ZSTD_e_continue) { + ret = compress_mt_continue_lock_held(self, data); + } + else { + ret = compress_lock_held(self, data, mode); + } + + if (ret) { + self->last_mode = mode; + } + else { + self->last_mode = ZSTD_e_end; + + /* Resetting cctx's session never fail */ + ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); + } + PyMutex_Unlock(&self->lock); + + return ret; +} + +/*[clinic input] +_zstd.ZstdCompressor.flush + + mode: int(c_default="ZSTD_e_end") = ZstdCompressor.FLUSH_FRAME + Can be these 2 values ZstdCompressor.FLUSH_FRAME, + ZstdCompressor.FLUSH_BLOCK + +Finish the compression process. + +Flush any remaining data left in internal buffers. Since Zstandard data +consists of one or more independent frames, the compressor object can still +be used after this method is called. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) +/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=0ab19627f323cdbc]*/ +{ + PyObject *ret; + + /* Check mode value */ + if (mode != ZSTD_e_end && mode != ZSTD_e_flush) { + PyErr_SetString(PyExc_ValueError, + "mode argument wrong value, it should be " + "ZstdCompressor.FLUSH_FRAME or " + "ZstdCompressor.FLUSH_BLOCK."); + return NULL; + } + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + ret = compress_lock_held(self, NULL, mode); + + if (ret) { + self->last_mode = mode; + } + else { + self->last_mode = ZSTD_e_end; + + /* Resetting cctx's session never fail */ + ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); + } + PyMutex_Unlock(&self->lock); + + return ret; +} + + +/*[clinic input] +_zstd.ZstdCompressor.set_pledged_input_size + + size: zstd_contentsize + The size of the uncompressed data to be provided to the compressor. + / + +Set the uncompressed content size to be written into the frame header. + +This method can be used to ensure the header of the frame about to be written +includes the size of the data, unless the CompressionParameter.content_size_flag +is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised. + +It is important to ensure that the pledged data size matches the actual data +size. If they do not match the compressed output data may be corrupted and the +final chunk written may be lost. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size) +/*[clinic end generated code: output=3a09e55cc0e3b4f9 input=afd8a7d78cff2eb5]*/ +{ + // Error occured while converting argument, should be unreachable + assert(size != ZSTD_CONTENTSIZE_ERROR); + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + /* Check the current mode */ + if (self->last_mode != ZSTD_e_end) { + PyErr_SetString(PyExc_ValueError, + "set_pledged_input_size() method must be called " + "when last_mode == FLUSH_FRAME"); + PyMutex_Unlock(&self->lock); + return NULL; + } + + /* Set pledged content size */ + size_t zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, size); + PyMutex_Unlock(&self->lock); + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret); + return NULL; + } + + Py_RETURN_NONE; +} + +static PyMethodDef ZstdCompressor_methods[] = { + _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF + _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF + _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF + {NULL, NULL} +}; + +PyDoc_STRVAR(ZstdCompressor_last_mode_doc, +"The last mode used to this compressor object, its value can be .CONTINUE,\n" +".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n" +"It can be used to get the current state of a compressor, such as, data\n" +"flushed, or a frame ended."); + +static PyMemberDef ZstdCompressor_members[] = { + {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), + Py_READONLY, ZstdCompressor_last_mode_doc}, + {NULL} +}; + +static int +ZstdCompressor_traverse(PyObject *ob, visitproc visit, void *arg) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + Py_VISIT(self->dict); + return 0; +} + +static int +ZstdCompressor_clear(PyObject *ob) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + Py_CLEAR(self->dict); + return 0; +} + +static PyType_Slot zstdcompressor_slots[] = { + {Py_tp_new, _zstd_ZstdCompressor_new}, + {Py_tp_dealloc, ZstdCompressor_dealloc}, + {Py_tp_methods, ZstdCompressor_methods}, + {Py_tp_members, ZstdCompressor_members}, + {Py_tp_doc, (void *)_zstd_ZstdCompressor_new__doc__}, + {Py_tp_traverse, ZstdCompressor_traverse}, + {Py_tp_clear, ZstdCompressor_clear}, + {0, 0} +}; + +PyType_Spec zstd_compressor_type_spec = { + .name = "compression.zstd.ZstdCompressor", + .basicsize = sizeof(ZstdCompressor), + // Py_TPFLAGS_IMMUTABLETYPE is not used here as several + // associated constants need to be added to the type. + // PyType_Freeze is called later to set the flag. + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .slots = zstdcompressor_slots, +}; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c new file mode 100644 index 00000000000..c53d6e4cb05 --- /dev/null +++ b/Modules/_zstd/decompressor.c @@ -0,0 +1,717 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +/* ZstdDecompressor class definition */ + +/*[clinic input] +module _zstd +class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "Python.h" + +#include "_zstdmodule.h" +#include "buffer.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked + +#include <stdbool.h> // bool +#include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Decompression context */ + ZSTD_DCtx *dctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Unconsumed input data */ + char *input_buffer; + size_t input_buffer_size; + size_t in_begin, in_end; + + /* Unused data */ + PyObject *unused_data; + + /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ + bool needs_input; + + /* For ZstdDecompressor, 0 or 1. + 1 means the end of the first frame has been reached. */ + bool eof; + + /* Lock to protect the decompression context */ + PyMutex lock; +} ZstdDecompressor; + +#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) + +#include "clinic/decompressor.c.h" + +static inline ZSTD_DDict * +_get_DDict(ZstdDict *self) +{ + assert(PyMutex_IsLocked(&self->lock)); + ZSTD_DDict *ret; + + if (self->d_dict == NULL) { + /* Create ZSTD_DDict instance from dictionary content */ + Py_BEGIN_ALLOW_THREADS + ret = ZSTD_createDDict(self->dict_buffer, self->dict_len); + Py_END_ALLOW_THREADS + self->d_dict = ret; + + if (self->d_dict == NULL) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Failed to create a ZSTD_DDict instance from " + "Zstandard dictionary content."); + } + } + } + + return self->d_dict; +} + +static int +_zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, + "ZstdDecompressor() argument 'options' must be dict, not %T", + options); + return -1; + } + + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->CParameter_type) { + PyErr_SetString(PyExc_TypeError, + "compression options dictionary key must not be a " + "CompressionParameter attribute"); + return -1; + } + + Py_INCREF(key); + Py_INCREF(value); + int key_v = PyLong_AsInt(key); + Py_DECREF(key); + if (key_v == -1 && PyErr_Occurred()) { + return -1; + } + + int value_v = PyLong_AsInt(value); + Py_DECREF(value); + if (value_v == -1 && PyErr_Occurred()) { + return -1; + } + + /* Set parameter to compression context */ + size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(0, key_v, value_v); + return -1; + } + } + return 0; +} + +static int +_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) +{ + size_t zstd_ret; + if (type == DICT_TYPE_DIGESTED) { + /* Get ZSTD_DDict */ + ZSTD_DDict *d_dict = _get_DDict(zd); + if (d_dict == NULL) { + return -1; + } + /* Reference a prepared dictionary */ + zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); + } + else if (type == DICT_TYPE_UNDIGESTED) { + /* Load a dictionary */ + zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer, + zd->dict_len); + } + else if (type == DICT_TYPE_PREFIX) { + /* Load a prefix */ + zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer, + zd->dict_len); + } + else { + /* Impossible code path */ + PyErr_SetString(PyExc_SystemError, + "load_d_dict() impossible code path"); + return -1; + } + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret); + return -1; + } + return 0; +} + +/* Load dictionary or prefix to decompression context */ +static int +_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When decompressing, use digested dictionary by default. */ + int type = DICT_TYPE_DIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} + +/* + Decompress implementation in pseudo code: + + initialize_output_buffer + while True: + decompress_data + set_object_flag # .eof + + if output_buffer_exhausted: + if output_buffer_reached_max_length: + finish + grow_output_buffer + elif input_buffer_exhausted: + finish + + ZSTD_decompressStream()'s size_t return value: + - 0 when a frame is completely decoded and fully flushed, + zstd's internal buffer has no data. + - An error code, which can be tested using ZSTD_isError(). + - Or any other value > 0, which means there is still some decoding or + flushing to do to complete current frame. + + Note, decompressing "an empty input" in any case will make it > 0. +*/ +static PyObject * +decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length) +{ + size_t zstd_ret; + ZSTD_outBuffer out; + _BlocksOutputBuffer buffer = {.list = NULL}; + PyObject *ret; + + /* Initialize the output buffer */ + if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { + goto error; + } + assert(out.pos == 0); + + while (1) { + /* Decompress */ + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZSTD_decompressStream(self->dctx, &out, in); + Py_END_ALLOW_THREADS + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); + goto error; + } + + /* Set .eof flag */ + if (zstd_ret == 0) { + /* Stop when a frame is decompressed */ + self->eof = 1; + break; + } + + /* Need to check out before in. Maybe zstd's internal buffer still has + a few bytes that can be output, grow the buffer and continue. */ + if (out.pos == out.size) { + /* Output buffer exhausted */ + + /* Output buffer reached max_length */ + if (_OutputBuffer_ReachedMaxLength(&buffer, &out)) { + break; + } + + /* Grow output buffer */ + if (_OutputBuffer_Grow(&buffer, &out) < 0) { + goto error; + } + assert(out.pos == 0); + + } + else if (in->pos == in->size) { + /* Finished */ + break; + } + } + + /* Return a bytes object */ + ret = _OutputBuffer_Finish(&buffer, &out); + if (ret != NULL) { + return ret; + } + +error: + _OutputBuffer_OnError(&buffer); + return NULL; +} + +static void +decompressor_reset_session_lock_held(ZstdDecompressor *self) +{ + assert(PyMutex_IsLocked(&self->lock)); + + /* Reset variables */ + self->in_begin = 0; + self->in_end = 0; + + Py_CLEAR(self->unused_data); + + /* Reset variables in one operation */ + self->needs_input = 1; + self->eof = 0; + + /* Resetting session is guaranteed to never fail */ + ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); +} + +static PyObject * +stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, + Py_ssize_t max_length) +{ + assert(PyMutex_IsLocked(&self->lock)); + ZSTD_inBuffer in; + PyObject *ret = NULL; + int use_input_buffer; + + /* Check .eof flag */ + if (self->eof) { + PyErr_SetString(PyExc_EOFError, + "Already at the end of a Zstandard frame."); + assert(ret == NULL); + return NULL; + } + + /* Prepare input buffer w/wo unconsumed data */ + if (self->in_begin == self->in_end) { + /* No unconsumed data */ + use_input_buffer = 0; + + in.src = data->buf; + in.size = data->len; + in.pos = 0; + } + else if (data->len == 0) { + /* Has unconsumed data, fast path for b'' */ + assert(self->in_begin < self->in_end); + + use_input_buffer = 1; + + in.src = self->input_buffer + self->in_begin; + in.size = self->in_end - self->in_begin; + in.pos = 0; + } + else { + /* Has unconsumed data */ + use_input_buffer = 1; + + /* Unconsumed data size in input_buffer */ + size_t used_now = self->in_end - self->in_begin; + assert(self->in_end > self->in_begin); + + /* Number of bytes we can append to input buffer */ + size_t avail_now = self->input_buffer_size - self->in_end; + assert(self->input_buffer_size >= self->in_end); + + /* Number of bytes we can append if we move existing contents to + beginning of buffer */ + size_t avail_total = self->input_buffer_size - used_now; + assert(self->input_buffer_size >= used_now); + + if (avail_total < (size_t) data->len) { + char *tmp; + size_t new_size = used_now + data->len; + + /* Allocate with new size */ + tmp = PyMem_Malloc(new_size); + if (tmp == NULL) { + PyErr_NoMemory(); + goto error; + } + + /* Copy unconsumed data to the beginning of new buffer */ + memcpy(tmp, + self->input_buffer + self->in_begin, + used_now); + + /* Switch to new buffer */ + PyMem_Free(self->input_buffer); + self->input_buffer = tmp; + self->input_buffer_size = new_size; + + /* Set begin & end position */ + self->in_begin = 0; + self->in_end = used_now; + } + else if (avail_now < (size_t) data->len) { + /* Move unconsumed data to the beginning. + Overlap is possible, so use memmove(). */ + memmove(self->input_buffer, + self->input_buffer + self->in_begin, + used_now); + + /* Set begin & end position */ + self->in_begin = 0; + self->in_end = used_now; + } + + /* Copy data to input buffer */ + memcpy(self->input_buffer + self->in_end, data->buf, data->len); + self->in_end += data->len; + + in.src = self->input_buffer + self->in_begin; + in.size = used_now + data->len; + in.pos = 0; + } + assert(in.pos == 0); + + /* Decompress */ + ret = decompress_lock_held(self, &in, max_length); + if (ret == NULL) { + goto error; + } + + /* Unconsumed input data */ + if (in.pos == in.size) { + if (Py_SIZE(ret) == max_length || self->eof) { + self->needs_input = 0; + } + else { + self->needs_input = 1; + } + + if (use_input_buffer) { + /* Clear input_buffer */ + self->in_begin = 0; + self->in_end = 0; + } + } + else { + size_t data_size = in.size - in.pos; + + self->needs_input = 0; + + if (!use_input_buffer) { + /* Discard buffer if it's too small + (resizing it may needlessly copy the current contents) */ + if (self->input_buffer != NULL + && self->input_buffer_size < data_size) + { + PyMem_Free(self->input_buffer); + self->input_buffer = NULL; + self->input_buffer_size = 0; + } + + /* Allocate if necessary */ + if (self->input_buffer == NULL) { + self->input_buffer = PyMem_Malloc(data_size); + if (self->input_buffer == NULL) { + PyErr_NoMemory(); + goto error; + } + self->input_buffer_size = data_size; + } + + /* Copy unconsumed data */ + memcpy(self->input_buffer, (char*)in.src + in.pos, data_size); + self->in_begin = 0; + self->in_end = data_size; + } + else { + /* Use input buffer */ + self->in_begin += in.pos; + } + } + + return ret; + +error: + /* Reset decompressor's states/session */ + decompressor_reset_session_lock_held(self); + + Py_CLEAR(ret); + return NULL; +} + + +/*[clinic input] +@classmethod +_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + options: object = None + A dict object that contains advanced decompression parameters. + +Create a decompressor object for decompressing data incrementally. + +Thread-safe at method level. For one-shot decompression, use the decompress() +function instead. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options) +/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/ +{ + ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type); + if (self == NULL) { + goto error; + } + + self->input_buffer = NULL; + self->input_buffer_size = 0; + self->in_begin = -1; + self->in_end = -1; + self->unused_data = NULL; + self->eof = 0; + self->dict = NULL; + self->lock = (PyMutex){0}; + + /* needs_input flag */ + self->needs_input = 1; + + /* Decompression context */ + self->dctx = ZSTD_createDCtx(); + if (self->dctx == NULL) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Unable to create ZSTD_DCtx instance."); + } + goto error; + } + + /* Load Zstandard dictionary to decompression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_d_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_d_parameters(self, options) < 0) { + goto error; + } + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + + return (PyObject*)self; + +error: + Py_XDECREF(self); + return NULL; +} + +static void +ZstdDecompressor_dealloc(PyObject *ob) +{ + ZstdDecompressor *self = ZstdDecompressor_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free decompression context */ + if (self->dctx) { + ZSTD_freeDCtx(self->dctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); + + /* Py_CLEAR the dict after free decompression context */ + Py_CLEAR(self->dict); + + /* Free unconsumed input data buffer */ + PyMem_Free(self->input_buffer); + + /* Free unused data */ + Py_CLEAR(self->unused_data); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_Del(ob); + Py_DECREF(tp); +} + +/*[clinic input] +@getter +_zstd.ZstdDecompressor.unused_data + +A bytes object of un-consumed input data. + +When ZstdDecompressor object stops after a frame is +decompressed, unused input data after the frame. Otherwise this will be b''. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) +/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/ +{ + PyObject *ret; + + PyMutex_Lock(&self->lock); + + if (!self->eof) { + PyMutex_Unlock(&self->lock); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + else { + if (self->unused_data == NULL) { + self->unused_data = PyBytes_FromStringAndSize( + self->input_buffer + self->in_begin, + self->in_end - self->in_begin); + ret = self->unused_data; + Py_XINCREF(ret); + } + else { + ret = self->unused_data; + Py_INCREF(ret); + } + } + + PyMutex_Unlock(&self->lock); + return ret; +} + +/*[clinic input] +_zstd.ZstdDecompressor.decompress + + data: Py_buffer + A bytes-like object, Zstandard data to be decompressed. + max_length: Py_ssize_t = -1 + Maximum size of returned data. When it is negative, the size of + output buffer is unlimited. When it is nonnegative, returns at + most max_length bytes of decompressed data. + +Decompress *data*, returning uncompressed bytes if possible, or b'' otherwise. + +If *max_length* is nonnegative, returns at most *max_length* bytes of +decompressed data. If this limit is reached and further output can be +produced, *self.needs_input* will be set to ``False``. In this case, the next +call to *decompress()* may provide *data* as b'' to obtain more of the output. + +If all of the input data was decompressed and returned (either because this +was less than *max_length* bytes, or because *max_length* was negative), +*self.needs_input* will be set to True. + +Attempting to decompress data after the end of a frame is reached raises an +EOFError. Any data found after the end of the frame is ignored and saved in +the self.unused_data attribute. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, + Py_buffer *data, + Py_ssize_t max_length) +/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/ +{ + PyObject *ret; + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + ret = stream_decompress_lock_held(self, data, max_length); + PyMutex_Unlock(&self->lock); + return ret; +} + +static PyMethodDef ZstdDecompressor_methods[] = { + _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF + {NULL, NULL} +}; + +PyDoc_STRVAR(ZstdDecompressor_eof_doc, +"True means the end of the first frame has been reached. If decompress data\n" +"after that, an EOFError exception will be raised."); + +PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, +"If the max_length output limit in .decompress() method has been reached,\n" +"and the decompressor has (or may has) unconsumed input data, it will be set\n" +"to False. In this case, passing b'' to the .decompress() method may output\n" +"further data."); + +static PyMemberDef ZstdDecompressor_members[] = { + {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof), + Py_READONLY, ZstdDecompressor_eof_doc}, + {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input), + Py_READONLY, ZstdDecompressor_needs_input_doc}, + {NULL} +}; + +static PyGetSetDef ZstdDecompressor_getset[] = { + _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF + {NULL} +}; + +static int +ZstdDecompressor_traverse(PyObject *ob, visitproc visit, void *arg) +{ + ZstdDecompressor *self = ZstdDecompressor_CAST(ob); + Py_VISIT(self->dict); + return 0; +} + +static int +ZstdDecompressor_clear(PyObject *ob) +{ + ZstdDecompressor *self = ZstdDecompressor_CAST(ob); + Py_CLEAR(self->dict); + Py_CLEAR(self->unused_data); + return 0; +} + +static PyType_Slot ZstdDecompressor_slots[] = { + {Py_tp_new, _zstd_ZstdDecompressor_new}, + {Py_tp_dealloc, ZstdDecompressor_dealloc}, + {Py_tp_methods, ZstdDecompressor_methods}, + {Py_tp_members, ZstdDecompressor_members}, + {Py_tp_getset, ZstdDecompressor_getset}, + {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__}, + {Py_tp_traverse, ZstdDecompressor_traverse}, + {Py_tp_clear, ZstdDecompressor_clear}, + {0, 0} +}; + +PyType_Spec zstd_decompressor_type_spec = { + .name = "compression.zstd.ZstdDecompressor", + .basicsize = sizeof(ZstdDecompressor), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, + .slots = ZstdDecompressor_slots, +}; diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c new file mode 100644 index 00000000000..14f74aaed46 --- /dev/null +++ b/Modules/_zstd/zstddict.c @@ -0,0 +1,273 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +/* ZstdDict class definitions */ + +/*[clinic input] +module _zstd +class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "Python.h" + +#include "_zstdmodule.h" +#include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked + +#include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() + +#define ZstdDict_CAST(op) ((ZstdDict *)op) + +/*[clinic input] +@classmethod +_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new + dict_content: Py_buffer + The content of a Zstandard dictionary as a bytes-like object. + / + * + is_raw: bool = False + If true, perform no checks on *dict_content*, useful for some + advanced cases. Otherwise, check that the content represents + a Zstandard dictionary created by the zstd library or CLI. + +Represents a Zstandard dictionary. + +The dictionary can be used for compression or decompression, and can be shared +by multiple ZstdCompressor or ZstdDecompressor objects. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw) +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ +{ + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + + ZstdDict* self = PyObject_GC_New(ZstdDict, type); + if (self == NULL) { + return NULL; + } + + self->d_dict = NULL; + self->dict_buffer = NULL; + self->dict_id = 0; + self->lock = (PyMutex){0}; + + /* ZSTD_CDict dict */ + self->c_dicts = PyDict_New(); + if (self->c_dicts == NULL) { + goto error; + } + + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); + goto error; + } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; + + /* Get dict_id, 0 means "raw content" dictionary. */ + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); + + /* Check validity for ordinary dictionary */ + if (!is_raw && self->dict_id == 0) { + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); + goto error; + } + + PyObject_GC_Track(self); + + return (PyObject *)self; + +error: + Py_XDECREF(self); + return NULL; +} + +static void +ZstdDict_dealloc(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free ZSTD_DDict instance */ + if (self->d_dict) { + ZSTD_freeDDict(self->d_dict); + } + + assert(!PyMutex_IsLocked(&self->lock)); + + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); + Py_CLEAR(self->c_dicts); + + PyTypeObject *tp = Py_TYPE(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +PyDoc_STRVAR(ZstdDict_dictid_doc, +"the Zstandard dictionary, an int between 0 and 2**32.\n\n" +"A non-zero value represents an ordinary Zstandard dictionary, " +"conforming to the standardised format.\n\n" +"The special value '0' means a 'raw content' dictionary," +"without any restrictions on format or content."); + +static PyObject * +ZstdDict_repr(PyObject *ob) +{ + ZstdDict *dict = ZstdDict_CAST(ob); + return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>", + (unsigned int)dict->dict_id, dict->dict_len); +} + +static PyMemberDef ZstdDict_members[] = { + {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, + {NULL} +}; + +/*[clinic input] +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + +/*[clinic input] +@getter +_zstd.ZstdDict.as_digested_dict + +Load as a digested dictionary to compressor. + +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + +1. Some advanced compression parameters of compressor may be overridden + by parameters of digested dictionary. +2. ZstdDict has a digested dictionaries cache for each compression level. + It's faster when loading again a digested dictionary with the same + compression level. +3. No need to use this for decompression. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ +{ + return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); +} + +/*[clinic input] +@getter +_zstd.ZstdDict.as_undigested_dict + +Load as an undigested dictionary to compressor. + +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + +1. The advanced compression parameters of compressor will not be overridden. +2. Loading an undigested dictionary is costly. If load an undigested dictionary + multiple times, consider reusing a compressor object. +3. No need to use this for decompression. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ +{ + return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); +} + +/*[clinic input] +@getter +_zstd.ZstdDict.as_prefix + +Load as a prefix to compressor/decompressor. + +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + +1. Prefix is compatible with long distance matching, while dictionary is not. +2. It only works for the first frame, then the compressor/decompressor will + return to no prefix state. +3. When decompressing, must use the same prefix as when compressing." +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) +/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/ +{ + return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); +} + +static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF + _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF + _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF + _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF + {NULL} +}; + +static Py_ssize_t +ZstdDict_length(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + return self->dict_len; +} + +static int +ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) +{ + ZstdDict *self = ZstdDict_CAST(ob); + Py_VISIT(self->c_dicts); + return 0; +} + +static int +ZstdDict_clear(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + Py_CLEAR(self->c_dicts); + return 0; +} + +static PyType_Slot zstddict_slots[] = { + {Py_tp_members, ZstdDict_members}, + {Py_tp_getset, ZstdDict_getset}, + {Py_tp_new, _zstd_ZstdDict_new}, + {Py_tp_dealloc, ZstdDict_dealloc}, + {Py_tp_repr, ZstdDict_repr}, + {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, + {Py_sq_length, ZstdDict_length}, + {Py_tp_traverse, ZstdDict_traverse}, + {Py_tp_clear, ZstdDict_clear}, + {0, 0} +}; + +PyType_Spec zstd_dict_type_spec = { + .name = "compression.zstd.ZstdDict", + .basicsize = sizeof(ZstdDict), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, + .slots = zstddict_slots, +}; diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h new file mode 100644 index 00000000000..4a403416dbd --- /dev/null +++ b/Modules/_zstd/zstddict.h @@ -0,0 +1,29 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef ZSTD_DICT_H +#define ZSTD_DICT_H + +#include <zstd.h> // ZSTD_DDict + +typedef struct { + PyObject_HEAD + + /* Reusable compress/decompress dictionary, they are created once and + can be shared by multiple threads concurrently, since its usage is + read-only. + c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ + ZSTD_DDict *d_dict; + PyObject *c_dicts; + + /* Dictionary content. */ + char *dict_buffer; + Py_ssize_t dict_len; + + /* Dictionary id */ + uint32_t dict_id; + + /* Lock to protect the digested dictionaries */ + PyMutex lock; +} ZstdDict; + +#endif // !ZSTD_DICT_H |