diff options
Diffstat (limited to 'Modules/_zstd')
-rw-r--r-- | Modules/_zstd/_zstdmodule.c | 499 | ||||
-rw-r--r-- | Modules/_zstd/_zstdmodule.h | 154 | ||||
-rw-r--r-- | Modules/_zstd/buffer.h | 13 | ||||
-rw-r--r-- | Modules/_zstd/clinic/_zstdmodule.c.h | 129 | ||||
-rw-r--r-- | Modules/_zstd/clinic/compressor.c.h | 28 | ||||
-rw-r--r-- | Modules/_zstd/clinic/decompressor.c.h | 22 | ||||
-rw-r--r-- | Modules/_zstd/clinic/zstddict.c.h | 48 | ||||
-rw-r--r-- | Modules/_zstd/compressor.c | 208 | ||||
-rw-r--r-- | Modules/_zstd/decompressor.c | 376 | ||||
-rw-r--r-- | Modules/_zstd/zstddict.c | 177 | ||||
-rw-r--r-- | Modules/_zstd/zstddict.h | 24 |
11 files changed, 633 insertions, 1045 deletions
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 4b14315462b..0294828aa10 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -1,13 +1,16 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif +#include "Python.h" + #include "_zstdmodule.h" +#include "zstddict.h" + +#include <zstd.h> // ZSTD_*() +#include <zdict.h> // ZDICT_*() /*[clinic input] module _zstd @@ -28,20 +31,17 @@ set_zstd_error(const _zstd_state* const state, switch (type) { case ERR_DECOMPRESS: - msg = "Unable to decompress zstd data: %s"; + msg = "Unable to decompress Zstandard data: %s"; break; case ERR_COMPRESS: - msg = "Unable to compress zstd data: %s"; - break; - case ERR_SET_PLEDGED_INPUT_SIZE: - msg = "Unable to set pledged uncompressed content size: %s"; + msg = "Unable to compress Zstandard data: %s"; break; case ERR_LOAD_D_DICT: - msg = "Unable to load zstd dictionary or prefix for decompression: %s"; + msg = "Unable to load Zstandard dictionary or prefix for decompression: %s"; break; case ERR_LOAD_C_DICT: - msg = "Unable to load zstd dictionary or prefix for compression: %s"; + msg = "Unable to load Zstandard dictionary or prefix for compression: %s"; break; case ERR_GET_C_BOUNDS: @@ -55,10 +55,10 @@ set_zstd_error(const _zstd_state* const state, break; case ERR_TRAIN_DICT: - msg = "Unable to train zstd dictionary: %s"; + msg = "Unable to train the Zstandard dictionary: %s"; break; case ERR_FINALIZE_DICT: - msg = "Unable to finalize zstd dictionary: %s"; + msg = "Unable to finalize the Zstandard dictionary: %s"; break; default: @@ -72,8 +72,7 @@ typedef struct { char parameter_name[32]; } ParameterInfo; -static const ParameterInfo cp_list[] = -{ +static const ParameterInfo cp_list[] = { {ZSTD_c_compressionLevel, "compression_level"}, {ZSTD_c_windowLog, "window_log"}, {ZSTD_c_hashLog, "hash_log"}, @@ -98,8 +97,7 @@ static const ParameterInfo cp_list[] = {ZSTD_c_overlapLog, "overlap_log"} }; -static const ParameterInfo dp_list[] = -{ +static const ParameterInfo dp_list[] = { {ZSTD_d_windowLogMax, "window_log_max"} }; @@ -151,8 +149,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress, } if (ZSTD_isError(bounds.error)) { PyErr_Format(state->ZstdError, - "Zstd %s parameter \"%s\" is invalid. (zstd v%s)", - type, name, ZSTD_versionString()); + "Invalid zstd %s parameter \"%s\".", + type, name); return; } @@ -160,10 +158,10 @@ set_parameter_error(const _zstd_state* const state, int is_compress, PyErr_Format(state->ZstdError, "Error when setting zstd %s parameter \"%s\", it " "should %d <= value <= %d, provided value is %d. " - "(zstd v%s, %d-bit build)", + "(%d-bit build)", type, name, bounds.lowerBound, bounds.upperBound, value_v, - ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t)); + 8*(int)sizeof(Py_ssize_t)); } static inline _zstd_state* @@ -176,7 +174,7 @@ get_zstd_state(PyObject *module) /*[clinic input] -_zstd._train_dict +_zstd.train_dict samples_bytes: PyBytesObject Concatenation of samples. @@ -186,13 +184,13 @@ _zstd._train_dict The size of the dictionary. / -Internal function, train a zstd dictionary on sample data. +Train a Zstandard dictionary on sample data. [clinic start generated code]*/ static PyObject * -_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size) -/*[clinic end generated code: output=b5b4f36347c0addd input=2dce5b57d63923e2]*/ +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size) +/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/ { // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict // are pretty similar. We should see if we can refactor them to share that code. @@ -257,7 +255,7 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, chunk_sizes, (uint32_t)chunks_number); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { _zstd_state* const mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); @@ -280,7 +278,7 @@ success: } /*[clinic input] -_zstd._finalize_dict +_zstd.finalize_dict custom_dict_bytes: PyBytesObject Custom dictionary content. @@ -291,18 +289,18 @@ _zstd._finalize_dict dict_size: Py_ssize_t The size of the dictionary. compression_level: int - Optimize for a specific zstd compression level, 0 means default. + Optimize for a specific Zstandard compression level, 0 means default. / -Internal function, finalize a zstd dictionary. +Finalize a Zstandard dictionary. [clinic start generated code]*/ static PyObject * -_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, - PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size, - int compression_level) -/*[clinic end generated code: output=5dc5b520fddba37f input=8afd42a249078460]*/ +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level) +/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/ { Py_ssize_t chunks_number; size_t *chunk_sizes = NULL; @@ -359,7 +357,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /* Parameters */ - /* Optimize for a specific zstd compression level, 0 means default. */ + /* Optimize for a specific Zstandard compression level, 0 means default. */ params.compressionLevel = compression_level; /* Write log to stderr, 0 = none. */ params.notificationLevel = 0; @@ -375,7 +373,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, (uint32_t)chunks_number, params); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { _zstd_state* const mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); @@ -399,20 +397,19 @@ success: /*[clinic input] -_zstd._get_param_bounds +_zstd.get_param_bounds parameter: int The parameter to get bounds. is_compress: bool True for CompressionParameter, False for DecompressionParameter. -Internal function, get CompressionParameter/DecompressionParameter bounds. +Get CompressionParameter/DecompressionParameter bounds. [clinic start generated code]*/ static PyObject * -_zstd__get_param_bounds_impl(PyObject *module, int parameter, - int is_compress) -/*[clinic end generated code: output=9892cd822f937e79 input=884cd1a01125267d]*/ +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) +/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/ { ZSTD_bounds bound; if (is_compress) { @@ -442,14 +439,12 @@ _zstd.get_frame_size A bytes-like object, it should start from the beginning of a frame, and contains at least one complete frame. -Get the size of a zstd frame, including frame header and 4-byte checksum if it has one. - -It will iterate all blocks' headers within a frame, to accumulate the frame size. +Get the size of a Zstandard frame, including the header and optional checksum. [clinic start generated code]*/ static PyObject * _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/ +/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/ { size_t frame_size; @@ -457,9 +452,9 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) if (ZSTD_isError(frame_size)) { _zstd_state* const mod_state = get_zstd_state(module); PyErr_Format(mod_state->ZstdError, - "Error when finding the compressed size of a zstd frame. " - "Make sure the frame_buffer argument starts from the " - "beginning of a frame, and its length not less than this " + "Error when finding the compressed size of a Zstandard frame. " + "Ensure the frame_buffer argument starts from the " + "beginning of a frame, and its length is not less than this " "complete frame. Zstd error message: %s.", ZSTD_getErrorName(frame_size)); return NULL; @@ -469,17 +464,17 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) } /*[clinic input] -_zstd._get_frame_info +_zstd.get_frame_info frame_buffer: Py_buffer - A bytes-like object, containing the header of a zstd frame. + A bytes-like object, containing the header of a Zstandard frame. -Internal function, get zstd frame infomation from a frame header. +Get Zstandard frame infomation from a frame header. [clinic start generated code]*/ static PyObject * -_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/ +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/ { uint64_t decompressed_size; uint32_t dict_id; @@ -494,9 +489,9 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) _zstd_state* const mod_state = get_zstd_state(module); PyErr_SetString(mod_state->ZstdError, "Error when getting information from the header of " - "a zstd frame. Make sure the frame_buffer argument " + "a Zstandard frame. Ensure the frame_buffer argument " "starts from the beginning of a frame, and its length " - "not less than the frame header (6~18 bytes)."); + "is not less than the frame header (6~18 bytes)."); return NULL; } @@ -511,20 +506,20 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) } /*[clinic input] -_zstd._set_parameter_types +_zstd.set_parameter_types c_parameter_type: object(subclass_of='&PyType_Type') CompressionParameter IntEnum type object d_parameter_type: object(subclass_of='&PyType_Type') DecompressionParameter IntEnum type object -Internal function, set CompressionParameter/DecompressionParameter types for validity check. +Set CompressionParameter and DecompressionParameter types for validity check. [clinic start generated code]*/ static PyObject * -_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, - PyObject *d_parameter_type) -/*[clinic end generated code: output=a13d4890ccbd2873 input=4535545d903853d3]*/ +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type) +/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/ { _zstd_state* const mod_state = get_zstd_state(module); @@ -547,289 +542,144 @@ _zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, } static PyMethodDef _zstd_methods[] = { - _ZSTD__TRAIN_DICT_METHODDEF - _ZSTD__FINALIZE_DICT_METHODDEF - _ZSTD__GET_PARAM_BOUNDS_METHODDEF + _ZSTD_TRAIN_DICT_METHODDEF + _ZSTD_FINALIZE_DICT_METHODDEF + _ZSTD_GET_PARAM_BOUNDS_METHODDEF _ZSTD_GET_FRAME_SIZE_METHODDEF - _ZSTD__GET_FRAME_INFO_METHODDEF - _ZSTD__SET_PARAMETER_TYPES_METHODDEF - - {0} + _ZSTD_GET_FRAME_INFO_METHODDEF + _ZSTD_SET_PARAMETER_TYPES_METHODDEF + {NULL, NULL} }; - -#define ADD_INT_PREFIX_MACRO(module, macro) \ - do { \ - if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \ - return -1; \ - } \ - } while(0) - static int -add_parameters(PyObject *module) -{ - /* If add new parameters, please also add to cp_list/dp_list above. */ - - /* Compression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog); - - /* Decompression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax); - - /* ZSTD_strategy enum */ - ADD_INT_PREFIX_MACRO(module, ZSTD_fast); - ADD_INT_PREFIX_MACRO(module, ZSTD_dfast); - ADD_INT_PREFIX_MACRO(module, ZSTD_greedy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btopt); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2); - - return 0; -} - -static inline PyObject * -get_zstd_version_info(void) +_zstd_exec(PyObject *m) { - uint32_t ver = ZSTD_versionNumber(); - uint32_t major, minor, release; - - major = ver / 10000; - minor = (ver / 100) % 100; - release = ver % 100; - - return Py_BuildValue("III", major, minor, release); -} - -static inline int -add_vars_to_module(PyObject *module) -{ - PyObject *obj; - - /* zstd_version, a str. */ - if (PyModule_AddStringConstant(module, "zstd_version", - ZSTD_versionString()) < 0) { - return -1; - } - - /* zstd_version_info, a tuple. */ - obj = get_zstd_version_info(); - if (PyModule_AddObjectRef(module, "zstd_version_info", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* Add zstd parameters */ - if (add_parameters(module) < 0) { - return -1; - } - - /* _compressionLevel_values: (default, min, max) - ZSTD_defaultCLevel() was added in zstd v1.5.0 */ - obj = Py_BuildValue("iii", -#if ZSTD_VERSION_NUMBER < 10500 - ZSTD_CLEVEL_DEFAULT, -#else - ZSTD_defaultCLevel(), -#endif - ZSTD_minCLevel(), - ZSTD_maxCLevel()); - if (PyModule_AddObjectRef(module, - "_compressionLevel_values", - obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_CStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_CStreamInSize(), - (uint32_t)ZSTD_CStreamOutSize()); - if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_DStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_DStreamInSize(), - (uint32_t)ZSTD_DStreamOutSize()); - if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_CONFIG */ - obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c", - Py_False, - Py_True, -/* User mremap output buffer */ -#if defined(HAVE_MREMAP) - Py_True -#else - Py_False -#endif - ); - if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - return 0; -} - -#define ADD_STR_TO_STATE_MACRO(STR) \ - do { \ - mod_state->str_##STR = PyUnicode_FromString(#STR); \ - if (mod_state->str_##STR == NULL) { \ - return -1; \ - } \ - } while(0) - -static inline int -add_type_to_module(PyObject *module, const char *name, - PyType_Spec *type_spec, PyTypeObject **dest) -{ - PyObject *temp = PyType_FromModuleAndSpec(module, type_spec, NULL); - - if (PyModule_AddObjectRef(module, name, temp) < 0) { - Py_XDECREF(temp); - return -1; - } - - *dest = (PyTypeObject*) temp; - - return 0; -} - -static inline int -add_constant_to_type(PyTypeObject *type, const char *name, long value) -{ - PyObject *temp; - - temp = PyLong_FromLong(value); - if (temp == NULL) { - return -1; - } - - int rc = PyObject_SetAttrString((PyObject*) type, name, temp); - Py_DECREF(temp); - return rc; -} - -static int _zstd_exec(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); +#define ADD_TYPE(TYPE, SPEC) \ +do { \ + TYPE = (PyTypeObject *)PyType_FromModuleAndSpec(m, &(SPEC), NULL); \ + if (TYPE == NULL) { \ + return -1; \ + } \ + if (PyModule_AddType(m, TYPE) < 0) { \ + return -1; \ + } \ +} while (0) + +#define ADD_INT_MACRO(MACRO) \ + if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \ + return -1; \ + } + +#define ADD_INT_CONST_TO_TYPE(TYPE, NAME, VALUE) \ +do { \ + PyObject *v = PyLong_FromLong((VALUE)); \ + if (v == NULL || PyObject_SetAttrString((PyObject *)(TYPE), \ + (NAME), v) < 0) { \ + Py_XDECREF(v); \ + return -1; \ + } \ + Py_DECREF(v); \ +} while (0) + + _zstd_state* const mod_state = get_zstd_state(m); /* Reusable objects & variables */ - mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0); - if (mod_state->empty_bytes == NULL) { - return -1; - } - - mod_state->empty_readonly_memoryview = - PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ); - if (mod_state->empty_readonly_memoryview == NULL) { - return -1; - } - - /* Add str to module state */ - ADD_STR_TO_STATE_MACRO(read); - ADD_STR_TO_STATE_MACRO(readinto); - ADD_STR_TO_STATE_MACRO(write); - ADD_STR_TO_STATE_MACRO(flush); - mod_state->CParameter_type = NULL; mod_state->DParameter_type = NULL; - /* Add variables to module */ - if (add_vars_to_module(module) < 0) { - return -1; - } - - /* ZstdError */ + /* Create and add heap types */ + ADD_TYPE(mod_state->ZstdDict_type, zstd_dict_type_spec); + ADD_TYPE(mod_state->ZstdCompressor_type, zstd_compressor_type_spec); + ADD_TYPE(mod_state->ZstdDecompressor_type, zstd_decompressor_type_spec); mod_state->ZstdError = PyErr_NewExceptionWithDoc( - "_zstd.ZstdError", - "Call to the underlying zstd library failed.", - NULL, NULL); + "compression.zstd.ZstdError", + "An error occurred in the zstd library.", + NULL, NULL); if (mod_state->ZstdError == NULL) { return -1; } - - if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) { + if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) { Py_DECREF(mod_state->ZstdError); return -1; } - /* ZstdDict */ - if (add_type_to_module(module, - "ZstdDict", - &zstddict_type_spec, - &mod_state->ZstdDict_type) < 0) { + /* Add constants */ + if (PyModule_AddIntConstant(m, "zstd_version_number", + ZSTD_versionNumber()) < 0) { return -1; } - // ZstdCompressor - if (add_type_to_module(module, - "ZstdCompressor", - &zstdcompressor_type_spec, - &mod_state->ZstdCompressor_type) < 0) { - return -1; - } - - // Add EndDirective enum to ZstdCompressor - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "CONTINUE", - ZSTD_e_continue) < 0) { + if (PyModule_AddStringConstant(m, "zstd_version", + ZSTD_versionString()) < 0) { return -1; } - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "FLUSH_BLOCK", - ZSTD_e_flush) < 0) { +#if ZSTD_VERSION_NUMBER >= 10500 + if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT", + ZSTD_defaultCLevel()) < 0) { return -1; } +#else + ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT); +#endif - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "FLUSH_FRAME", - ZSTD_e_end) < 0) { + if (PyModule_Add(m, "ZSTD_DStreamOutSize", + PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) { return -1; } - // ZstdDecompressor - if (add_type_to_module(module, - "ZstdDecompressor", - &zstddecompressor_type_spec, - &mod_state->ZstdDecompressor_type) < 0) { - return -1; - } + /* Add zstd compression parameters. All should also be in cp_list. */ + ADD_INT_MACRO(ZSTD_c_compressionLevel); + ADD_INT_MACRO(ZSTD_c_windowLog); + ADD_INT_MACRO(ZSTD_c_hashLog); + ADD_INT_MACRO(ZSTD_c_chainLog); + ADD_INT_MACRO(ZSTD_c_searchLog); + ADD_INT_MACRO(ZSTD_c_minMatch); + ADD_INT_MACRO(ZSTD_c_targetLength); + ADD_INT_MACRO(ZSTD_c_strategy); + + ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching); + ADD_INT_MACRO(ZSTD_c_ldmHashLog); + ADD_INT_MACRO(ZSTD_c_ldmMinMatch); + ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog); + ADD_INT_MACRO(ZSTD_c_ldmHashRateLog); + + ADD_INT_MACRO(ZSTD_c_contentSizeFlag); + ADD_INT_MACRO(ZSTD_c_checksumFlag); + ADD_INT_MACRO(ZSTD_c_dictIDFlag); + + ADD_INT_MACRO(ZSTD_c_nbWorkers); + ADD_INT_MACRO(ZSTD_c_jobSize); + ADD_INT_MACRO(ZSTD_c_overlapLog); + + /* Add zstd decompression parameters. All should also be in dp_list. */ + ADD_INT_MACRO(ZSTD_d_windowLogMax); + + /* Add ZSTD_strategy enum members */ + ADD_INT_MACRO(ZSTD_fast); + ADD_INT_MACRO(ZSTD_dfast); + ADD_INT_MACRO(ZSTD_greedy); + ADD_INT_MACRO(ZSTD_lazy); + ADD_INT_MACRO(ZSTD_lazy2); + ADD_INT_MACRO(ZSTD_btlazy2); + ADD_INT_MACRO(ZSTD_btopt); + ADD_INT_MACRO(ZSTD_btultra); + ADD_INT_MACRO(ZSTD_btultra2); + + /* Add ZSTD_EndDirective enum members to ZstdCompressor */ + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "CONTINUE", ZSTD_e_continue); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_BLOCK", ZSTD_e_flush); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_FRAME", ZSTD_e_end); + + /* Make ZstdCompressor immutable (set Py_TPFLAGS_IMMUTABLETYPE) */ + PyType_Freeze(mod_state->ZstdCompressor_type); + +#undef ADD_TYPE +#undef ADD_INT_MACRO +#undef ADD_ZSTD_COMPRESSOR_INT_CONST return 0; } @@ -839,13 +689,6 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg) { _zstd_state* const mod_state = get_zstd_state(module); - Py_VISIT(mod_state->empty_bytes); - Py_VISIT(mod_state->empty_readonly_memoryview); - Py_VISIT(mod_state->str_read); - Py_VISIT(mod_state->str_readinto); - Py_VISIT(mod_state->str_write); - Py_VISIT(mod_state->str_flush); - Py_VISIT(mod_state->ZstdDict_type); Py_VISIT(mod_state->ZstdCompressor_type); @@ -863,13 +706,6 @@ _zstd_clear(PyObject *module) { _zstd_state* const mod_state = get_zstd_state(module); - Py_CLEAR(mod_state->empty_bytes); - Py_CLEAR(mod_state->empty_readonly_memoryview); - Py_CLEAR(mod_state->str_read); - Py_CLEAR(mod_state->str_readinto); - Py_CLEAR(mod_state->str_write); - Py_CLEAR(mod_state->str_flush); - Py_CLEAR(mod_state->ZstdDict_type); Py_CLEAR(mod_state->ZstdCompressor_type); @@ -895,15 +731,16 @@ static struct PyModuleDef_Slot _zstd_slots[] = { {0, NULL}, }; -struct PyModuleDef _zstdmodule = { - PyModuleDef_HEAD_INIT, +static struct PyModuleDef _zstdmodule = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "_zstd", + .m_doc = "Implementation module for Zstandard compression.", .m_size = sizeof(_zstd_state), .m_slots = _zstd_slots, .m_methods = _zstd_methods, .m_traverse = _zstd_traverse, .m_clear = _zstd_clear, - .m_free = _zstd_free + .m_free = _zstd_free, }; PyMODINIT_FUNC diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index 120fe9f84c7..b36486442c6 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -1,143 +1,30 @@ -#pragma once -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* Declarations shared between different parts of the _zstd module*/ -#include "Python.h" +#ifndef ZSTD_MODULE_H +#define ZSTD_MODULE_H -#include "zstd.h" -#include "zdict.h" - - -/* Forward declaration of module state */ -typedef struct _zstd_state _zstd_state; - -/* Forward reference of module def */ -extern PyModuleDef _zstdmodule; - -/* For clinic type calculations */ -static inline _zstd_state * -get_zstd_state_from_type(PyTypeObject *type) { - PyObject *module = PyType_GetModuleByDef(type, &_zstdmodule); - if (module == NULL) { - return NULL; - } - void *state = PyModule_GetState(module); - assert(state != NULL); - return (_zstd_state *)state; -} - -extern PyType_Spec zstddict_type_spec; -extern PyType_Spec zstdcompressor_type_spec; -extern PyType_Spec zstddecompressor_type_spec; - -struct _zstd_state { - PyObject *empty_bytes; - PyObject *empty_readonly_memoryview; - PyObject *str_read; - PyObject *str_readinto; - PyObject *str_write; - PyObject *str_flush; +/* Type specs */ +extern PyType_Spec zstd_dict_type_spec; +extern PyType_Spec zstd_compressor_type_spec; +extern PyType_Spec zstd_decompressor_type_spec; +typedef struct { + /* Module heap types. */ PyTypeObject *ZstdDict_type; PyTypeObject *ZstdCompressor_type; PyTypeObject *ZstdDecompressor_type; PyObject *ZstdError; + /* enum types set by set_parameter_types. */ PyTypeObject *CParameter_type; PyTypeObject *DParameter_type; -}; - -typedef struct { - PyObject_HEAD - - /* Reusable compress/decompress dictionary, they are created once and - can be shared by multiple threads concurrently, since its usage is - read-only. - c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ - ZSTD_DDict *d_dict; - PyObject *c_dicts; - - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; - /* Dictionary id */ - uint32_t dict_id; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdDict; - -typedef struct { - PyObject_HEAD - - /* Compression context */ - ZSTD_CCtx *cctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Last mode, initialized to ZSTD_e_end */ - int last_mode; - - /* (nbWorker >= 1) ? 1 : 0 */ - int use_multithread; - - /* Compression level */ - int compression_level; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdCompressor; - -typedef struct { - PyObject_HEAD - - /* Decompression context */ - ZSTD_DCtx *dctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Unconsumed input data */ - char *input_buffer; - size_t input_buffer_size; - size_t in_begin, in_end; - - /* Unused data */ - PyObject *unused_data; - - /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ - char needs_input; - - /* For decompress(), 0 or 1. - 1 when both input and output streams are at a frame edge, means a - frame is completely decoded and fully flushed, or the decompressor - just be initialized. */ - char at_frame_edge; - - /* For ZstdDecompressor, 0 or 1. - 1 means the end of the first frame has been reached. */ - char eof; - - /* Used for fast reset above three variables */ - char _unused_char_for_align; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdDecompressor; - -typedef enum { - TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function -} decompress_type; +} _zstd_state; typedef enum { ERR_DECOMPRESS, ERR_COMPRESS, - ERR_SET_PLEDGED_INPUT_SIZE, ERR_LOAD_D_DICT, ERR_LOAD_C_DICT, @@ -147,7 +34,7 @@ typedef enum { ERR_SET_C_LEVEL, ERR_TRAIN_DICT, - ERR_FINALIZE_DICT + ERR_FINALIZE_DICT, } error_type; typedef enum { @@ -156,11 +43,6 @@ typedef enum { DICT_TYPE_PREFIX = 2 } dictionary_type; -static inline int -mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { - return in->size == in->pos && out->size != out->pos; -} - /* Format error message and set ZstdError. */ extern void set_zstd_error(const _zstd_state* const state, @@ -170,14 +52,4 @@ extern void set_parameter_error(const _zstd_state* const state, int is_compress, int key_v, int value_v); -static const char init_twice_msg[] = "__init__ method is called twice."; - -extern PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type); - -extern PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive); +#endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h index 319b1214833..bff3a81d8aa 100644 --- a/Modules/_zstd/buffer.h +++ b/Modules/_zstd/buffer.h @@ -1,11 +1,12 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef ZSTD_BUFFER_H +#define ZSTD_BUFFER_H -#include "_zstdmodule.h" #include "pycore_blocks_output_buffer.h" +#include <zstd.h> // ZSTD_outBuffer + /* Blocks output buffer wrapper code */ /* Initialize the buffer, and grow the buffer. @@ -102,3 +103,5 @@ _OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) return buffer->allocated == buffer->max_length; } + +#endif // !ZSTD_BUFFER_H diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h index 2f8225389b7..766e1cfa776 100644 --- a/Modules/_zstd/clinic/_zstdmodule.c.h +++ b/Modules/_zstd/clinic/_zstdmodule.c.h @@ -9,11 +9,11 @@ preserve #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() -PyDoc_STRVAR(_zstd__train_dict__doc__, -"_train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" +PyDoc_STRVAR(_zstd_train_dict__doc__, +"train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" "--\n" "\n" -"Internal function, train a zstd dictionary on sample data.\n" +"Train a Zstandard dictionary on sample data.\n" "\n" " samples_bytes\n" " Concatenation of samples.\n" @@ -22,31 +22,31 @@ PyDoc_STRVAR(_zstd__train_dict__doc__, " dict_size\n" " The size of the dictionary."); -#define _ZSTD__TRAIN_DICT_METHODDEF \ - {"_train_dict", _PyCFunction_CAST(_zstd__train_dict), METH_FASTCALL, _zstd__train_dict__doc__}, +#define _ZSTD_TRAIN_DICT_METHODDEF \ + {"train_dict", _PyCFunction_CAST(_zstd_train_dict), METH_FASTCALL, _zstd_train_dict__doc__}, static PyObject * -_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size); +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size); static PyObject * -_zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_zstd_train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyBytesObject *samples_bytes; PyObject *samples_sizes; Py_ssize_t dict_size; - if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) { + if (!_PyArg_CheckPositional("train_dict", nargs, 3, 3)) { goto exit; } if (!PyBytes_Check(args[0])) { - _PyArg_BadArgument("_train_dict", "argument 1", "bytes", args[0]); + _PyArg_BadArgument("train_dict", "argument 1", "bytes", args[0]); goto exit; } samples_bytes = (PyBytesObject *)args[0]; if (!PyTuple_Check(args[1])) { - _PyArg_BadArgument("_train_dict", "argument 2", "tuple", args[1]); + _PyArg_BadArgument("train_dict", "argument 2", "tuple", args[1]); goto exit; } samples_sizes = args[1]; @@ -62,18 +62,18 @@ _zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } dict_size = ival; } - return_value = _zstd__train_dict_impl(module, samples_bytes, samples_sizes, dict_size); + return_value = _zstd_train_dict_impl(module, samples_bytes, samples_sizes, dict_size); exit: return return_value; } -PyDoc_STRVAR(_zstd__finalize_dict__doc__, -"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n" -" samples_sizes, dict_size, compression_level, /)\n" +PyDoc_STRVAR(_zstd_finalize_dict__doc__, +"finalize_dict($module, custom_dict_bytes, samples_bytes, samples_sizes,\n" +" dict_size, compression_level, /)\n" "--\n" "\n" -"Internal function, finalize a zstd dictionary.\n" +"Finalize a Zstandard dictionary.\n" "\n" " custom_dict_bytes\n" " Custom dictionary content.\n" @@ -84,19 +84,19 @@ PyDoc_STRVAR(_zstd__finalize_dict__doc__, " dict_size\n" " The size of the dictionary.\n" " compression_level\n" -" Optimize for a specific zstd compression level, 0 means default."); +" Optimize for a specific Zstandard compression level, 0 means default."); -#define _ZSTD__FINALIZE_DICT_METHODDEF \ - {"_finalize_dict", _PyCFunction_CAST(_zstd__finalize_dict), METH_FASTCALL, _zstd__finalize_dict__doc__}, +#define _ZSTD_FINALIZE_DICT_METHODDEF \ + {"finalize_dict", _PyCFunction_CAST(_zstd_finalize_dict), METH_FASTCALL, _zstd_finalize_dict__doc__}, static PyObject * -_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, - PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size, - int compression_level); +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level); static PyObject * -_zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_zstd_finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyBytesObject *custom_dict_bytes; @@ -105,21 +105,21 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) Py_ssize_t dict_size; int compression_level; - if (!_PyArg_CheckPositional("_finalize_dict", nargs, 5, 5)) { + if (!_PyArg_CheckPositional("finalize_dict", nargs, 5, 5)) { goto exit; } if (!PyBytes_Check(args[0])) { - _PyArg_BadArgument("_finalize_dict", "argument 1", "bytes", args[0]); + _PyArg_BadArgument("finalize_dict", "argument 1", "bytes", args[0]); goto exit; } custom_dict_bytes = (PyBytesObject *)args[0]; if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("_finalize_dict", "argument 2", "bytes", args[1]); + _PyArg_BadArgument("finalize_dict", "argument 2", "bytes", args[1]); goto exit; } samples_bytes = (PyBytesObject *)args[1]; if (!PyTuple_Check(args[2])) { - _PyArg_BadArgument("_finalize_dict", "argument 3", "tuple", args[2]); + _PyArg_BadArgument("finalize_dict", "argument 3", "tuple", args[2]); goto exit; } samples_sizes = args[2]; @@ -139,32 +139,31 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (compression_level == -1 && PyErr_Occurred()) { goto exit; } - return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); + return_value = _zstd_finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); exit: return return_value; } -PyDoc_STRVAR(_zstd__get_param_bounds__doc__, -"_get_param_bounds($module, /, parameter, is_compress)\n" +PyDoc_STRVAR(_zstd_get_param_bounds__doc__, +"get_param_bounds($module, /, parameter, is_compress)\n" "--\n" "\n" -"Internal function, get CompressionParameter/DecompressionParameter bounds.\n" +"Get CompressionParameter/DecompressionParameter bounds.\n" "\n" " parameter\n" " The parameter to get bounds.\n" " is_compress\n" " True for CompressionParameter, False for DecompressionParameter."); -#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \ - {"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__}, +#define _ZSTD_GET_PARAM_BOUNDS_METHODDEF \ + {"get_param_bounds", _PyCFunction_CAST(_zstd_get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd_get_param_bounds__doc__}, static PyObject * -_zstd__get_param_bounds_impl(PyObject *module, int parameter, - int is_compress); +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress); static PyObject * -_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -190,7 +189,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg static const char * const _keywords[] = {"parameter", "is_compress", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_get_param_bounds", + .fname = "get_param_bounds", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -211,7 +210,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg if (is_compress < 0) { goto exit; } - return_value = _zstd__get_param_bounds_impl(module, parameter, is_compress); + return_value = _zstd_get_param_bounds_impl(module, parameter, is_compress); exit: return return_value; @@ -221,13 +220,11 @@ PyDoc_STRVAR(_zstd_get_frame_size__doc__, "get_frame_size($module, /, frame_buffer)\n" "--\n" "\n" -"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n" +"Get the size of a Zstandard frame, including the header and optional checksum.\n" "\n" " frame_buffer\n" " A bytes-like object, it should start from the beginning of a frame,\n" -" and contains at least one complete frame.\n" -"\n" -"It will iterate all blocks\' headers within a frame, to accumulate the frame size."); +" and contains at least one complete frame."); #define _ZSTD_GET_FRAME_SIZE_METHODDEF \ {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__}, @@ -288,23 +285,23 @@ exit: return return_value; } -PyDoc_STRVAR(_zstd__get_frame_info__doc__, -"_get_frame_info($module, /, frame_buffer)\n" +PyDoc_STRVAR(_zstd_get_frame_info__doc__, +"get_frame_info($module, /, frame_buffer)\n" "--\n" "\n" -"Internal function, get zstd frame infomation from a frame header.\n" +"Get Zstandard frame infomation from a frame header.\n" "\n" " frame_buffer\n" -" A bytes-like object, containing the header of a zstd frame."); +" A bytes-like object, containing the header of a Zstandard frame."); -#define _ZSTD__GET_FRAME_INFO_METHODDEF \ - {"_get_frame_info", _PyCFunction_CAST(_zstd__get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd__get_frame_info__doc__}, +#define _ZSTD_GET_FRAME_INFO_METHODDEF \ + {"get_frame_info", _PyCFunction_CAST(_zstd_get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_info__doc__}, static PyObject * -_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); static PyObject * -_zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -330,7 +327,7 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, static const char * const _keywords[] = {"frame_buffer", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_get_frame_info", + .fname = "get_frame_info", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -345,7 +342,7 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { goto exit; } - return_value = _zstd__get_frame_info_impl(module, &frame_buffer); + return_value = _zstd_get_frame_info_impl(module, &frame_buffer); exit: /* Cleanup for frame_buffer */ @@ -356,26 +353,26 @@ exit: return return_value; } -PyDoc_STRVAR(_zstd__set_parameter_types__doc__, -"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" +PyDoc_STRVAR(_zstd_set_parameter_types__doc__, +"set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" "--\n" "\n" -"Internal function, set CompressionParameter/DecompressionParameter types for validity check.\n" +"Set CompressionParameter and DecompressionParameter types for validity check.\n" "\n" " c_parameter_type\n" " CompressionParameter IntEnum type object\n" " d_parameter_type\n" " DecompressionParameter IntEnum type object"); -#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \ - {"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__}, +#define _ZSTD_SET_PARAMETER_TYPES_METHODDEF \ + {"set_parameter_types", _PyCFunction_CAST(_zstd_set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd_set_parameter_types__doc__}, static PyObject * -_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, - PyObject *d_parameter_type); +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type); static PyObject * -_zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -401,7 +398,7 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_set_parameter_types", + .fname = "set_parameter_types", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -415,18 +412,18 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n goto exit; } if (!PyObject_TypeCheck(args[0], &PyType_Type)) { - _PyArg_BadArgument("_set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); + _PyArg_BadArgument("set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); goto exit; } c_parameter_type = args[0]; if (!PyObject_TypeCheck(args[1], &PyType_Type)) { - _PyArg_BadArgument("_set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); + _PyArg_BadArgument("set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); goto exit; } d_parameter_type = args[1]; - return_value = _zstd__set_parameter_types_impl(module, c_parameter_type, d_parameter_type); + return_value = _zstd_set_parameter_types_impl(module, c_parameter_type, d_parameter_type); exit: return return_value; } -/*[clinic end generated code: output=189c462236a7096c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=437b084f149e68e5 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index d7909cdf89f..f69161b590e 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -8,30 +8,30 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdCompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, "ZstdCompressor(level=None, options=None, zstd_dict=None)\n" "--\n" "\n" "Create a compressor object for compressing data incrementally.\n" "\n" " level\n" -" The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.\n" +" The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.\n" " options\n" " A dict object that contains advanced compression parameters.\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" "\n" "Thread-safe at method level. For one-shot compression, use the compress()\n" "function instead."); -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict); +static PyObject * +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict); -static int -_zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 3 @@ -89,7 +89,7 @@ _zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) } zstd_dict = fastargs[2]; skip_optional_pos: - return_value = _zstd_ZstdCompressor___init___impl((ZstdCompressor *)self, level, options, zstd_dict); + return_value = _zstd_ZstdCompressor_new_impl(type, level, options, zstd_dict); exit: return return_value; @@ -189,9 +189,9 @@ PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__, " Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n" " ZstdCompressor.FLUSH_BLOCK\n" "\n" -"Flush any remaining data left in internal buffers. Since zstd data consists\n" -"of one or more independent frames, the compressor object can still be used\n" -"after this method is called."); +"Flush any remaining data left in internal buffers. Since Zstandard data\n" +"consists of one or more independent frames, the compressor object can still\n" +"be used after this method is called."); #define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \ {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__}, @@ -252,4 +252,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=ef69eab155be39f6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ee2d1dc298de790c input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h index 9359c637203..4ecb19e9bde 100644 --- a/Modules/_zstd/clinic/decompressor.c.h +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -10,28 +10,28 @@ preserve #include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDecompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__, "ZstdDecompressor(zstd_dict=None, options=None)\n" "--\n" "\n" "Create a decompressor object for decompressing data incrementally.\n" "\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" " options\n" " A dict object that contains advanced decompression parameters.\n" "\n" "Thread-safe at method level. For one-shot decompression, use the decompress()\n" "function instead."); -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options); +static PyObject * +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options); -static int -_zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 2 @@ -82,7 +82,7 @@ _zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs } options = fastargs[1]; skip_optional_pos: - return_value = _zstd_ZstdDecompressor___init___impl((ZstdDecompressor *)self, zstd_dict, options); + return_value = _zstd_ZstdDecompressor_new_impl(type, zstd_dict, options); exit: return return_value; @@ -130,7 +130,7 @@ PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, "Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n" "\n" " data\n" -" A bytes-like object, zstd data to be decompressed.\n" +" A bytes-like object, Zstandard data to be decompressed.\n" " max_length\n" " Maximum size of returned data. When it is negative, the size of\n" " output buffer is unlimited. When it is nonnegative, returns at\n" @@ -227,4 +227,4 @@ exit: return return_value; } -/*[clinic end generated code: output=ae703f0465a2906d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7a4d278f9244e684 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 4e0f7b64172..34e0e4b3ecf 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -9,35 +9,33 @@ preserve #include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDict___init____doc__, -"ZstdDict(dict_content, is_raw=False)\n" +PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, +"ZstdDict(dict_content, /, *, is_raw=False)\n" "--\n" "\n" -"Represents a zstd dictionary, which can be used for compression/decompression.\n" +"Represents a Zstandard dictionary.\n" "\n" " dict_content\n" -" A bytes-like object, dictionary\'s content.\n" +" The content of a Zstandard dictionary as a bytes-like object.\n" " is_raw\n" -" This parameter is for advanced user. True means dict_content\n" -" argument is a \"raw content\" dictionary, free of any format\n" -" restriction. False means dict_content argument is an ordinary\n" -" zstd dictionary, was created by zstd functions, follow a\n" -" specified format.\n" +" If true, perform no checks on *dict_content*, useful for some\n" +" advanced cases. Otherwise, check that the content represents\n" +" a Zstandard dictionary created by the zstd library or CLI.\n" "\n" -"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n" -"ZstdDecompressor objects."); +"The dictionary can be used for compression or decompression, and can be shared\n" +"by multiple ZstdCompressor or ZstdDecompressor objects."); -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw); +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, + int is_raw); -static int -_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 1 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -46,7 +44,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), }, + .ob_item = { &_Py_ID(is_raw), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -55,7 +53,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"dict_content", "is_raw", NULL}; + static const char * const _keywords[] = {"", "is_raw", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "ZstdDict", @@ -70,20 +68,20 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) int is_raw = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, - /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } dict_content = fastargs[0]; if (!noptargs) { - goto skip_optional_pos; + goto skip_optional_kwonly; } is_raw = PyObject_IsTrue(fastargs[1]); if (is_raw < 0) { goto exit; } -skip_optional_pos: - return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw); +skip_optional_kwonly: + return_value = _zstd_ZstdDict_new_impl(type, dict_content, is_raw); exit: return return_value; @@ -204,4 +202,4 @@ _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) return return_value; } -/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bfb31c1187477afd input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index fc1d3b9d210..38baee2be1e 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -1,30 +1,49 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdCompressor class definitions */ /*[clinic input] module _zstd -class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type" +class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/ - +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7166021db1ef7df8]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#include "_zstdmodule.h" +#include "Python.h" +#include "_zstdmodule.h" #include "buffer.h" +#include "zstddict.h" #include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Compression context */ + ZSTD_CCtx *cctx; + + /* ZstdDict object in use */ + PyObject *dict; + /* Last mode, initialized to ZSTD_e_end */ + int last_mode; + + /* (nbWorker >= 1) ? 1 : 0 */ + int use_multithread; + + /* Compression level */ + int compression_level; +} ZstdCompressor; #define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) +#include "clinic/compressor.c.h" + static int _zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, const char *arg_name, const char* arg_type) @@ -96,7 +115,7 @@ _zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, self->compression_level = value_v; } else if (key_v == ZSTD_c_nbWorkers) { - /* From zstd library doc: + /* From the zstd library docs: 1. When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream2(). 2, Default value is `0`, aka "single-threaded mode" : no @@ -163,8 +182,8 @@ _get_CDict(ZstdDict *self, int compressionLevel) _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_CDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_CDict instance from " + "Zstandard dictionary content."); } goto error; } @@ -198,7 +217,8 @@ success: } static int -_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { +_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +{ size_t zstd_ret; _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); @@ -291,23 +311,34 @@ load: return 0; } -#define clinic_state() (get_zstd_state_from_type(type)) -#include "clinic/compressor.c.h" -#undef clinic_state +/*[clinic input] +@classmethod +_zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new + level: object = None + The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. + options: object = None + A dict object that contains advanced compression parameters. + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + +Create a compressor object for compressing data incrementally. + +Thread-safe at method level. For one-shot compression, use the compress() +function instead. +[clinic start generated code]*/ static PyObject * -_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict) +/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ { - ZstdCompressor *self; - self = PyObject_GC_New(ZstdCompressor, type); + ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); if (self == NULL) { goto error; } - self->inited = 0; - self->dict = NULL; self->use_multithread = 0; - + self->dict = NULL; /* Compression context */ self->cctx = ZSTD_createCCtx(); @@ -323,96 +354,64 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject /* Last mode */ self->last_mode = ZSTD_e_end; - return (PyObject*)self; - -error: - if (self != NULL) { - PyObject_GC_Del(self); - } - return NULL; -} - -static void -ZstdCompressor_dealloc(PyObject *ob) -{ - ZstdCompressor *self = ZstdCompressor_CAST(ob); - - PyObject_GC_UnTrack(self); - - /* Free compression context */ - ZSTD_freeCCtx(self->cctx); - - /* Py_XDECREF the dict after free the compression context */ - Py_CLEAR(self->dict); - - PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); - Py_DECREF(tp); -} - -/*[clinic input] -_zstd.ZstdCompressor.__init__ - - level: object = None - The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT. - options: object = None - A dict object that contains advanced compression parameters. - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - -Create a compressor object for compressing data incrementally. - -Thread-safe at method level. For one-shot compression, use the compress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict) -/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - if (level != Py_None && options != Py_None) { PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); - return -1; + goto error; } /* Set compressLevel/options to compression context */ if (level != Py_None) { if (_zstd_set_c_parameters(self, level, "level", "int") < 0) { - return -1; + goto error; } } if (options != Py_None) { if (_zstd_set_c_parameters(self, options, "options", "dict") < 0) { - return -1; + goto error; } } - /* Load dictionary to compression context */ + /* Load Zstandard dictionary to compression context */ if (zstd_dict != Py_None) { if (_zstd_load_c_dict(self, zstd_dict) < 0) { - return -1; + goto error; } - - /* Py_INCREF the dict */ Py_INCREF(zstd_dict); self->dict = zstd_dict; } - // We can only start tracking self with the GC once self->dict is set. + // We can only start GC tracking once self->dict is set. PyObject_GC_Track(self); - return 0; + + return (PyObject*)self; + +error: + Py_XDECREF(self); + return NULL; } -PyObject * +static void +ZstdCompressor_dealloc(PyObject *ob) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free compression context */ + if (self->cctx) { + ZSTD_freeCCtx(self->cctx); + } + + /* Py_XDECREF the dict after free the compression context */ + Py_CLEAR(self->dict); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_Del(ob); + Py_DECREF(tp); +} + +static PyObject * compress_impl(ZstdCompressor *self, Py_buffer *data, ZSTD_EndDirective end_directive) { @@ -447,7 +446,7 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); @@ -487,6 +486,14 @@ error: return NULL; } +#ifdef Py_DEBUG +static inline int +mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) +{ + return in->size == in->pos && out->size != out->pos; +} +#endif + static PyObject * compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) { @@ -505,7 +512,7 @@ compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS do { @@ -614,14 +621,14 @@ _zstd.ZstdCompressor.flush Finish the compression process. -Flush any remaining data left in internal buffers. Since zstd data consists -of one or more independent frames, the compressor object can still be used -after this method is called. +Flush any remaining data left in internal buffers. Since Zstandard data +consists of one or more independent frames, the compressor object can still +be used after this method is called. [clinic start generated code]*/ static PyObject * _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) -/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=a766870301932b85]*/ +/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=0ab19627f323cdbc]*/ { PyObject *ret; @@ -655,8 +662,7 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) static PyMethodDef ZstdCompressor_methods[] = { _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF - - {0} + {NULL, NULL} }; PyDoc_STRVAR(ZstdCompressor_last_mode_doc, @@ -668,7 +674,7 @@ PyDoc_STRVAR(ZstdCompressor_last_mode_doc, static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), Py_READONLY, ZstdCompressor_last_mode_doc}, - {0} + {NULL} }; static int @@ -690,18 +696,20 @@ ZstdCompressor_clear(PyObject *ob) static PyType_Slot zstdcompressor_slots[] = { {Py_tp_new, _zstd_ZstdCompressor_new}, {Py_tp_dealloc, ZstdCompressor_dealloc}, - {Py_tp_init, _zstd_ZstdCompressor___init__}, {Py_tp_methods, ZstdCompressor_methods}, {Py_tp_members, ZstdCompressor_members}, - {Py_tp_doc, (char*)_zstd_ZstdCompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdCompressor_new__doc__}, {Py_tp_traverse, ZstdCompressor_traverse}, {Py_tp_clear, ZstdCompressor_clear}, - {0} + {0, 0} }; -PyType_Spec zstdcompressor_type_spec = { - .name = "_zstd.ZstdCompressor", +PyType_Spec zstd_compressor_type_spec = { + .name = "compression.zstd.ZstdCompressor", .basicsize = sizeof(ZstdCompressor), + // Py_TPFLAGS_IMMUTABLETYPE is not used here as several + // associated constants need to be added to the type. + // PyType_Freeze is called later to set the flag. .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .slots = zstdcompressor_slots, }; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index 4ac28d9c987..58f9c9f804e 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -1,28 +1,56 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdDecompressor class definition */ /*[clinic input] module _zstd -class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type" +class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#include "_zstdmodule.h" +#include "Python.h" +#include "_zstdmodule.h" #include "buffer.h" +#include "zstddict.h" +#include <stdbool.h> // bool #include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Decompression context */ + ZSTD_DCtx *dctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Unconsumed input data */ + char *input_buffer; + size_t input_buffer_size; + size_t in_begin, in_end; + + /* Unused data */ + PyObject *unused_data; + + /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ + bool needs_input; + + /* For ZstdDecompressor, 0 or 1. + 1 means the end of the first frame has been reached. */ + bool eof; +} ZstdDecompressor; #define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) +#include "clinic/decompressor.c.h" + static inline ZSTD_DDict * _get_DDict(ZstdDict *self) { @@ -47,8 +75,8 @@ _get_DDict(ZstdDict *self) _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_DDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_DDict instance from " + "Zstandard dictionary content."); } } } @@ -215,22 +243,13 @@ load: return 0; } - - /* - Given the two types of decompressors (defined in _zstdmodule.h): - - typedef enum { - TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function - } decompress_type; - - Decompress implementation for <D>, <E>, pseudo code: + Decompress implementation in pseudo code: initialize_output_buffer while True: decompress_data - set_object_flag # .eof for <D>, .at_frame_edge for <E>. + set_object_flag # .eof if output_buffer_exhausted: if output_buffer_reached_max_length: @@ -240,76 +259,26 @@ load: finish ZSTD_decompressStream()'s size_t return value: - - 0 when a frame is completely decoded and fully flushed, zstd's internal - buffer has no data. + - 0 when a frame is completely decoded and fully flushed, + zstd's internal buffer has no data. - An error code, which can be tested using ZSTD_isError(). - Or any other value > 0, which means there is still some decoding or flushing to do to complete current frame. Note, decompressing "an empty input" in any case will make it > 0. - - <E> supports multiple frames, has an .at_frame_edge flag, it means both the - input and output streams are at a frame edge. The flag can be set by this - statement: - - .at_frame_edge = (zstd_ret == 0) ? 1 : 0 - - But if decompressing "an empty input" at "a frame edge", zstd_ret will be - non-zero, then .at_frame_edge will be wrongly set to false. To solve this - problem, two AFE checks are needed to ensure that: when at "a frame edge", - empty input will not be decompressed. - - // AFE check - if (self->at_frame_edge && in->pos == in->size) { - finish - } - - In <E>, if .at_frame_edge is eventually set to true, but input stream has - unconsumed data (in->pos < in->size), then the outer function - stream_decompress() will set .at_frame_edge to false. In this case, - although the output stream is at a frame edge, for the caller, the input - stream is not at a frame edge, see below diagram. This behavior does not - affect the next AFE check, since (in->pos < in->size). - - input stream: --------------|--- - ^ - output stream: ====================| - ^ */ -PyObject * +static PyObject * decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type) + Py_ssize_t max_length) { size_t zstd_ret; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; PyObject *ret; - /* The first AFE check for setting .at_frame_edge flag */ - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (self->at_frame_edge && in->pos == in->size) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return NULL; - } - ret = mod_state->empty_bytes; - Py_INCREF(ret); - return ret; - } - } - /* Initialize the output buffer */ - if (initial_size >= 0) { - if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { - goto error; - } - } - else { - if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { - goto error; - } + if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { + goto error; } assert(out.pos == 0); @@ -328,26 +297,15 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, goto error; } - /* Set .eof/.af_frame_edge flag */ - if (type == TYPE_DECOMPRESSOR) { - /* ZstdDecompressor class stops when a frame is decompressed */ - if (zstd_ret == 0) { - self->eof = 1; - break; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* decompress() function supports multiple frames */ - self->at_frame_edge = (zstd_ret == 0) ? 1 : 0; - - /* The second AFE check for setting .at_frame_edge flag */ - if (self->at_frame_edge && in->pos == in->size) { - break; - } + /* Set .eof flag */ + if (zstd_ret == 0) { + /* Stop when a frame is decompressed */ + self->eof = 1; + break; } /* Need to check out before in. Maybe zstd's internal buffer still has - a few bytes can be output, grow the buffer and continue. */ + a few bytes that can be output, grow the buffer and continue. */ if (out.pos == out.size) { /* Output buffer exhausted */ @@ -380,9 +338,8 @@ error: return NULL; } -void -decompressor_reset_session(ZstdDecompressor *self, - decompress_type type) +static void +decompressor_reset_session(ZstdDecompressor *self) { // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here // and ensure lock is always held @@ -391,56 +348,28 @@ decompressor_reset_session(ZstdDecompressor *self, self->in_begin = 0; self->in_end = 0; - if (type == TYPE_DECOMPRESSOR) { - Py_CLEAR(self->unused_data); - } + Py_CLEAR(self->unused_data); /* Reset variables in one operation */ self->needs_input = 1; - self->at_frame_edge = 1; self->eof = 0; - self->_unused_char_for_align = 0; - /* Resetting session never fail */ + /* Resetting session is guaranteed to never fail */ ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); } -PyObject * -stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length, - decompress_type type) +static PyObject * +stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) { - Py_ssize_t initial_buffer_size = -1; ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; - if (type == TYPE_DECOMPRESSOR) { - /* Check .eof flag */ - if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); - assert(ret == NULL); - goto success; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* Fast path for the first frame */ - if (self->at_frame_edge && self->in_begin == self->in_end) { - /* Read decompressed size */ - uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len); - - /* These two zstd constants always > PY_SSIZE_T_MAX: - ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) - ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) - - Use ZSTD_findFrameCompressedSize() to check complete frame, - prevent allocating too much memory for small input chunk. */ - - if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX && - !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) ) - { - initial_buffer_size = (Py_ssize_t) decompressed_size; - } - } + /* Check .eof flag */ + if (self->eof) { + PyErr_SetString(PyExc_EOFError, "Already at the end of a Zstandard frame."); + assert(ret == NULL); + return NULL; } /* Prepare input buffer w/wo unconsumed data */ @@ -527,30 +456,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, - max_length, initial_buffer_size, - type); + ret = decompress_impl(self, &in, max_length); if (ret == NULL) { goto error; } /* Unconsumed input data */ if (in.pos == in.size) { - if (type == TYPE_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length || self->eof) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + if (Py_SIZE(ret) == max_length || self->eof) { + self->needs_input = 0; } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length && !self->at_frame_edge) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + else { + self->needs_input = 1; } if (use_input_buffer) { @@ -564,10 +481,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length self->needs_input = 0; - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - self->at_frame_edge = 0; - } - if (!use_input_buffer) { /* Discard buffer if it's too small (resizing it may needlessly copy the current contents) */ @@ -600,43 +513,52 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length } } - goto success; + return ret; error: /* Reset decompressor's states/session */ - decompressor_reset_session(self, type); + decompressor_reset_session(self); Py_CLEAR(ret); -success: - - return ret; + return NULL; } +/*[clinic input] +@classmethod +_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + options: object = None + A dict object that contains advanced decompression parameters. + +Create a decompressor object for decompressing data incrementally. + +Thread-safe at method level. For one-shot decompression, use the decompress() +function instead. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options) +/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/ { - ZstdDecompressor *self; - self = PyObject_GC_New(ZstdDecompressor, type); + ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type); if (self == NULL) { goto error; } - self->inited = 0; - self->dict = NULL; self->input_buffer = NULL; self->input_buffer_size = 0; self->in_begin = -1; self->in_end = -1; self->unused_data = NULL; self->eof = 0; + self->dict = NULL; /* needs_input flag */ self->needs_input = 1; - /* at_frame_edge flag */ - self->at_frame_edge = 1; - /* Decompression context */ self->dctx = ZSTD_createDCtx(); if (self->dctx == NULL) { @@ -648,12 +570,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + /* Load Zstandard dictionary to decompression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_d_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + /* Set option to decompression context */ + if (options != Py_None) { + if (_zstd_set_d_parameters(self, options) < 0) { + goto error; + } + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -665,7 +604,9 @@ ZstdDecompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free decompression context */ - ZSTD_freeDCtx(self->dctx); + if (self->dctx) { + ZSTD_freeDCtx(self->dctx); + } /* Py_CLEAR the dict after free decompression context */ Py_CLEAR(self->dict); @@ -682,55 +623,6 @@ ZstdDecompressor_dealloc(PyObject *ob) } /*[clinic input] -_zstd.ZstdDecompressor.__init__ - - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - options: object = None - A dict object that contains advanced decompression parameters. - -Create a decompressor object for decompressing data incrementally. - -Thread-safe at method level. For one-shot decompression, use the decompress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options) -/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (_zstd_load_d_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - /* Set option to decompression context */ - if (options != Py_None) { - if (_zstd_set_d_parameters(self, options) < 0) { - return -1; - } - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - -/*[clinic input] @critical_section @getter _zstd.ZstdDecompressor.unused_data @@ -747,16 +639,8 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) { PyObject *ret; - /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - if (!self->eof) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return NULL; - } - ret = mod_state->empty_bytes; - Py_INCREF(ret); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else { if (self->unused_data == NULL) { @@ -772,8 +656,6 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) } } - Py_END_CRITICAL_SECTION(); - return ret; } @@ -781,7 +663,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) _zstd.ZstdDecompressor.decompress data: Py_buffer - A bytes-like object, zstd data to be decompressed. + A bytes-like object, Zstandard data to be decompressed. max_length: Py_ssize_t = -1 Maximum size of returned data. When it is negative, the size of output buffer is unlimited. When it is nonnegative, returns at @@ -807,25 +689,20 @@ static PyObject * _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) -/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/ +/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/ { PyObject *ret; /* Thread-safe code */ Py_BEGIN_CRITICAL_SECTION(self); - ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR); + ret = stream_decompress(self, data, max_length); Py_END_CRITICAL_SECTION(); return ret; } -#define clinic_state() (get_zstd_state_from_type(type)) -#include "clinic/decompressor.c.h" -#undef clinic_state - static PyMethodDef ZstdDecompressor_methods[] = { _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF - - {0} + {NULL, NULL} }; PyDoc_STRVAR(ZstdDecompressor_eof_doc, @@ -840,17 +717,14 @@ PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, static PyMemberDef ZstdDecompressor_members[] = { {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof), Py_READONLY, ZstdDecompressor_eof_doc}, - {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input), Py_READONLY, ZstdDecompressor_needs_input_doc}, - - {0} + {NULL} }; static PyGetSetDef ZstdDecompressor_getset[] = { _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF - - {0} + {NULL} }; static int @@ -873,19 +747,19 @@ ZstdDecompressor_clear(PyObject *ob) static PyType_Slot ZstdDecompressor_slots[] = { {Py_tp_new, _zstd_ZstdDecompressor_new}, {Py_tp_dealloc, ZstdDecompressor_dealloc}, - {Py_tp_init, _zstd_ZstdDecompressor___init__}, {Py_tp_methods, ZstdDecompressor_methods}, {Py_tp_members, ZstdDecompressor_members}, {Py_tp_getset, ZstdDecompressor_getset}, - {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__}, {Py_tp_traverse, ZstdDecompressor_traverse}, {Py_tp_clear, ZstdDecompressor_clear}, - {0} + {0, 0} }; -PyType_Spec zstddecompressor_type_spec = { - .name = "_zstd.ZstdDecompressor", +PyType_Spec zstd_decompressor_type_spec = { + .name = "compression.zstd.ZstdDecompressor", .basicsize = sizeof(ZstdDecompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, .slots = ZstdDecompressor_slots, }; diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 53c96b10410..7df187a6fa6 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -1,38 +1,58 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdDict class definitions */ /*[clinic input] module _zstd -class _zstd.ZstdDict "ZstdDict *" "clinic_state()->ZstdDict_type" +class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a5d1254c497e52ba]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif +#include "Python.h" + #include "_zstdmodule.h" +#include "zstddict.h" +#include "clinic/zstddict.c.h" -#include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() #define ZstdDict_CAST(op) ((ZstdDict *)op) +/*[clinic input] +@classmethod +_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new + dict_content: object + The content of a Zstandard dictionary as a bytes-like object. + / + * + is_raw: bool = False + If true, perform no checks on *dict_content*, useful for some + advanced cases. Otherwise, check that the content represents + a Zstandard dictionary created by the zstd library or CLI. + +Represents a Zstandard dictionary. + +The dictionary can be used for compression or decompression, and can be shared +by multiple ZstdCompressor or ZstdDecompressor objects. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, + int is_raw) +/*[clinic end generated code: output=3ebff839cb3be6d7 input=6b5de413869ae878]*/ { - ZstdDict *self; - self = PyObject_GC_New(ZstdDict, type); + ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { goto error; } self->dict_content = NULL; - self->inited = 0; self->d_dict = NULL; + self->dict_id = 0; /* ZSTD_CDict dict */ self->c_dicts = PyDict_New(); @@ -40,78 +60,20 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_U goto error; } - return (PyObject*)self; - -error: - if (self != NULL) { - PyObject_GC_Del(self); - } - return NULL; -} - -static void -ZstdDict_dealloc(PyObject *ob) -{ - ZstdDict *self = ZstdDict_CAST(ob); - - PyObject_GC_UnTrack(self); - - /* Free ZSTD_DDict instance */ - ZSTD_freeDDict(self->d_dict); - - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); - Py_CLEAR(self->c_dicts); - - PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); - Py_DECREF(tp); -} - -/*[clinic input] -_zstd.ZstdDict.__init__ - - dict_content: object - A bytes-like object, dictionary's content. - is_raw: bool = False - This parameter is for advanced user. True means dict_content - argument is a "raw content" dictionary, free of any format - restriction. False means dict_content argument is an ordinary - zstd dictionary, was created by zstd functions, follow a - specified format. - -Represents a zstd dictionary, which can be used for compression/decompression. - -It's thread-safe, and can be shared by multiple ZstdCompressor / -ZstdDecompressor objects. -[clinic start generated code]*/ - -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw) -/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - /* Check dict_content's type */ self->dict_content = PyBytes_FromObject(dict_content); if (self->dict_content == NULL) { PyErr_SetString(PyExc_TypeError, "dict_content argument should be bytes-like object."); - return -1; + goto error; } /* Both ordinary dictionary and "raw content" dictionary should at least 8 bytes */ if (Py_SIZE(self->dict_content) < 8) { PyErr_SetString(PyExc_ValueError, - "Zstd dictionary content should at least 8 bytes."); - return -1; + "Zstandard dictionary content should at least 8 bytes."); + goto error; } /* Get dict_id, 0 means "raw content" dictionary. */ @@ -120,35 +82,51 @@ _zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, /* Check validity for ordinary dictionary */ if (!is_raw && self->dict_id == 0) { - char *msg = "The dict_content argument is not a valid zstd " - "dictionary. The first 4 bytes of a valid zstd dictionary " - "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n" - "If you are an advanced user, and can be sure that " - "dict_content argument is a \"raw content\" zstd " - "dictionary, set is_raw parameter to True."; + char *msg = "Invalid Zstandard dictionary and is_raw not set.\n"; PyErr_SetString(PyExc_ValueError, msg); - return -1; + goto error; } // Can only track self once self->dict_content is included PyObject_GC_Track(self); - return 0; + + return (PyObject*)self; + +error: + Py_XDECREF(self); + return NULL; } -#define clinic_state() (get_zstd_state(type)) -#include "clinic/zstddict.c.h" -#undef clinic_state +static void +ZstdDict_dealloc(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free ZSTD_DDict instance */ + if (self->d_dict) { + ZSTD_freeDDict(self->d_dict); + } + + /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ + Py_CLEAR(self->dict_content); + Py_CLEAR(self->c_dicts); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_Del(ob); + Py_DECREF(tp); +} PyDoc_STRVAR(ZstdDict_dictid_doc, -"ID of zstd dictionary, a 32-bit unsigned int value.\n\n" -"Non-zero means ordinary dictionary, was created by zstd functions, follow\n" -"a specified format.\n\n" -"0 means a \"raw content\" dictionary, free of any format restriction, used\n" -"for advanced user."); +"the Zstandard dictionary, an int between 0 and 2**32.\n\n" +"A non-zero value represents an ordinary Zstandard dictionary, " +"conforming to the standardised format.\n\n" +"The special value '0' means a 'raw content' dictionary," +"without any restrictions on format or content."); PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of zstd dictionary, a bytes object, it's the same as dict_content\n" -"argument in ZstdDict.__init__() method. It can be used with other programs."); +"The content of a Zstandard dictionary, as a bytes object."); static PyObject * ZstdDict_str(PyObject *ob) @@ -161,7 +139,7 @@ ZstdDict_str(PyObject *ob) static PyMemberDef ZstdDict_members[] = { {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, - {0} + {NULL} }; /*[clinic input] @@ -231,12 +209,9 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) static PyGetSetDef ZstdDict_getset[] = { _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF - _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF - _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF - - {0} + {NULL} }; static Py_ssize_t @@ -269,18 +244,18 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_getset, ZstdDict_getset}, {Py_tp_new, _zstd_ZstdDict_new}, {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_init, _zstd_ZstdDict___init__}, {Py_tp_str, ZstdDict_str}, - {Py_tp_doc, (char*)_zstd_ZstdDict___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, {Py_tp_clear, ZstdDict_clear}, - {0} + {0, 0} }; -PyType_Spec zstddict_type_spec = { - .name = "_zstd.ZstdDict", +PyType_Spec zstd_dict_type_spec = { + .name = "compression.zstd.ZstdDict", .basicsize = sizeof(ZstdDict), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, .slots = zstddict_slots, }; diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h new file mode 100644 index 00000000000..e8a55a3670b --- /dev/null +++ b/Modules/_zstd/zstddict.h @@ -0,0 +1,24 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef ZSTD_DICT_H +#define ZSTD_DICT_H + +#include <zstd.h> // ZSTD_DDict + +typedef struct { + PyObject_HEAD + + /* Reusable compress/decompress dictionary, they are created once and + can be shared by multiple threads concurrently, since its usage is + read-only. + c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ + ZSTD_DDict *d_dict; + PyObject *c_dicts; + + /* Content of the dictionary, bytes object. */ + PyObject *dict_content; + /* Dictionary id */ + uint32_t dict_id; +} ZstdDict; + +#endif // !ZSTD_DICT_H |