aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_zstd
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_zstd')
-rw-r--r--Modules/_zstd/_zstdmodule.c767
-rw-r--r--Modules/_zstd/_zstdmodule.h61
-rw-r--r--Modules/_zstd/buffer.h108
-rw-r--r--Modules/_zstd/clinic/_zstdmodule.c.h429
-rw-r--r--Modules/_zstd/clinic/compressor.c.h294
-rw-r--r--Modules/_zstd/clinic/decompressor.c.h223
-rw-r--r--Modules/_zstd/clinic/zstddict.c.h225
-rw-r--r--Modules/_zstd/compressor.c797
-rw-r--r--Modules/_zstd/decompressor.c717
-rw-r--r--Modules/_zstd/zstddict.c273
-rw-r--r--Modules/_zstd/zstddict.h29
11 files changed, 3923 insertions, 0 deletions
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c
new file mode 100644
index 00000000000..d75c0779474
--- /dev/null
+++ b/Modules/_zstd/_zstdmodule.c
@@ -0,0 +1,767 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "Python.h"
+
+#include "_zstdmodule.h"
+
+#include <zstd.h> // ZSTD_*()
+#include <zdict.h> // ZDICT_*()
+
+/*[clinic input]
+module _zstd
+
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b5f5587aac15c14]*/
+#include "clinic/_zstdmodule.c.h"
+
+
+ZstdDict *
+_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype)
+{
+ if (state == NULL) {
+ return NULL;
+ }
+
+ /* Check ZstdDict */
+ if (PyObject_TypeCheck(dict, state->ZstdDict_type)) {
+ return (ZstdDict*)dict;
+ }
+
+ /* Check (ZstdDict, type) */
+ if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2
+ && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type)
+ && PyLong_Check(PyTuple_GET_ITEM(dict, 1)))
+ {
+ int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1));
+ if (type == -1 && PyErr_Occurred()) {
+ return NULL;
+ }
+ if (type == DICT_TYPE_DIGESTED
+ || type == DICT_TYPE_UNDIGESTED
+ || type == DICT_TYPE_PREFIX)
+ {
+ *ptype = type;
+ return (ZstdDict*)PyTuple_GET_ITEM(dict, 0);
+ }
+ }
+
+ /* Wrong type */
+ PyErr_SetString(PyExc_TypeError,
+ "zstd_dict argument should be a ZstdDict object.");
+ return NULL;
+}
+
+/* Format error message and set ZstdError. */
+void
+set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret)
+{
+ const char *msg;
+ assert(ZSTD_isError(zstd_ret));
+
+ if (state == NULL) {
+ return;
+ }
+ switch (type) {
+ case ERR_DECOMPRESS:
+ msg = "Unable to decompress Zstandard data: %s";
+ break;
+ case ERR_COMPRESS:
+ msg = "Unable to compress Zstandard data: %s";
+ break;
+ case ERR_SET_PLEDGED_INPUT_SIZE:
+ msg = "Unable to set pledged uncompressed content size: %s";
+ break;
+
+ case ERR_LOAD_D_DICT:
+ msg = "Unable to load Zstandard dictionary or prefix for "
+ "decompression: %s";
+ break;
+ case ERR_LOAD_C_DICT:
+ msg = "Unable to load Zstandard dictionary or prefix for "
+ "compression: %s";
+ break;
+
+ case ERR_GET_C_BOUNDS:
+ msg = "Unable to get zstd compression parameter bounds: %s";
+ break;
+ case ERR_GET_D_BOUNDS:
+ msg = "Unable to get zstd decompression parameter bounds: %s";
+ break;
+ case ERR_SET_C_LEVEL:
+ msg = "Unable to set zstd compression level: %s";
+ break;
+
+ case ERR_TRAIN_DICT:
+ msg = "Unable to train the Zstandard dictionary: %s";
+ break;
+ case ERR_FINALIZE_DICT:
+ msg = "Unable to finalize the Zstandard dictionary: %s";
+ break;
+
+ default:
+ Py_UNREACHABLE();
+ }
+ PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret));
+}
+
+typedef struct {
+ int parameter;
+ char parameter_name[32];
+} ParameterInfo;
+
+static const ParameterInfo cp_list[] = {
+ {ZSTD_c_compressionLevel, "compression_level"},
+ {ZSTD_c_windowLog, "window_log"},
+ {ZSTD_c_hashLog, "hash_log"},
+ {ZSTD_c_chainLog, "chain_log"},
+ {ZSTD_c_searchLog, "search_log"},
+ {ZSTD_c_minMatch, "min_match"},
+ {ZSTD_c_targetLength, "target_length"},
+ {ZSTD_c_strategy, "strategy"},
+
+ {ZSTD_c_enableLongDistanceMatching, "enable_long_distance_matching"},
+ {ZSTD_c_ldmHashLog, "ldm_hash_log"},
+ {ZSTD_c_ldmMinMatch, "ldm_min_match"},
+ {ZSTD_c_ldmBucketSizeLog, "ldm_bucket_size_log"},
+ {ZSTD_c_ldmHashRateLog, "ldm_hash_rate_log"},
+
+ {ZSTD_c_contentSizeFlag, "content_size_flag"},
+ {ZSTD_c_checksumFlag, "checksum_flag"},
+ {ZSTD_c_dictIDFlag, "dict_id_flag"},
+
+ {ZSTD_c_nbWorkers, "nb_workers"},
+ {ZSTD_c_jobSize, "job_size"},
+ {ZSTD_c_overlapLog, "overlap_log"}
+};
+
+static const ParameterInfo dp_list[] = {
+ {ZSTD_d_windowLogMax, "window_log_max"}
+};
+
+void
+set_parameter_error(int is_compress, int key_v, int value_v)
+{
+ ParameterInfo const *list;
+ int list_size;
+ char *type;
+ ZSTD_bounds bounds;
+ char pos_msg[64];
+
+ if (is_compress) {
+ list = cp_list;
+ list_size = Py_ARRAY_LENGTH(cp_list);
+ type = "compression";
+ }
+ else {
+ list = dp_list;
+ list_size = Py_ARRAY_LENGTH(dp_list);
+ type = "decompression";
+ }
+
+ /* Find parameter's name */
+ char const *name = NULL;
+ for (int i = 0; i < list_size; i++) {
+ if (key_v == (list+i)->parameter) {
+ name = (list+i)->parameter_name;
+ break;
+ }
+ }
+
+ /* Unknown parameter */
+ if (name == NULL) {
+ PyOS_snprintf(pos_msg, sizeof(pos_msg),
+ "unknown parameter (key %d)", key_v);
+ name = pos_msg;
+ }
+
+ /* Get parameter bounds */
+ if (is_compress) {
+ bounds = ZSTD_cParam_getBounds(key_v);
+ }
+ else {
+ bounds = ZSTD_dParam_getBounds(key_v);
+ }
+ if (ZSTD_isError(bounds.error)) {
+ PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'",
+ type, name);
+ return;
+ }
+
+ /* Error message */
+ PyErr_Format(PyExc_ValueError,
+ "%s parameter '%s' received an illegal value %d; "
+ "the valid range is [%d, %d]",
+ type, name, value_v, bounds.lowerBound, bounds.upperBound);
+}
+
+static inline _zstd_state*
+get_zstd_state(PyObject *module)
+{
+ void *state = PyModule_GetState(module);
+ assert(state != NULL);
+ return (_zstd_state *)state;
+}
+
+static Py_ssize_t
+calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes,
+ size_t **chunk_sizes)
+{
+ Py_ssize_t chunks_number;
+ Py_ssize_t sizes_sum;
+ Py_ssize_t i;
+
+ chunks_number = PyTuple_GET_SIZE(samples_sizes);
+ if ((size_t) chunks_number > UINT32_MAX) {
+ PyErr_Format(PyExc_ValueError,
+ "The number of samples should be <= %u.", UINT32_MAX);
+ return -1;
+ }
+
+ /* Prepare chunk_sizes */
+ *chunk_sizes = PyMem_New(size_t, chunks_number);
+ if (*chunk_sizes == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ sizes_sum = PyBytes_GET_SIZE(samples_bytes);
+ for (i = 0; i < chunks_number; i++) {
+ size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i));
+ (*chunk_sizes)[i] = size;
+ if (size == (size_t)-1 && PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ goto sum_error;
+ }
+ return -1;
+ }
+ if ((size_t)sizes_sum < size) {
+ goto sum_error;
+ }
+ sizes_sum -= size;
+ }
+
+ if (sizes_sum != 0) {
+sum_error:
+ PyErr_SetString(PyExc_ValueError,
+ "The samples size tuple doesn't match the "
+ "concatenation's size.");
+ return -1;
+ }
+ return chunks_number;
+}
+
+
+/*[clinic input]
+_zstd.train_dict
+
+ samples_bytes: PyBytesObject
+ Concatenation of samples.
+ samples_sizes: object(subclass_of='&PyTuple_Type')
+ Tuple of samples' sizes.
+ dict_size: Py_ssize_t
+ The size of the dictionary.
+ /
+
+Train a Zstandard dictionary on sample data.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
+ PyObject *samples_sizes, Py_ssize_t dict_size)
+/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/
+{
+ PyObject *dst_dict_bytes = NULL;
+ size_t *chunk_sizes = NULL;
+ Py_ssize_t chunks_number;
+ size_t zstd_ret;
+
+ /* Check arguments */
+ if (dict_size <= 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "dict_size argument should be positive number.");
+ return NULL;
+ }
+
+ /* Check that the samples are valid and get their sizes */
+ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes,
+ &chunk_sizes);
+ if (chunks_number < 0) {
+ goto error;
+ }
+
+ /* Allocate dict buffer */
+ dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size);
+ if (dst_dict_bytes == NULL) {
+ goto error;
+ }
+
+ /* Train the dictionary */
+ char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes);
+ const char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
+ Py_BEGIN_ALLOW_THREADS
+ zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size,
+ samples_buffer,
+ chunk_sizes, (uint32_t)chunks_number);
+ Py_END_ALLOW_THREADS
+
+ /* Check Zstandard dict error */
+ if (ZDICT_isError(zstd_ret)) {
+ _zstd_state* mod_state = get_zstd_state(module);
+ set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret);
+ goto error;
+ }
+
+ /* Resize dict_buffer */
+ if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) {
+ goto error;
+ }
+
+ goto success;
+
+error:
+ Py_CLEAR(dst_dict_bytes);
+
+success:
+ PyMem_Free(chunk_sizes);
+ return dst_dict_bytes;
+}
+
+/*[clinic input]
+_zstd.finalize_dict
+
+ custom_dict_bytes: PyBytesObject
+ Custom dictionary content.
+ samples_bytes: PyBytesObject
+ Concatenation of samples.
+ samples_sizes: object(subclass_of='&PyTuple_Type')
+ Tuple of samples' sizes.
+ dict_size: Py_ssize_t
+ The size of the dictionary.
+ compression_level: int
+ Optimize for a specific Zstandard compression level, 0 means default.
+ /
+
+Finalize a Zstandard dictionary.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
+ PyBytesObject *samples_bytes,
+ PyObject *samples_sizes, Py_ssize_t dict_size,
+ int compression_level)
+/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/
+{
+ Py_ssize_t chunks_number;
+ size_t *chunk_sizes = NULL;
+ PyObject *dst_dict_bytes = NULL;
+ size_t zstd_ret;
+ ZDICT_params_t params;
+
+ /* Check arguments */
+ if (dict_size <= 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "dict_size argument should be positive number.");
+ return NULL;
+ }
+
+ /* Check that the samples are valid and get their sizes */
+ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes,
+ &chunk_sizes);
+ if (chunks_number < 0) {
+ goto error;
+ }
+
+ /* Allocate dict buffer */
+ dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size);
+ if (dst_dict_bytes == NULL) {
+ goto error;
+ }
+
+ /* Parameters */
+
+ /* Optimize for a specific Zstandard compression level, 0 means default. */
+ params.compressionLevel = compression_level;
+ /* Write log to stderr, 0 = none. */
+ params.notificationLevel = 0;
+ /* Force dictID value, 0 means auto mode (32-bits random value). */
+ params.dictID = 0;
+
+ /* Finalize the dictionary */
+ Py_BEGIN_ALLOW_THREADS
+ zstd_ret = ZDICT_finalizeDictionary(
+ PyBytes_AS_STRING(dst_dict_bytes), dict_size,
+ PyBytes_AS_STRING(custom_dict_bytes),
+ Py_SIZE(custom_dict_bytes),
+ PyBytes_AS_STRING(samples_bytes), chunk_sizes,
+ (uint32_t)chunks_number, params);
+ Py_END_ALLOW_THREADS
+
+ /* Check Zstandard dict error */
+ if (ZDICT_isError(zstd_ret)) {
+ _zstd_state* mod_state = get_zstd_state(module);
+ set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret);
+ goto error;
+ }
+
+ /* Resize dict_buffer */
+ if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) {
+ goto error;
+ }
+
+ goto success;
+
+error:
+ Py_CLEAR(dst_dict_bytes);
+
+success:
+ PyMem_Free(chunk_sizes);
+ return dst_dict_bytes;
+}
+
+
+/*[clinic input]
+_zstd.get_param_bounds
+
+ parameter: int
+ The parameter to get bounds.
+ is_compress: bool
+ True for CompressionParameter, False for DecompressionParameter.
+
+Get CompressionParameter/DecompressionParameter bounds.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress)
+/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/
+{
+ ZSTD_bounds bound;
+ if (is_compress) {
+ bound = ZSTD_cParam_getBounds(parameter);
+ if (ZSTD_isError(bound.error)) {
+ _zstd_state* mod_state = get_zstd_state(module);
+ set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error);
+ return NULL;
+ }
+ }
+ else {
+ bound = ZSTD_dParam_getBounds(parameter);
+ if (ZSTD_isError(bound.error)) {
+ _zstd_state* mod_state = get_zstd_state(module);
+ set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error);
+ return NULL;
+ }
+ }
+
+ return Py_BuildValue("ii", bound.lowerBound, bound.upperBound);
+}
+
+/*[clinic input]
+_zstd.get_frame_size
+
+ frame_buffer: Py_buffer
+ A bytes-like object, it should start from the beginning of a frame,
+ and contains at least one complete frame.
+
+Get the size of a Zstandard frame, including the header and optional checksum.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
+/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/
+{
+ size_t frame_size;
+
+ frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf,
+ frame_buffer->len);
+ if (ZSTD_isError(frame_size)) {
+ _zstd_state* mod_state = get_zstd_state(module);
+ PyErr_Format(mod_state->ZstdError,
+ "Error when finding the compressed size of a Zstandard frame. "
+ "Ensure the frame_buffer argument starts from the "
+ "beginning of a frame, and its length is not less than this "
+ "complete frame. Zstd error message: %s.",
+ ZSTD_getErrorName(frame_size));
+ return NULL;
+ }
+
+ return PyLong_FromSize_t(frame_size);
+}
+
+/*[clinic input]
+_zstd.get_frame_info
+
+ frame_buffer: Py_buffer
+ A bytes-like object, containing the header of a Zstandard frame.
+
+Get Zstandard frame infomation from a frame header.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
+/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/
+{
+ uint64_t decompressed_size;
+ uint32_t dict_id;
+
+ /* ZSTD_getFrameContentSize */
+ decompressed_size = ZSTD_getFrameContentSize(frame_buffer->buf,
+ frame_buffer->len);
+
+ /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+ #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */
+ if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) {
+ _zstd_state* mod_state = get_zstd_state(module);
+ PyErr_SetString(mod_state->ZstdError,
+ "Error when getting information from the header of "
+ "a Zstandard frame. Ensure the frame_buffer argument "
+ "starts from the beginning of a frame, and its length "
+ "is not less than the frame header (6~18 bytes).");
+ return NULL;
+ }
+
+ /* ZSTD_getDictID_fromFrame */
+ dict_id = ZSTD_getDictID_fromFrame(frame_buffer->buf, frame_buffer->len);
+
+ /* Build tuple */
+ if (decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) {
+ return Py_BuildValue("OI", Py_None, dict_id);
+ }
+ return Py_BuildValue("KI", decompressed_size, dict_id);
+}
+
+/*[clinic input]
+_zstd.set_parameter_types
+
+ c_parameter_type: object(subclass_of='&PyType_Type')
+ CompressionParameter IntEnum type object
+ d_parameter_type: object(subclass_of='&PyType_Type')
+ DecompressionParameter IntEnum type object
+
+Set CompressionParameter and DecompressionParameter types for validity check.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
+ PyObject *d_parameter_type)
+/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/
+{
+ _zstd_state* mod_state = get_zstd_state(module);
+
+ Py_INCREF(c_parameter_type);
+ Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type);
+ Py_INCREF(d_parameter_type);
+ Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type);
+
+ Py_RETURN_NONE;
+}
+
+static PyMethodDef _zstd_methods[] = {
+ _ZSTD_TRAIN_DICT_METHODDEF
+ _ZSTD_FINALIZE_DICT_METHODDEF
+ _ZSTD_GET_PARAM_BOUNDS_METHODDEF
+ _ZSTD_GET_FRAME_SIZE_METHODDEF
+ _ZSTD_GET_FRAME_INFO_METHODDEF
+ _ZSTD_SET_PARAMETER_TYPES_METHODDEF
+ {NULL, NULL}
+};
+
+static int
+_zstd_exec(PyObject *m)
+{
+#define ADD_TYPE(TYPE, SPEC) \
+do { \
+ TYPE = (PyTypeObject *)PyType_FromModuleAndSpec(m, &(SPEC), NULL); \
+ if (TYPE == NULL) { \
+ return -1; \
+ } \
+ if (PyModule_AddType(m, TYPE) < 0) { \
+ return -1; \
+ } \
+} while (0)
+
+#define ADD_INT_MACRO(MACRO) \
+ if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \
+ return -1; \
+ }
+
+#define ADD_INT_CONST_TO_TYPE(TYPE, NAME, VALUE) \
+do { \
+ PyObject *v = PyLong_FromLong((VALUE)); \
+ if (v == NULL || PyObject_SetAttrString((PyObject *)(TYPE), \
+ (NAME), v) < 0) { \
+ Py_XDECREF(v); \
+ return -1; \
+ } \
+ Py_DECREF(v); \
+} while (0)
+
+ _zstd_state* mod_state = get_zstd_state(m);
+
+ /* Reusable objects & variables */
+ mod_state->CParameter_type = NULL;
+ mod_state->DParameter_type = NULL;
+
+ /* Create and add heap types */
+ ADD_TYPE(mod_state->ZstdDict_type, zstd_dict_type_spec);
+ ADD_TYPE(mod_state->ZstdCompressor_type, zstd_compressor_type_spec);
+ ADD_TYPE(mod_state->ZstdDecompressor_type, zstd_decompressor_type_spec);
+ mod_state->ZstdError = PyErr_NewExceptionWithDoc(
+ "compression.zstd.ZstdError",
+ "An error occurred in the zstd library.",
+ NULL, NULL);
+ if (mod_state->ZstdError == NULL) {
+ return -1;
+ }
+ if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) {
+ return -1;
+ }
+
+ /* Add constants */
+ if (PyModule_AddIntConstant(m, "zstd_version_number",
+ ZSTD_versionNumber()) < 0) {
+ return -1;
+ }
+
+ if (PyModule_AddStringConstant(m, "zstd_version",
+ ZSTD_versionString()) < 0) {
+ return -1;
+ }
+
+#if ZSTD_VERSION_NUMBER >= 10500
+ if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT",
+ ZSTD_defaultCLevel()) < 0) {
+ return -1;
+ }
+#else
+ ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT);
+#endif
+
+ if (PyModule_Add(m, "ZSTD_DStreamOutSize",
+ PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) {
+ return -1;
+ }
+
+ /* Add zstd compression parameters. All should also be in cp_list. */
+ ADD_INT_MACRO(ZSTD_c_compressionLevel);
+ ADD_INT_MACRO(ZSTD_c_windowLog);
+ ADD_INT_MACRO(ZSTD_c_hashLog);
+ ADD_INT_MACRO(ZSTD_c_chainLog);
+ ADD_INT_MACRO(ZSTD_c_searchLog);
+ ADD_INT_MACRO(ZSTD_c_minMatch);
+ ADD_INT_MACRO(ZSTD_c_targetLength);
+ ADD_INT_MACRO(ZSTD_c_strategy);
+
+ ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching);
+ ADD_INT_MACRO(ZSTD_c_ldmHashLog);
+ ADD_INT_MACRO(ZSTD_c_ldmMinMatch);
+ ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog);
+ ADD_INT_MACRO(ZSTD_c_ldmHashRateLog);
+
+ ADD_INT_MACRO(ZSTD_c_contentSizeFlag);
+ ADD_INT_MACRO(ZSTD_c_checksumFlag);
+ ADD_INT_MACRO(ZSTD_c_dictIDFlag);
+
+ ADD_INT_MACRO(ZSTD_c_nbWorkers);
+ ADD_INT_MACRO(ZSTD_c_jobSize);
+ ADD_INT_MACRO(ZSTD_c_overlapLog);
+
+ /* Add zstd decompression parameters. All should also be in dp_list. */
+ ADD_INT_MACRO(ZSTD_d_windowLogMax);
+
+ /* Add ZSTD_strategy enum members */
+ ADD_INT_MACRO(ZSTD_fast);
+ ADD_INT_MACRO(ZSTD_dfast);
+ ADD_INT_MACRO(ZSTD_greedy);
+ ADD_INT_MACRO(ZSTD_lazy);
+ ADD_INT_MACRO(ZSTD_lazy2);
+ ADD_INT_MACRO(ZSTD_btlazy2);
+ ADD_INT_MACRO(ZSTD_btopt);
+ ADD_INT_MACRO(ZSTD_btultra);
+ ADD_INT_MACRO(ZSTD_btultra2);
+
+ /* Add ZSTD_EndDirective enum members to ZstdCompressor */
+ ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type,
+ "CONTINUE", ZSTD_e_continue);
+ ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type,
+ "FLUSH_BLOCK", ZSTD_e_flush);
+ ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type,
+ "FLUSH_FRAME", ZSTD_e_end);
+
+ /* Make ZstdCompressor immutable (set Py_TPFLAGS_IMMUTABLETYPE) */
+ PyType_Freeze(mod_state->ZstdCompressor_type);
+
+#undef ADD_TYPE
+#undef ADD_INT_MACRO
+#undef ADD_ZSTD_COMPRESSOR_INT_CONST
+
+ return 0;
+}
+
+static int
+_zstd_traverse(PyObject *module, visitproc visit, void *arg)
+{
+ _zstd_state* mod_state = get_zstd_state(module);
+
+ Py_VISIT(mod_state->ZstdDict_type);
+ Py_VISIT(mod_state->ZstdCompressor_type);
+
+ Py_VISIT(mod_state->ZstdDecompressor_type);
+
+ Py_VISIT(mod_state->ZstdError);
+
+ Py_VISIT(mod_state->CParameter_type);
+ Py_VISIT(mod_state->DParameter_type);
+ return 0;
+}
+
+static int
+_zstd_clear(PyObject *module)
+{
+ _zstd_state* mod_state = get_zstd_state(module);
+
+ Py_CLEAR(mod_state->ZstdDict_type);
+ Py_CLEAR(mod_state->ZstdCompressor_type);
+
+ Py_CLEAR(mod_state->ZstdDecompressor_type);
+
+ Py_CLEAR(mod_state->ZstdError);
+
+ Py_CLEAR(mod_state->CParameter_type);
+ Py_CLEAR(mod_state->DParameter_type);
+ return 0;
+}
+
+static void
+_zstd_free(void *module)
+{
+ (void)_zstd_clear((PyObject *)module);
+}
+
+static struct PyModuleDef_Slot _zstd_slots[] = {
+ {Py_mod_exec, _zstd_exec},
+ {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+ {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+ {0, NULL},
+};
+
+static struct PyModuleDef _zstdmodule = {
+ .m_base = PyModuleDef_HEAD_INIT,
+ .m_name = "_zstd",
+ .m_doc = "Implementation module for Zstandard compression.",
+ .m_size = sizeof(_zstd_state),
+ .m_slots = _zstd_slots,
+ .m_methods = _zstd_methods,
+ .m_traverse = _zstd_traverse,
+ .m_clear = _zstd_clear,
+ .m_free = _zstd_free,
+};
+
+PyMODINIT_FUNC
+PyInit__zstd(void)
+{
+ return PyModuleDef_Init(&_zstdmodule);
+}
diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h
new file mode 100644
index 00000000000..4e8f708f223
--- /dev/null
+++ b/Modules/_zstd/_zstdmodule.h
@@ -0,0 +1,61 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+/* Declarations shared between different parts of the _zstd module*/
+
+#ifndef ZSTD_MODULE_H
+#define ZSTD_MODULE_H
+
+#include "zstddict.h"
+
+/* Type specs */
+extern PyType_Spec zstd_dict_type_spec;
+extern PyType_Spec zstd_compressor_type_spec;
+extern PyType_Spec zstd_decompressor_type_spec;
+
+typedef struct {
+ /* Module heap types. */
+ PyTypeObject *ZstdDict_type;
+ PyTypeObject *ZstdCompressor_type;
+ PyTypeObject *ZstdDecompressor_type;
+ PyObject *ZstdError;
+
+ /* enum types set by set_parameter_types. */
+ PyTypeObject *CParameter_type;
+ PyTypeObject *DParameter_type;
+} _zstd_state;
+
+typedef enum {
+ ERR_DECOMPRESS,
+ ERR_COMPRESS,
+ ERR_SET_PLEDGED_INPUT_SIZE,
+
+ ERR_LOAD_D_DICT,
+ ERR_LOAD_C_DICT,
+
+ ERR_GET_C_BOUNDS,
+ ERR_GET_D_BOUNDS,
+ ERR_SET_C_LEVEL,
+
+ ERR_TRAIN_DICT,
+ ERR_FINALIZE_DICT,
+} error_type;
+
+typedef enum {
+ DICT_TYPE_DIGESTED = 0,
+ DICT_TYPE_UNDIGESTED = 1,
+ DICT_TYPE_PREFIX = 2
+} dictionary_type;
+
+extern ZstdDict *
+_Py_parse_zstd_dict(const _zstd_state *state,
+ PyObject *dict, int *type);
+
+/* Format error message and set ZstdError. */
+extern void
+set_zstd_error(const _zstd_state *state,
+ error_type type, size_t zstd_ret);
+
+extern void
+set_parameter_error(int is_compress, int key_v, int value_v);
+
+#endif // !ZSTD_MODULE_H
diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h
new file mode 100644
index 00000000000..4c885fa0d72
--- /dev/null
+++ b/Modules/_zstd/buffer.h
@@ -0,0 +1,108 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+#ifndef ZSTD_BUFFER_H
+#define ZSTD_BUFFER_H
+
+#include "pycore_blocks_output_buffer.h"
+
+#include <zstd.h> // ZSTD_outBuffer
+
+/* Blocks output buffer wrapper code */
+
+/* Initialize the buffer, and grow the buffer.
+ Return 0 on success
+ Return -1 on failure */
+static inline int
+_OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
+ Py_ssize_t max_length)
+{
+ /* Ensure .list was set to NULL */
+ assert(buffer->list == NULL);
+
+ Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length,
+ &ob->dst);
+ if (res < 0) {
+ return -1;
+ }
+ ob->size = (size_t) res;
+ ob->pos = 0;
+ return 0;
+}
+
+/* Initialize the buffer, with an initial size.
+ init_size: the initial size.
+ Return 0 on success
+ Return -1 on failure */
+static inline int
+_OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
+ Py_ssize_t max_length, Py_ssize_t init_size)
+{
+ Py_ssize_t block_size;
+
+ /* Ensure .list was set to NULL */
+ assert(buffer->list == NULL);
+
+ /* Get block size */
+ if (0 <= max_length && max_length < init_size) {
+ block_size = max_length;
+ }
+ else {
+ block_size = init_size;
+ }
+
+ Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size,
+ &ob->dst);
+ if (res < 0) {
+ return -1;
+ }
+ // Set max_length, InitWithSize doesn't do this
+ buffer->max_length = max_length;
+ ob->size = (size_t) res;
+ ob->pos = 0;
+ return 0;
+}
+
+/* Grow the buffer.
+ Return 0 on success
+ Return -1 on failure */
+static inline int
+_OutputBuffer_Grow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
+{
+ assert(ob->pos == ob->size);
+ Py_ssize_t res = _BlocksOutputBuffer_Grow(buffer, &ob->dst, 0);
+ if (res < 0) {
+ return -1;
+ }
+ ob->size = (size_t) res;
+ ob->pos = 0;
+ return 0;
+}
+
+/* Finish the buffer.
+ Return a bytes object on success
+ Return NULL on failure */
+static inline PyObject *
+_OutputBuffer_Finish(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
+{
+ return _BlocksOutputBuffer_Finish(buffer, ob->size - ob->pos);
+}
+
+/* Clean up the buffer */
+static inline void
+_OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
+{
+ _BlocksOutputBuffer_OnError(buffer);
+}
+
+/* Whether the output data has reached max_length.
+The avail_out must be 0, please check it before calling. */
+static inline int
+_OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
+{
+ /* Ensure (data size == allocated size) */
+ assert(ob->pos == ob->size);
+
+ return buffer->allocated == buffer->max_length;
+}
+
+#endif // !ZSTD_BUFFER_H
diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h
new file mode 100644
index 00000000000..766e1cfa776
--- /dev/null
+++ b/Modules/_zstd/clinic/_zstdmodule.c.h
@@ -0,0 +1,429 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_abstract.h" // _PyNumber_Index()
+#include "pycore_modsupport.h" // _PyArg_CheckPositional()
+
+PyDoc_STRVAR(_zstd_train_dict__doc__,
+"train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n"
+"--\n"
+"\n"
+"Train a Zstandard dictionary on sample data.\n"
+"\n"
+" samples_bytes\n"
+" Concatenation of samples.\n"
+" samples_sizes\n"
+" Tuple of samples\' sizes.\n"
+" dict_size\n"
+" The size of the dictionary.");
+
+#define _ZSTD_TRAIN_DICT_METHODDEF \
+ {"train_dict", _PyCFunction_CAST(_zstd_train_dict), METH_FASTCALL, _zstd_train_dict__doc__},
+
+static PyObject *
+_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
+ PyObject *samples_sizes, Py_ssize_t dict_size);
+
+static PyObject *
+_zstd_train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyBytesObject *samples_bytes;
+ PyObject *samples_sizes;
+ Py_ssize_t dict_size;
+
+ if (!_PyArg_CheckPositional("train_dict", nargs, 3, 3)) {
+ goto exit;
+ }
+ if (!PyBytes_Check(args[0])) {
+ _PyArg_BadArgument("train_dict", "argument 1", "bytes", args[0]);
+ goto exit;
+ }
+ samples_bytes = (PyBytesObject *)args[0];
+ if (!PyTuple_Check(args[1])) {
+ _PyArg_BadArgument("train_dict", "argument 2", "tuple", args[1]);
+ goto exit;
+ }
+ samples_sizes = args[1];
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[2]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ dict_size = ival;
+ }
+ return_value = _zstd_train_dict_impl(module, samples_bytes, samples_sizes, dict_size);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_finalize_dict__doc__,
+"finalize_dict($module, custom_dict_bytes, samples_bytes, samples_sizes,\n"
+" dict_size, compression_level, /)\n"
+"--\n"
+"\n"
+"Finalize a Zstandard dictionary.\n"
+"\n"
+" custom_dict_bytes\n"
+" Custom dictionary content.\n"
+" samples_bytes\n"
+" Concatenation of samples.\n"
+" samples_sizes\n"
+" Tuple of samples\' sizes.\n"
+" dict_size\n"
+" The size of the dictionary.\n"
+" compression_level\n"
+" Optimize for a specific Zstandard compression level, 0 means default.");
+
+#define _ZSTD_FINALIZE_DICT_METHODDEF \
+ {"finalize_dict", _PyCFunction_CAST(_zstd_finalize_dict), METH_FASTCALL, _zstd_finalize_dict__doc__},
+
+static PyObject *
+_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
+ PyBytesObject *samples_bytes,
+ PyObject *samples_sizes, Py_ssize_t dict_size,
+ int compression_level);
+
+static PyObject *
+_zstd_finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyBytesObject *custom_dict_bytes;
+ PyBytesObject *samples_bytes;
+ PyObject *samples_sizes;
+ Py_ssize_t dict_size;
+ int compression_level;
+
+ if (!_PyArg_CheckPositional("finalize_dict", nargs, 5, 5)) {
+ goto exit;
+ }
+ if (!PyBytes_Check(args[0])) {
+ _PyArg_BadArgument("finalize_dict", "argument 1", "bytes", args[0]);
+ goto exit;
+ }
+ custom_dict_bytes = (PyBytesObject *)args[0];
+ if (!PyBytes_Check(args[1])) {
+ _PyArg_BadArgument("finalize_dict", "argument 2", "bytes", args[1]);
+ goto exit;
+ }
+ samples_bytes = (PyBytesObject *)args[1];
+ if (!PyTuple_Check(args[2])) {
+ _PyArg_BadArgument("finalize_dict", "argument 3", "tuple", args[2]);
+ goto exit;
+ }
+ samples_sizes = args[2];
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[3]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ dict_size = ival;
+ }
+ compression_level = PyLong_AsInt(args[4]);
+ if (compression_level == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = _zstd_finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_get_param_bounds__doc__,
+"get_param_bounds($module, /, parameter, is_compress)\n"
+"--\n"
+"\n"
+"Get CompressionParameter/DecompressionParameter bounds.\n"
+"\n"
+" parameter\n"
+" The parameter to get bounds.\n"
+" is_compress\n"
+" True for CompressionParameter, False for DecompressionParameter.");
+
+#define _ZSTD_GET_PARAM_BOUNDS_METHODDEF \
+ {"get_param_bounds", _PyCFunction_CAST(_zstd_get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd_get_param_bounds__doc__},
+
+static PyObject *
+_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress);
+
+static PyObject *
+_zstd_get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(parameter), &_Py_ID(is_compress), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"parameter", "is_compress", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "get_param_bounds",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ int parameter;
+ int is_compress;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ parameter = PyLong_AsInt(args[0]);
+ if (parameter == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ is_compress = PyObject_IsTrue(args[1]);
+ if (is_compress < 0) {
+ goto exit;
+ }
+ return_value = _zstd_get_param_bounds_impl(module, parameter, is_compress);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_get_frame_size__doc__,
+"get_frame_size($module, /, frame_buffer)\n"
+"--\n"
+"\n"
+"Get the size of a Zstandard frame, including the header and optional checksum.\n"
+"\n"
+" frame_buffer\n"
+" A bytes-like object, it should start from the beginning of a frame,\n"
+" and contains at least one complete frame.");
+
+#define _ZSTD_GET_FRAME_SIZE_METHODDEF \
+ {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__},
+
+static PyObject *
+_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer);
+
+static PyObject *
+_zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(frame_buffer), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"frame_buffer", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "get_frame_size",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ Py_buffer frame_buffer = {NULL, NULL};
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ return_value = _zstd_get_frame_size_impl(module, &frame_buffer);
+
+exit:
+ /* Cleanup for frame_buffer */
+ if (frame_buffer.obj) {
+ PyBuffer_Release(&frame_buffer);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_get_frame_info__doc__,
+"get_frame_info($module, /, frame_buffer)\n"
+"--\n"
+"\n"
+"Get Zstandard frame infomation from a frame header.\n"
+"\n"
+" frame_buffer\n"
+" A bytes-like object, containing the header of a Zstandard frame.");
+
+#define _ZSTD_GET_FRAME_INFO_METHODDEF \
+ {"get_frame_info", _PyCFunction_CAST(_zstd_get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_info__doc__},
+
+static PyObject *
+_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer);
+
+static PyObject *
+_zstd_get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(frame_buffer), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"frame_buffer", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "get_frame_info",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ Py_buffer frame_buffer = {NULL, NULL};
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ return_value = _zstd_get_frame_info_impl(module, &frame_buffer);
+
+exit:
+ /* Cleanup for frame_buffer */
+ if (frame_buffer.obj) {
+ PyBuffer_Release(&frame_buffer);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_set_parameter_types__doc__,
+"set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n"
+"--\n"
+"\n"
+"Set CompressionParameter and DecompressionParameter types for validity check.\n"
+"\n"
+" c_parameter_type\n"
+" CompressionParameter IntEnum type object\n"
+" d_parameter_type\n"
+" DecompressionParameter IntEnum type object");
+
+#define _ZSTD_SET_PARAMETER_TYPES_METHODDEF \
+ {"set_parameter_types", _PyCFunction_CAST(_zstd_set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd_set_parameter_types__doc__},
+
+static PyObject *
+_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
+ PyObject *d_parameter_type);
+
+static PyObject *
+_zstd_set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(c_parameter_type), &_Py_ID(d_parameter_type), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "set_parameter_types",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *c_parameter_type;
+ PyObject *d_parameter_type;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!PyObject_TypeCheck(args[0], &PyType_Type)) {
+ _PyArg_BadArgument("set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]);
+ goto exit;
+ }
+ c_parameter_type = args[0];
+ if (!PyObject_TypeCheck(args[1], &PyType_Type)) {
+ _PyArg_BadArgument("set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]);
+ goto exit;
+ }
+ d_parameter_type = args[1];
+ return_value = _zstd_set_parameter_types_impl(module, c_parameter_type, d_parameter_type);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=437b084f149e68e5 input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h
new file mode 100644
index 00000000000..4f8d93fd9e8
--- /dev/null
+++ b/Modules/_zstd/clinic/compressor.c.h
@@ -0,0 +1,294 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__,
+"ZstdCompressor(level=None, options=None, zstd_dict=None)\n"
+"--\n"
+"\n"
+"Create a compressor object for compressing data incrementally.\n"
+"\n"
+" level\n"
+" The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.\n"
+" options\n"
+" A dict object that contains advanced compression parameters.\n"
+" zstd_dict\n"
+" A ZstdDict object, a pre-trained Zstandard dictionary.\n"
+"\n"
+"Thread-safe at method level. For one-shot compression, use the compress()\n"
+"function instead.");
+
+static PyObject *
+_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level,
+ PyObject *options, PyObject *zstd_dict);
+
+static PyObject *
+_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 3
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(level), &_Py_ID(options), &_Py_ID(zstd_dict), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"level", "options", "zstd_dict", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "ZstdCompressor",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[3];
+ PyObject * const *fastargs;
+ Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+ Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
+ PyObject *level = Py_None;
+ PyObject *options = Py_None;
+ PyObject *zstd_dict = Py_None;
+
+ fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+ /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!fastargs) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ if (fastargs[0]) {
+ level = fastargs[0];
+ if (!--noptargs) {
+ goto skip_optional_pos;
+ }
+ }
+ if (fastargs[1]) {
+ options = fastargs[1];
+ if (!--noptargs) {
+ goto skip_optional_pos;
+ }
+ }
+ zstd_dict = fastargs[2];
+skip_optional_pos:
+ return_value = _zstd_ZstdCompressor_new_impl(type, level, options, zstd_dict);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdCompressor_compress__doc__,
+"compress($self, /, data, mode=ZstdCompressor.CONTINUE)\n"
+"--\n"
+"\n"
+"Provide data to the compressor object.\n"
+"\n"
+" mode\n"
+" Can be these 3 values ZstdCompressor.CONTINUE,\n"
+" ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME\n"
+"\n"
+"Return a chunk of compressed data if possible, or b\'\' otherwise. When you have\n"
+"finished providing data to the compressor, call the flush() method to finish\n"
+"the compression process.");
+
+#define _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF \
+ {"compress", _PyCFunction_CAST(_zstd_ZstdCompressor_compress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_compress__doc__},
+
+static PyObject *
+_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data,
+ int mode);
+
+static PyObject *
+_zstd_ZstdCompressor_compress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(data), &_Py_ID(mode), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"data", "mode", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "compress",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ Py_buffer data = {NULL, NULL};
+ int mode = ZSTD_e_continue;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ mode = PyLong_AsInt(args[1]);
+ if (mode == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdCompressor_compress_impl((ZstdCompressor *)self, &data, mode);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__,
+"flush($self, /, mode=ZstdCompressor.FLUSH_FRAME)\n"
+"--\n"
+"\n"
+"Finish the compression process.\n"
+"\n"
+" mode\n"
+" Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n"
+" ZstdCompressor.FLUSH_BLOCK\n"
+"\n"
+"Flush any remaining data left in internal buffers. Since Zstandard data\n"
+"consists of one or more independent frames, the compressor object can still\n"
+"be used after this method is called.");
+
+#define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \
+ {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__},
+
+static PyObject *
+_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode);
+
+static PyObject *
+_zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(mode), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"mode", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "flush",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
+ int mode = ZSTD_e_end;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ mode = PyLong_AsInt(args[0]);
+ if (mode == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdCompressor_flush_impl((ZstdCompressor *)self, mode);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdCompressor_set_pledged_input_size__doc__,
+"set_pledged_input_size($self, size, /)\n"
+"--\n"
+"\n"
+"Set the uncompressed content size to be written into the frame header.\n"
+"\n"
+" size\n"
+" The size of the uncompressed data to be provided to the compressor.\n"
+"\n"
+"This method can be used to ensure the header of the frame about to be written\n"
+"includes the size of the data, unless the CompressionParameter.content_size_flag\n"
+"is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.\n"
+"\n"
+"It is important to ensure that the pledged data size matches the actual data\n"
+"size. If they do not match the compressed output data may be corrupted and the\n"
+"final chunk written may be lost.");
+
+#define _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF \
+ {"set_pledged_input_size", (PyCFunction)_zstd_ZstdCompressor_set_pledged_input_size, METH_O, _zstd_ZstdCompressor_set_pledged_input_size__doc__},
+
+static PyObject *
+_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self,
+ unsigned long long size);
+
+static PyObject *
+_zstd_ZstdCompressor_set_pledged_input_size(PyObject *self, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ unsigned long long size;
+
+ if (!zstd_contentsize_converter(arg, &size)) {
+ goto exit;
+ }
+ return_value = _zstd_ZstdCompressor_set_pledged_input_size_impl((ZstdCompressor *)self, size);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=c1d5c2cf06a8becd input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h
new file mode 100644
index 00000000000..c6fdae74ab0
--- /dev/null
+++ b/Modules/_zstd/clinic/decompressor.c.h
@@ -0,0 +1,223 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_abstract.h" // _PyNumber_Index()
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__,
+"ZstdDecompressor(zstd_dict=None, options=None)\n"
+"--\n"
+"\n"
+"Create a decompressor object for decompressing data incrementally.\n"
+"\n"
+" zstd_dict\n"
+" A ZstdDict object, a pre-trained Zstandard dictionary.\n"
+" options\n"
+" A dict object that contains advanced decompression parameters.\n"
+"\n"
+"Thread-safe at method level. For one-shot decompression, use the decompress()\n"
+"function instead.");
+
+static PyObject *
+_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict,
+ PyObject *options);
+
+static PyObject *
+_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(zstd_dict), &_Py_ID(options), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"zstd_dict", "options", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "ZstdDecompressor",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject * const *fastargs;
+ Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+ Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
+ PyObject *zstd_dict = Py_None;
+ PyObject *options = Py_None;
+
+ fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+ /*minpos*/ 0, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!fastargs) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ if (fastargs[0]) {
+ zstd_dict = fastargs[0];
+ if (!--noptargs) {
+ goto skip_optional_pos;
+ }
+ }
+ options = fastargs[1];
+skip_optional_pos:
+ return_value = _zstd_ZstdDecompressor_new_impl(type, zstd_dict, options);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDecompressor_unused_data__doc__,
+"A bytes object of un-consumed input data.\n"
+"\n"
+"When ZstdDecompressor object stops after a frame is\n"
+"decompressed, unused input data after the frame. Otherwise this will be b\'\'.");
+#if defined(_zstd_ZstdDecompressor_unused_data_DOCSTR)
+# undef _zstd_ZstdDecompressor_unused_data_DOCSTR
+#endif
+#define _zstd_ZstdDecompressor_unused_data_DOCSTR _zstd_ZstdDecompressor_unused_data__doc__
+
+#if !defined(_zstd_ZstdDecompressor_unused_data_DOCSTR)
+# define _zstd_ZstdDecompressor_unused_data_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF)
+# undef _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
+# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, (setter)_zstd_ZstdDecompressor_unused_data_set, _zstd_ZstdDecompressor_unused_data_DOCSTR},
+#else
+# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, NULL, _zstd_ZstdDecompressor_unused_data_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self);
+
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context))
+{
+ return _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self);
+}
+
+PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__,
+"decompress($self, /, data, max_length=-1)\n"
+"--\n"
+"\n"
+"Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n"
+"\n"
+" data\n"
+" A bytes-like object, Zstandard data to be decompressed.\n"
+" max_length\n"
+" Maximum size of returned data. When it is negative, the size of\n"
+" output buffer is unlimited. When it is nonnegative, returns at\n"
+" most max_length bytes of decompressed data.\n"
+"\n"
+"If *max_length* is nonnegative, returns at most *max_length* bytes of\n"
+"decompressed data. If this limit is reached and further output can be\n"
+"produced, *self.needs_input* will be set to ``False``. In this case, the next\n"
+"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n"
+"\n"
+"If all of the input data was decompressed and returned (either because this\n"
+"was less than *max_length* bytes, or because *max_length* was negative),\n"
+"*self.needs_input* will be set to True.\n"
+"\n"
+"Attempting to decompress data after the end of a frame is reached raises an\n"
+"EOFError. Any data found after the end of the frame is ignored and saved in\n"
+"the self.unused_data attribute.");
+
+#define _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF \
+ {"decompress", _PyCFunction_CAST(_zstd_ZstdDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdDecompressor_decompress__doc__},
+
+static PyObject *
+_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
+ Py_buffer *data,
+ Py_ssize_t max_length);
+
+static PyObject *
+_zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(data), &_Py_ID(max_length), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"data", "max_length", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "decompress",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ Py_buffer data = {NULL, NULL};
+ Py_ssize_t max_length = -1;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[1]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ max_length = ival;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdDecompressor_decompress_impl((ZstdDecompressor *)self, &data, max_length);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+
+ return return_value;
+}
+/*[clinic end generated code: output=30c12ef047027ede input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h
new file mode 100644
index 00000000000..79db85405d6
--- /dev/null
+++ b/Modules/_zstd/clinic/zstddict.c.h
@@ -0,0 +1,225 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_zstd_ZstdDict_new__doc__,
+"ZstdDict(dict_content, /, *, is_raw=False)\n"
+"--\n"
+"\n"
+"Represents a Zstandard dictionary.\n"
+"\n"
+" dict_content\n"
+" The content of a Zstandard dictionary as a bytes-like object.\n"
+" is_raw\n"
+" If true, perform no checks on *dict_content*, useful for some\n"
+" advanced cases. Otherwise, check that the content represents\n"
+" a Zstandard dictionary created by the zstd library or CLI.\n"
+"\n"
+"The dictionary can be used for compression or decompression, and can be shared\n"
+"by multiple ZstdCompressor or ZstdDecompressor objects.");
+
+static PyObject *
+_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content,
+ int is_raw);
+
+static PyObject *
+_zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(is_raw), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"", "is_raw", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "ZstdDict",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject * const *fastargs;
+ Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+ Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
+ Py_buffer dict_content = {NULL, NULL};
+ int is_raw = 0;
+
+ fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!fastargs) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_kwonly;
+ }
+ is_raw = PyObject_IsTrue(fastargs[1]);
+ if (is_raw < 0) {
+ goto exit;
+ }
+skip_optional_kwonly:
+ return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw);
+
+exit:
+ /* Cleanup for dict_content */
+ if (dict_content.obj) {
+ PyBuffer_Release(&dict_content);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__,
+"The content of a Zstandard dictionary, as a bytes object.");
+#if defined(_zstd_ZstdDict_dict_content_DOCSTR)
+# undef _zstd_ZstdDict_dict_content_DOCSTR
+#endif
+#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__
+
+#if !defined(_zstd_ZstdDict_dict_content_DOCSTR)
+# define _zstd_ZstdDict_dict_content_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF
+# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self);
+
+static PyObject *
+_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context))
+{
+ return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self);
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__,
+"Load as a digested dictionary to compressor.\n"
+"\n"
+"Pass this attribute as zstd_dict argument:\n"
+"compress(dat, zstd_dict=zd.as_digested_dict)\n"
+"\n"
+"1. Some advanced compression parameters of compressor may be overridden\n"
+" by parameters of digested dictionary.\n"
+"2. ZstdDict has a digested dictionaries cache for each compression level.\n"
+" It\'s faster when loading again a digested dictionary with the same\n"
+" compression level.\n"
+"3. No need to use this for decompression.");
+#if defined(_zstd_ZstdDict_as_digested_dict_DOCSTR)
+# undef _zstd_ZstdDict_as_digested_dict_DOCSTR
+#endif
+#define _zstd_ZstdDict_as_digested_dict_DOCSTR _zstd_ZstdDict_as_digested_dict__doc__
+
+#if !defined(_zstd_ZstdDict_as_digested_dict_DOCSTR)
+# define _zstd_ZstdDict_as_digested_dict_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
+# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, (setter)_zstd_ZstdDict_as_digested_dict_set, _zstd_ZstdDict_as_digested_dict_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, NULL, _zstd_ZstdDict_as_digested_dict_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self);
+
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context))
+{
+ return _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self);
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__,
+"Load as an undigested dictionary to compressor.\n"
+"\n"
+"Pass this attribute as zstd_dict argument:\n"
+"compress(dat, zstd_dict=zd.as_undigested_dict)\n"
+"\n"
+"1. The advanced compression parameters of compressor will not be overridden.\n"
+"2. Loading an undigested dictionary is costly. If load an undigested dictionary\n"
+" multiple times, consider reusing a compressor object.\n"
+"3. No need to use this for decompression.");
+#if defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR)
+# undef _zstd_ZstdDict_as_undigested_dict_DOCSTR
+#endif
+#define _zstd_ZstdDict_as_undigested_dict_DOCSTR _zstd_ZstdDict_as_undigested_dict__doc__
+
+#if !defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR)
+# define _zstd_ZstdDict_as_undigested_dict_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
+# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, (setter)_zstd_ZstdDict_as_undigested_dict_set, _zstd_ZstdDict_as_undigested_dict_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, NULL, _zstd_ZstdDict_as_undigested_dict_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self);
+
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context))
+{
+ return _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self);
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__,
+"Load as a prefix to compressor/decompressor.\n"
+"\n"
+"Pass this attribute as zstd_dict argument:\n"
+"compress(dat, zstd_dict=zd.as_prefix)\n"
+"\n"
+"1. Prefix is compatible with long distance matching, while dictionary is not.\n"
+"2. It only works for the first frame, then the compressor/decompressor will\n"
+" return to no prefix state.\n"
+"3. When decompressing, must use the same prefix as when compressing.\"");
+#if defined(_zstd_ZstdDict_as_prefix_DOCSTR)
+# undef _zstd_ZstdDict_as_prefix_DOCSTR
+#endif
+#define _zstd_ZstdDict_as_prefix_DOCSTR _zstd_ZstdDict_as_prefix__doc__
+
+#if !defined(_zstd_ZstdDict_as_prefix_DOCSTR)
+# define _zstd_ZstdDict_as_prefix_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
+# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, (setter)_zstd_ZstdDict_as_prefix_set, _zstd_ZstdDict_as_prefix_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, NULL, _zstd_ZstdDict_as_prefix_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self);
+
+static PyObject *
+_zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context))
+{
+ return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self);
+}
+/*[clinic end generated code: output=4696cbc722e5fdfc input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c
new file mode 100644
index 00000000000..bc9e6eff89a
--- /dev/null
+++ b/Modules/_zstd/compressor.c
@@ -0,0 +1,797 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+/* ZstdCompressor class definitions */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7166021db1ef7df8]*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "Python.h"
+
+#include "_zstdmodule.h"
+#include "buffer.h"
+#include "internal/pycore_lock.h" // PyMutex_IsLocked
+
+#include <stddef.h> // offsetof()
+#include <zstd.h> // ZSTD_*()
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Compression context */
+ ZSTD_CCtx *cctx;
+
+ /* ZstdDict object in use */
+ PyObject *dict;
+
+ /* Last mode, initialized to ZSTD_e_end */
+ int last_mode;
+
+ /* (nbWorker >= 1) ? 1 : 0 */
+ int use_multithread;
+
+ /* Compression level */
+ int compression_level;
+
+ /* Lock to protect the compression context */
+ PyMutex lock;
+} ZstdCompressor;
+
+#define ZstdCompressor_CAST(op) ((ZstdCompressor *)op)
+
+/*[python input]
+
+class zstd_contentsize_converter(CConverter):
+ type = 'unsigned long long'
+ converter = 'zstd_contentsize_converter'
+
+[python start generated code]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=0932c350d633c7de]*/
+
+
+static int
+zstd_contentsize_converter(PyObject *size, unsigned long long *p)
+{
+ // None means the user indicates the size is unknown.
+ if (size == Py_None) {
+ *p = ZSTD_CONTENTSIZE_UNKNOWN;
+ }
+ else {
+ /* ZSTD_CONTENTSIZE_UNKNOWN is 0ULL - 1
+ ZSTD_CONTENTSIZE_ERROR is 0ULL - 2
+ Users should only pass values < ZSTD_CONTENTSIZE_ERROR */
+ unsigned long long pledged_size = PyLong_AsUnsignedLongLong(size);
+ /* Here we check for (unsigned long long)-1 as a sign of an error in
+ PyLong_AsUnsignedLongLong */
+ if (pledged_size == (unsigned long long)-1 && PyErr_Occurred()) {
+ *p = ZSTD_CONTENTSIZE_ERROR;
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ PyErr_Format(PyExc_ValueError,
+ "size argument should be a positive int less "
+ "than %ull", ZSTD_CONTENTSIZE_ERROR);
+ return 0;
+ }
+ return 0;
+ }
+ if (pledged_size >= ZSTD_CONTENTSIZE_ERROR) {
+ *p = ZSTD_CONTENTSIZE_ERROR;
+ PyErr_Format(PyExc_ValueError,
+ "size argument should be a positive int less "
+ "than %ull", ZSTD_CONTENTSIZE_ERROR);
+ return 0;
+ }
+ *p = pledged_size;
+ }
+ return 1;
+}
+
+#include "clinic/compressor.c.h"
+
+static int
+_zstd_set_c_level(ZstdCompressor *self, int level)
+{
+ /* Set integer compression level */
+ int min_level = ZSTD_minCLevel();
+ int max_level = ZSTD_maxCLevel();
+ if (level < min_level || level > max_level) {
+ PyErr_Format(PyExc_ValueError,
+ "illegal compression level %d; the valid range is [%d, %d]",
+ level, min_level, max_level);
+ return -1;
+ }
+
+ /* Save for generating ZSTD_CDICT */
+ self->compression_level = level;
+
+ /* Set compressionLevel to compression context */
+ size_t zstd_ret = ZSTD_CCtx_setParameter(
+ self->cctx, ZSTD_c_compressionLevel, level);
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret);
+ return -1;
+ }
+ return 0;
+}
+
+static int
+_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options)
+{
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state == NULL) {
+ return -1;
+ }
+
+ if (!PyDict_Check(options)) {
+ PyErr_Format(PyExc_TypeError,
+ "ZstdCompressor() argument 'options' must be dict, not %T",
+ options);
+ return -1;
+ }
+
+ Py_ssize_t pos = 0;
+ PyObject *key, *value;
+ while (PyDict_Next(options, &pos, &key, &value)) {
+ /* Check key type */
+ if (Py_TYPE(key) == mod_state->DParameter_type) {
+ PyErr_SetString(PyExc_TypeError,
+ "compression options dictionary key must not be a "
+ "DecompressionParameter attribute");
+ return -1;
+ }
+
+ Py_INCREF(key);
+ Py_INCREF(value);
+ int key_v = PyLong_AsInt(key);
+ Py_DECREF(key);
+ if (key_v == -1 && PyErr_Occurred()) {
+ Py_DECREF(value);
+ return -1;
+ }
+
+ int value_v = PyLong_AsInt(value);
+ Py_DECREF(value);
+ if (value_v == -1 && PyErr_Occurred()) {
+ return -1;
+ }
+
+ if (key_v == ZSTD_c_compressionLevel) {
+ if (_zstd_set_c_level(self, value_v) < 0) {
+ return -1;
+ }
+ continue;
+ }
+ if (key_v == ZSTD_c_nbWorkers) {
+ /* From the zstd library docs:
+ 1. When nbWorkers >= 1, triggers asynchronous mode when
+ used with ZSTD_compressStream2().
+ 2, Default value is `0`, aka "single-threaded mode" : no
+ worker is spawned, compression is performed inside
+ caller's thread, all invocations are blocking. */
+ if (value_v != 0) {
+ self->use_multithread = 1;
+ }
+ }
+
+ /* Set parameter to compression context */
+ size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v);
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ set_parameter_error(1, key_v, value_v);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static void
+capsule_free_cdict(PyObject *capsule)
+{
+ ZSTD_CDict *cdict = PyCapsule_GetPointer(capsule, NULL);
+ ZSTD_freeCDict(cdict);
+}
+
+ZSTD_CDict *
+_get_CDict(ZstdDict *self, int compressionLevel)
+{
+ assert(PyMutex_IsLocked(&self->lock));
+ PyObject *level = NULL;
+ PyObject *capsule = NULL;
+ ZSTD_CDict *cdict;
+ int ret;
+
+
+ /* int level object */
+ level = PyLong_FromLong(compressionLevel);
+ if (level == NULL) {
+ goto error;
+ }
+
+ /* Get PyCapsule object from self->c_dicts */
+ ret = PyDict_GetItemRef(self->c_dicts, level, &capsule);
+ if (ret < 0) {
+ goto error;
+ }
+ if (capsule == NULL) {
+ /* Create ZSTD_CDict instance */
+ Py_BEGIN_ALLOW_THREADS
+ cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len,
+ compressionLevel);
+ Py_END_ALLOW_THREADS
+
+ if (cdict == NULL) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state != NULL) {
+ PyErr_SetString(mod_state->ZstdError,
+ "Failed to create a ZSTD_CDict instance from "
+ "Zstandard dictionary content.");
+ }
+ goto error;
+ }
+
+ /* Put ZSTD_CDict instance into PyCapsule object */
+ capsule = PyCapsule_New(cdict, NULL, capsule_free_cdict);
+ if (capsule == NULL) {
+ ZSTD_freeCDict(cdict);
+ goto error;
+ }
+
+ /* Add PyCapsule object to self->c_dicts */
+ ret = PyDict_SetItem(self->c_dicts, level, capsule);
+ if (ret < 0) {
+ goto error;
+ }
+ }
+ else {
+ /* ZSTD_CDict instance already exists */
+ cdict = PyCapsule_GetPointer(capsule, NULL);
+ }
+ goto success;
+
+error:
+ cdict = NULL;
+success:
+ Py_XDECREF(level);
+ Py_XDECREF(capsule);
+ return cdict;
+}
+
+static int
+_zstd_load_impl(ZstdCompressor *self, ZstdDict *zd,
+ _zstd_state *mod_state, int type)
+{
+ size_t zstd_ret;
+ if (type == DICT_TYPE_DIGESTED) {
+ /* Get ZSTD_CDict */
+ ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level);
+ if (c_dict == NULL) {
+ return -1;
+ }
+ /* Reference a prepared dictionary.
+ It overrides some compression context's parameters. */
+ zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict);
+ }
+ else if (type == DICT_TYPE_UNDIGESTED) {
+ /* Load a dictionary.
+ It doesn't override compression context's parameters. */
+ zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer,
+ zd->dict_len);
+ }
+ else if (type == DICT_TYPE_PREFIX) {
+ /* Load a prefix */
+ zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer,
+ zd->dict_len);
+ }
+ else {
+ Py_UNREACHABLE();
+ }
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ set_zstd_error(mod_state, ERR_LOAD_C_DICT, zstd_ret);
+ return -1;
+ }
+ return 0;
+}
+
+static int
+_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict)
+{
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ /* When compressing, use undigested dictionary by default. */
+ int type = DICT_TYPE_UNDIGESTED;
+ ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type);
+ if (zd == NULL) {
+ return -1;
+ }
+ int ret;
+ PyMutex_Lock(&zd->lock);
+ ret = _zstd_load_impl(self, zd, mod_state, type);
+ PyMutex_Unlock(&zd->lock);
+ return ret;
+}
+
+/*[clinic input]
+@classmethod
+_zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new
+ level: object = None
+ The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.
+ options: object = None
+ A dict object that contains advanced compression parameters.
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained Zstandard dictionary.
+
+Create a compressor object for compressing data incrementally.
+
+Thread-safe at method level. For one-shot compression, use the compress()
+function instead.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level,
+ PyObject *options, PyObject *zstd_dict)
+/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/
+{
+ ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type);
+ if (self == NULL) {
+ goto error;
+ }
+
+ self->use_multithread = 0;
+ self->dict = NULL;
+ self->lock = (PyMutex){0};
+
+ /* Compression context */
+ self->cctx = ZSTD_createCCtx();
+ if (self->cctx == NULL) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state != NULL) {
+ PyErr_SetString(mod_state->ZstdError,
+ "Unable to create ZSTD_CCtx instance.");
+ }
+ goto error;
+ }
+
+ /* Last mode */
+ self->last_mode = ZSTD_e_end;
+
+ if (level != Py_None && options != Py_None) {
+ PyErr_SetString(PyExc_TypeError,
+ "Only one of level or options should be used.");
+ goto error;
+ }
+
+ /* Set compression level */
+ if (level != Py_None) {
+ if (!PyLong_Check(level)) {
+ PyErr_SetString(PyExc_TypeError,
+ "invalid type for level, expected int");
+ goto error;
+ }
+ int level_v = PyLong_AsInt(level);
+ if (level_v == -1 && PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ PyErr_Format(PyExc_ValueError,
+ "illegal compression level; the valid range is [%d, %d]",
+ ZSTD_minCLevel(), ZSTD_maxCLevel());
+ }
+ goto error;
+ }
+ if (_zstd_set_c_level(self, level_v) < 0) {
+ goto error;
+ }
+ }
+
+ /* Set options dictionary */
+ if (options != Py_None) {
+ if (_zstd_set_c_parameters(self, options) < 0) {
+ goto error;
+ }
+ }
+
+ /* Load Zstandard dictionary to compression context */
+ if (zstd_dict != Py_None) {
+ if (_zstd_load_c_dict(self, zstd_dict) < 0) {
+ goto error;
+ }
+ Py_INCREF(zstd_dict);
+ self->dict = zstd_dict;
+ }
+
+ // We can only start GC tracking once self->dict is set.
+ PyObject_GC_Track(self);
+
+ return (PyObject*)self;
+
+error:
+ Py_XDECREF(self);
+ return NULL;
+}
+
+static void
+ZstdCompressor_dealloc(PyObject *ob)
+{
+ ZstdCompressor *self = ZstdCompressor_CAST(ob);
+
+ PyObject_GC_UnTrack(self);
+
+ /* Free compression context */
+ if (self->cctx) {
+ ZSTD_freeCCtx(self->cctx);
+ }
+
+ assert(!PyMutex_IsLocked(&self->lock));
+
+ /* Py_XDECREF the dict after free the compression context */
+ Py_CLEAR(self->dict);
+
+ PyTypeObject *tp = Py_TYPE(self);
+ PyObject_GC_Del(ob);
+ Py_DECREF(tp);
+}
+
+static PyObject *
+compress_lock_held(ZstdCompressor *self, Py_buffer *data,
+ ZSTD_EndDirective end_directive)
+{
+ assert(PyMutex_IsLocked(&self->lock));
+ ZSTD_inBuffer in;
+ ZSTD_outBuffer out;
+ _BlocksOutputBuffer buffer = {.list = NULL};
+ size_t zstd_ret;
+ PyObject *ret;
+
+ /* Prepare input & output buffers */
+ if (data != NULL) {
+ in.src = data->buf;
+ in.size = data->len;
+ in.pos = 0;
+ }
+ else {
+ in.src = &in;
+ in.size = 0;
+ in.pos = 0;
+ }
+
+ /* Calculate output buffer's size */
+ size_t output_buffer_size = ZSTD_compressBound(in.size);
+ if (output_buffer_size > (size_t) PY_SSIZE_T_MAX) {
+ PyErr_NoMemory();
+ goto error;
+ }
+
+ if (_OutputBuffer_InitWithSize(&buffer, &out, -1,
+ (Py_ssize_t) output_buffer_size) < 0) {
+ goto error;
+ }
+
+
+ /* Zstandard stream compress */
+ while (1) {
+ Py_BEGIN_ALLOW_THREADS
+ zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive);
+ Py_END_ALLOW_THREADS
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret);
+ goto error;
+ }
+
+ /* Finished */
+ if (zstd_ret == 0) {
+ break;
+ }
+
+ /* Output buffer should be exhausted, grow the buffer. */
+ assert(out.pos == out.size);
+ if (out.pos == out.size) {
+ if (_OutputBuffer_Grow(&buffer, &out) < 0) {
+ goto error;
+ }
+ }
+ }
+
+ /* Return a bytes object */
+ ret = _OutputBuffer_Finish(&buffer, &out);
+ if (ret != NULL) {
+ return ret;
+ }
+
+error:
+ _OutputBuffer_OnError(&buffer);
+ return NULL;
+}
+
+#ifndef NDEBUG
+static inline int
+mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out)
+{
+ return in->size == in->pos && out->size != out->pos;
+}
+#endif
+
+static PyObject *
+compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data)
+{
+ assert(PyMutex_IsLocked(&self->lock));
+ ZSTD_inBuffer in;
+ ZSTD_outBuffer out;
+ _BlocksOutputBuffer buffer = {.list = NULL};
+ size_t zstd_ret;
+ PyObject *ret;
+
+ /* Prepare input & output buffers */
+ in.src = data->buf;
+ in.size = data->len;
+ in.pos = 0;
+
+ if (_OutputBuffer_InitAndGrow(&buffer, &out, -1) < 0) {
+ goto error;
+ }
+
+ /* Zstandard stream compress */
+ while (1) {
+ Py_BEGIN_ALLOW_THREADS
+ do {
+ zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in,
+ ZSTD_e_continue);
+ } while (out.pos != out.size
+ && in.pos != in.size
+ && !ZSTD_isError(zstd_ret));
+ Py_END_ALLOW_THREADS
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret);
+ goto error;
+ }
+
+ /* Like compress_lock_held(), output as much as possible. */
+ if (out.pos == out.size) {
+ if (_OutputBuffer_Grow(&buffer, &out) < 0) {
+ goto error;
+ }
+ }
+ else if (in.pos == in.size) {
+ /* Finished */
+ assert(mt_continue_should_break(&in, &out));
+ break;
+ }
+ }
+
+ /* Return a bytes object */
+ ret = _OutputBuffer_Finish(&buffer, &out);
+ if (ret != NULL) {
+ return ret;
+ }
+
+error:
+ _OutputBuffer_OnError(&buffer);
+ return NULL;
+}
+
+/*[clinic input]
+_zstd.ZstdCompressor.compress
+
+ data: Py_buffer
+ mode: int(c_default="ZSTD_e_continue") = ZstdCompressor.CONTINUE
+ Can be these 3 values ZstdCompressor.CONTINUE,
+ ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME
+
+Provide data to the compressor object.
+
+Return a chunk of compressed data if possible, or b'' otherwise. When you have
+finished providing data to the compressor, call the flush() method to finish
+the compression process.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data,
+ int mode)
+/*[clinic end generated code: output=ed7982d1cf7b4f98 input=ac2c21d180f579ea]*/
+{
+ PyObject *ret;
+
+ /* Check mode value */
+ if (mode != ZSTD_e_continue &&
+ mode != ZSTD_e_flush &&
+ mode != ZSTD_e_end)
+ {
+ PyErr_SetString(PyExc_ValueError,
+ "mode argument wrong value, it should be one of "
+ "ZstdCompressor.CONTINUE, ZstdCompressor.FLUSH_BLOCK, "
+ "ZstdCompressor.FLUSH_FRAME.");
+ return NULL;
+ }
+
+ /* Thread-safe code */
+ PyMutex_Lock(&self->lock);
+
+ /* Compress */
+ if (self->use_multithread && mode == ZSTD_e_continue) {
+ ret = compress_mt_continue_lock_held(self, data);
+ }
+ else {
+ ret = compress_lock_held(self, data, mode);
+ }
+
+ if (ret) {
+ self->last_mode = mode;
+ }
+ else {
+ self->last_mode = ZSTD_e_end;
+
+ /* Resetting cctx's session never fail */
+ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+ }
+ PyMutex_Unlock(&self->lock);
+
+ return ret;
+}
+
+/*[clinic input]
+_zstd.ZstdCompressor.flush
+
+ mode: int(c_default="ZSTD_e_end") = ZstdCompressor.FLUSH_FRAME
+ Can be these 2 values ZstdCompressor.FLUSH_FRAME,
+ ZstdCompressor.FLUSH_BLOCK
+
+Finish the compression process.
+
+Flush any remaining data left in internal buffers. Since Zstandard data
+consists of one or more independent frames, the compressor object can still
+be used after this method is called.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode)
+/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=0ab19627f323cdbc]*/
+{
+ PyObject *ret;
+
+ /* Check mode value */
+ if (mode != ZSTD_e_end && mode != ZSTD_e_flush) {
+ PyErr_SetString(PyExc_ValueError,
+ "mode argument wrong value, it should be "
+ "ZstdCompressor.FLUSH_FRAME or "
+ "ZstdCompressor.FLUSH_BLOCK.");
+ return NULL;
+ }
+
+ /* Thread-safe code */
+ PyMutex_Lock(&self->lock);
+
+ ret = compress_lock_held(self, NULL, mode);
+
+ if (ret) {
+ self->last_mode = mode;
+ }
+ else {
+ self->last_mode = ZSTD_e_end;
+
+ /* Resetting cctx's session never fail */
+ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+ }
+ PyMutex_Unlock(&self->lock);
+
+ return ret;
+}
+
+
+/*[clinic input]
+_zstd.ZstdCompressor.set_pledged_input_size
+
+ size: zstd_contentsize
+ The size of the uncompressed data to be provided to the compressor.
+ /
+
+Set the uncompressed content size to be written into the frame header.
+
+This method can be used to ensure the header of the frame about to be written
+includes the size of the data, unless the CompressionParameter.content_size_flag
+is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.
+
+It is important to ensure that the pledged data size matches the actual data
+size. If they do not match the compressed output data may be corrupted and the
+final chunk written may be lost.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self,
+ unsigned long long size)
+/*[clinic end generated code: output=3a09e55cc0e3b4f9 input=afd8a7d78cff2eb5]*/
+{
+ // Error occured while converting argument, should be unreachable
+ assert(size != ZSTD_CONTENTSIZE_ERROR);
+
+ /* Thread-safe code */
+ PyMutex_Lock(&self->lock);
+
+ /* Check the current mode */
+ if (self->last_mode != ZSTD_e_end) {
+ PyErr_SetString(PyExc_ValueError,
+ "set_pledged_input_size() method must be called "
+ "when last_mode == FLUSH_FRAME");
+ PyMutex_Unlock(&self->lock);
+ return NULL;
+ }
+
+ /* Set pledged content size */
+ size_t zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, size);
+ PyMutex_Unlock(&self->lock);
+ if (ZSTD_isError(zstd_ret)) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ set_zstd_error(mod_state, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret);
+ return NULL;
+ }
+
+ Py_RETURN_NONE;
+}
+
+static PyMethodDef ZstdCompressor_methods[] = {
+ _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF
+ _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF
+ _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF
+ {NULL, NULL}
+};
+
+PyDoc_STRVAR(ZstdCompressor_last_mode_doc,
+"The last mode used to this compressor object, its value can be .CONTINUE,\n"
+".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n"
+"It can be used to get the current state of a compressor, such as, data\n"
+"flushed, or a frame ended.");
+
+static PyMemberDef ZstdCompressor_members[] = {
+ {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode),
+ Py_READONLY, ZstdCompressor_last_mode_doc},
+ {NULL}
+};
+
+static int
+ZstdCompressor_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+ ZstdCompressor *self = ZstdCompressor_CAST(ob);
+ Py_VISIT(self->dict);
+ return 0;
+}
+
+static int
+ZstdCompressor_clear(PyObject *ob)
+{
+ ZstdCompressor *self = ZstdCompressor_CAST(ob);
+ Py_CLEAR(self->dict);
+ return 0;
+}
+
+static PyType_Slot zstdcompressor_slots[] = {
+ {Py_tp_new, _zstd_ZstdCompressor_new},
+ {Py_tp_dealloc, ZstdCompressor_dealloc},
+ {Py_tp_methods, ZstdCompressor_methods},
+ {Py_tp_members, ZstdCompressor_members},
+ {Py_tp_doc, (void *)_zstd_ZstdCompressor_new__doc__},
+ {Py_tp_traverse, ZstdCompressor_traverse},
+ {Py_tp_clear, ZstdCompressor_clear},
+ {0, 0}
+};
+
+PyType_Spec zstd_compressor_type_spec = {
+ .name = "compression.zstd.ZstdCompressor",
+ .basicsize = sizeof(ZstdCompressor),
+ // Py_TPFLAGS_IMMUTABLETYPE is not used here as several
+ // associated constants need to be added to the type.
+ // PyType_Freeze is called later to set the flag.
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+ .slots = zstdcompressor_slots,
+};
diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c
new file mode 100644
index 00000000000..c53d6e4cb05
--- /dev/null
+++ b/Modules/_zstd/decompressor.c
@@ -0,0 +1,717 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+/* ZstdDecompressor class definition */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "Python.h"
+
+#include "_zstdmodule.h"
+#include "buffer.h"
+#include "internal/pycore_lock.h" // PyMutex_IsLocked
+
+#include <stdbool.h> // bool
+#include <stddef.h> // offsetof()
+#include <zstd.h> // ZSTD_*()
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Decompression context */
+ ZSTD_DCtx *dctx;
+
+ /* ZstdDict object in use */
+ PyObject *dict;
+
+ /* Unconsumed input data */
+ char *input_buffer;
+ size_t input_buffer_size;
+ size_t in_begin, in_end;
+
+ /* Unused data */
+ PyObject *unused_data;
+
+ /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */
+ bool needs_input;
+
+ /* For ZstdDecompressor, 0 or 1.
+ 1 means the end of the first frame has been reached. */
+ bool eof;
+
+ /* Lock to protect the decompression context */
+ PyMutex lock;
+} ZstdDecompressor;
+
+#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op)
+
+#include "clinic/decompressor.c.h"
+
+static inline ZSTD_DDict *
+_get_DDict(ZstdDict *self)
+{
+ assert(PyMutex_IsLocked(&self->lock));
+ ZSTD_DDict *ret;
+
+ if (self->d_dict == NULL) {
+ /* Create ZSTD_DDict instance from dictionary content */
+ Py_BEGIN_ALLOW_THREADS
+ ret = ZSTD_createDDict(self->dict_buffer, self->dict_len);
+ Py_END_ALLOW_THREADS
+ self->d_dict = ret;
+
+ if (self->d_dict == NULL) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state != NULL) {
+ PyErr_SetString(mod_state->ZstdError,
+ "Failed to create a ZSTD_DDict instance from "
+ "Zstandard dictionary content.");
+ }
+ }
+ }
+
+ return self->d_dict;
+}
+
+static int
+_zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
+{
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state == NULL) {
+ return -1;
+ }
+
+ if (!PyDict_Check(options)) {
+ PyErr_Format(PyExc_TypeError,
+ "ZstdDecompressor() argument 'options' must be dict, not %T",
+ options);
+ return -1;
+ }
+
+ Py_ssize_t pos = 0;
+ PyObject *key, *value;
+ while (PyDict_Next(options, &pos, &key, &value)) {
+ /* Check key type */
+ if (Py_TYPE(key) == mod_state->CParameter_type) {
+ PyErr_SetString(PyExc_TypeError,
+ "compression options dictionary key must not be a "
+ "CompressionParameter attribute");
+ return -1;
+ }
+
+ Py_INCREF(key);
+ Py_INCREF(value);
+ int key_v = PyLong_AsInt(key);
+ Py_DECREF(key);
+ if (key_v == -1 && PyErr_Occurred()) {
+ return -1;
+ }
+
+ int value_v = PyLong_AsInt(value);
+ Py_DECREF(value);
+ if (value_v == -1 && PyErr_Occurred()) {
+ return -1;
+ }
+
+ /* Set parameter to compression context */
+ size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v);
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ set_parameter_error(0, key_v, value_v);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
+_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd,
+ _zstd_state *mod_state, int type)
+{
+ size_t zstd_ret;
+ if (type == DICT_TYPE_DIGESTED) {
+ /* Get ZSTD_DDict */
+ ZSTD_DDict *d_dict = _get_DDict(zd);
+ if (d_dict == NULL) {
+ return -1;
+ }
+ /* Reference a prepared dictionary */
+ zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict);
+ }
+ else if (type == DICT_TYPE_UNDIGESTED) {
+ /* Load a dictionary */
+ zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer,
+ zd->dict_len);
+ }
+ else if (type == DICT_TYPE_PREFIX) {
+ /* Load a prefix */
+ zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer,
+ zd->dict_len);
+ }
+ else {
+ /* Impossible code path */
+ PyErr_SetString(PyExc_SystemError,
+ "load_d_dict() impossible code path");
+ return -1;
+ }
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret);
+ return -1;
+ }
+ return 0;
+}
+
+/* Load dictionary or prefix to decompression context */
+static int
+_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
+{
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ /* When decompressing, use digested dictionary by default. */
+ int type = DICT_TYPE_DIGESTED;
+ ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type);
+ if (zd == NULL) {
+ return -1;
+ }
+ int ret;
+ PyMutex_Lock(&zd->lock);
+ ret = _zstd_load_impl(self, zd, mod_state, type);
+ PyMutex_Unlock(&zd->lock);
+ return ret;
+}
+
+/*
+ Decompress implementation in pseudo code:
+
+ initialize_output_buffer
+ while True:
+ decompress_data
+ set_object_flag # .eof
+
+ if output_buffer_exhausted:
+ if output_buffer_reached_max_length:
+ finish
+ grow_output_buffer
+ elif input_buffer_exhausted:
+ finish
+
+ ZSTD_decompressStream()'s size_t return value:
+ - 0 when a frame is completely decoded and fully flushed,
+ zstd's internal buffer has no data.
+ - An error code, which can be tested using ZSTD_isError().
+ - Or any other value > 0, which means there is still some decoding or
+ flushing to do to complete current frame.
+
+ Note, decompressing "an empty input" in any case will make it > 0.
+*/
+static PyObject *
+decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in,
+ Py_ssize_t max_length)
+{
+ size_t zstd_ret;
+ ZSTD_outBuffer out;
+ _BlocksOutputBuffer buffer = {.list = NULL};
+ PyObject *ret;
+
+ /* Initialize the output buffer */
+ if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
+ goto error;
+ }
+ assert(out.pos == 0);
+
+ while (1) {
+ /* Decompress */
+ Py_BEGIN_ALLOW_THREADS
+ zstd_ret = ZSTD_decompressStream(self->dctx, &out, in);
+ Py_END_ALLOW_THREADS
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret);
+ goto error;
+ }
+
+ /* Set .eof flag */
+ if (zstd_ret == 0) {
+ /* Stop when a frame is decompressed */
+ self->eof = 1;
+ break;
+ }
+
+ /* Need to check out before in. Maybe zstd's internal buffer still has
+ a few bytes that can be output, grow the buffer and continue. */
+ if (out.pos == out.size) {
+ /* Output buffer exhausted */
+
+ /* Output buffer reached max_length */
+ if (_OutputBuffer_ReachedMaxLength(&buffer, &out)) {
+ break;
+ }
+
+ /* Grow output buffer */
+ if (_OutputBuffer_Grow(&buffer, &out) < 0) {
+ goto error;
+ }
+ assert(out.pos == 0);
+
+ }
+ else if (in->pos == in->size) {
+ /* Finished */
+ break;
+ }
+ }
+
+ /* Return a bytes object */
+ ret = _OutputBuffer_Finish(&buffer, &out);
+ if (ret != NULL) {
+ return ret;
+ }
+
+error:
+ _OutputBuffer_OnError(&buffer);
+ return NULL;
+}
+
+static void
+decompressor_reset_session_lock_held(ZstdDecompressor *self)
+{
+ assert(PyMutex_IsLocked(&self->lock));
+
+ /* Reset variables */
+ self->in_begin = 0;
+ self->in_end = 0;
+
+ Py_CLEAR(self->unused_data);
+
+ /* Reset variables in one operation */
+ self->needs_input = 1;
+ self->eof = 0;
+
+ /* Resetting session is guaranteed to never fail */
+ ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
+}
+
+static PyObject *
+stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data,
+ Py_ssize_t max_length)
+{
+ assert(PyMutex_IsLocked(&self->lock));
+ ZSTD_inBuffer in;
+ PyObject *ret = NULL;
+ int use_input_buffer;
+
+ /* Check .eof flag */
+ if (self->eof) {
+ PyErr_SetString(PyExc_EOFError,
+ "Already at the end of a Zstandard frame.");
+ assert(ret == NULL);
+ return NULL;
+ }
+
+ /* Prepare input buffer w/wo unconsumed data */
+ if (self->in_begin == self->in_end) {
+ /* No unconsumed data */
+ use_input_buffer = 0;
+
+ in.src = data->buf;
+ in.size = data->len;
+ in.pos = 0;
+ }
+ else if (data->len == 0) {
+ /* Has unconsumed data, fast path for b'' */
+ assert(self->in_begin < self->in_end);
+
+ use_input_buffer = 1;
+
+ in.src = self->input_buffer + self->in_begin;
+ in.size = self->in_end - self->in_begin;
+ in.pos = 0;
+ }
+ else {
+ /* Has unconsumed data */
+ use_input_buffer = 1;
+
+ /* Unconsumed data size in input_buffer */
+ size_t used_now = self->in_end - self->in_begin;
+ assert(self->in_end > self->in_begin);
+
+ /* Number of bytes we can append to input buffer */
+ size_t avail_now = self->input_buffer_size - self->in_end;
+ assert(self->input_buffer_size >= self->in_end);
+
+ /* Number of bytes we can append if we move existing contents to
+ beginning of buffer */
+ size_t avail_total = self->input_buffer_size - used_now;
+ assert(self->input_buffer_size >= used_now);
+
+ if (avail_total < (size_t) data->len) {
+ char *tmp;
+ size_t new_size = used_now + data->len;
+
+ /* Allocate with new size */
+ tmp = PyMem_Malloc(new_size);
+ if (tmp == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
+
+ /* Copy unconsumed data to the beginning of new buffer */
+ memcpy(tmp,
+ self->input_buffer + self->in_begin,
+ used_now);
+
+ /* Switch to new buffer */
+ PyMem_Free(self->input_buffer);
+ self->input_buffer = tmp;
+ self->input_buffer_size = new_size;
+
+ /* Set begin & end position */
+ self->in_begin = 0;
+ self->in_end = used_now;
+ }
+ else if (avail_now < (size_t) data->len) {
+ /* Move unconsumed data to the beginning.
+ Overlap is possible, so use memmove(). */
+ memmove(self->input_buffer,
+ self->input_buffer + self->in_begin,
+ used_now);
+
+ /* Set begin & end position */
+ self->in_begin = 0;
+ self->in_end = used_now;
+ }
+
+ /* Copy data to input buffer */
+ memcpy(self->input_buffer + self->in_end, data->buf, data->len);
+ self->in_end += data->len;
+
+ in.src = self->input_buffer + self->in_begin;
+ in.size = used_now + data->len;
+ in.pos = 0;
+ }
+ assert(in.pos == 0);
+
+ /* Decompress */
+ ret = decompress_lock_held(self, &in, max_length);
+ if (ret == NULL) {
+ goto error;
+ }
+
+ /* Unconsumed input data */
+ if (in.pos == in.size) {
+ if (Py_SIZE(ret) == max_length || self->eof) {
+ self->needs_input = 0;
+ }
+ else {
+ self->needs_input = 1;
+ }
+
+ if (use_input_buffer) {
+ /* Clear input_buffer */
+ self->in_begin = 0;
+ self->in_end = 0;
+ }
+ }
+ else {
+ size_t data_size = in.size - in.pos;
+
+ self->needs_input = 0;
+
+ if (!use_input_buffer) {
+ /* Discard buffer if it's too small
+ (resizing it may needlessly copy the current contents) */
+ if (self->input_buffer != NULL
+ && self->input_buffer_size < data_size)
+ {
+ PyMem_Free(self->input_buffer);
+ self->input_buffer = NULL;
+ self->input_buffer_size = 0;
+ }
+
+ /* Allocate if necessary */
+ if (self->input_buffer == NULL) {
+ self->input_buffer = PyMem_Malloc(data_size);
+ if (self->input_buffer == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ self->input_buffer_size = data_size;
+ }
+
+ /* Copy unconsumed data */
+ memcpy(self->input_buffer, (char*)in.src + in.pos, data_size);
+ self->in_begin = 0;
+ self->in_end = data_size;
+ }
+ else {
+ /* Use input buffer */
+ self->in_begin += in.pos;
+ }
+ }
+
+ return ret;
+
+error:
+ /* Reset decompressor's states/session */
+ decompressor_reset_session_lock_held(self);
+
+ Py_CLEAR(ret);
+ return NULL;
+}
+
+
+/*[clinic input]
+@classmethod
+_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained Zstandard dictionary.
+ options: object = None
+ A dict object that contains advanced decompression parameters.
+
+Create a decompressor object for decompressing data incrementally.
+
+Thread-safe at method level. For one-shot decompression, use the decompress()
+function instead.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict,
+ PyObject *options)
+/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/
+{
+ ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type);
+ if (self == NULL) {
+ goto error;
+ }
+
+ self->input_buffer = NULL;
+ self->input_buffer_size = 0;
+ self->in_begin = -1;
+ self->in_end = -1;
+ self->unused_data = NULL;
+ self->eof = 0;
+ self->dict = NULL;
+ self->lock = (PyMutex){0};
+
+ /* needs_input flag */
+ self->needs_input = 1;
+
+ /* Decompression context */
+ self->dctx = ZSTD_createDCtx();
+ if (self->dctx == NULL) {
+ _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state != NULL) {
+ PyErr_SetString(mod_state->ZstdError,
+ "Unable to create ZSTD_DCtx instance.");
+ }
+ goto error;
+ }
+
+ /* Load Zstandard dictionary to decompression context */
+ if (zstd_dict != Py_None) {
+ if (_zstd_load_d_dict(self, zstd_dict) < 0) {
+ goto error;
+ }
+ Py_INCREF(zstd_dict);
+ self->dict = zstd_dict;
+ }
+
+ /* Set options dictionary */
+ if (options != Py_None) {
+ if (_zstd_set_d_parameters(self, options) < 0) {
+ goto error;
+ }
+ }
+
+ // We can only start GC tracking once self->dict is set.
+ PyObject_GC_Track(self);
+
+ return (PyObject*)self;
+
+error:
+ Py_XDECREF(self);
+ return NULL;
+}
+
+static void
+ZstdDecompressor_dealloc(PyObject *ob)
+{
+ ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
+
+ PyObject_GC_UnTrack(self);
+
+ /* Free decompression context */
+ if (self->dctx) {
+ ZSTD_freeDCtx(self->dctx);
+ }
+
+ assert(!PyMutex_IsLocked(&self->lock));
+
+ /* Py_CLEAR the dict after free decompression context */
+ Py_CLEAR(self->dict);
+
+ /* Free unconsumed input data buffer */
+ PyMem_Free(self->input_buffer);
+
+ /* Free unused data */
+ Py_CLEAR(self->unused_data);
+
+ PyTypeObject *tp = Py_TYPE(self);
+ PyObject_GC_Del(ob);
+ Py_DECREF(tp);
+}
+
+/*[clinic input]
+@getter
+_zstd.ZstdDecompressor.unused_data
+
+A bytes object of un-consumed input data.
+
+When ZstdDecompressor object stops after a frame is
+decompressed, unused input data after the frame. Otherwise this will be b''.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
+/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/
+{
+ PyObject *ret;
+
+ PyMutex_Lock(&self->lock);
+
+ if (!self->eof) {
+ PyMutex_Unlock(&self->lock);
+ return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
+ }
+ else {
+ if (self->unused_data == NULL) {
+ self->unused_data = PyBytes_FromStringAndSize(
+ self->input_buffer + self->in_begin,
+ self->in_end - self->in_begin);
+ ret = self->unused_data;
+ Py_XINCREF(ret);
+ }
+ else {
+ ret = self->unused_data;
+ Py_INCREF(ret);
+ }
+ }
+
+ PyMutex_Unlock(&self->lock);
+ return ret;
+}
+
+/*[clinic input]
+_zstd.ZstdDecompressor.decompress
+
+ data: Py_buffer
+ A bytes-like object, Zstandard data to be decompressed.
+ max_length: Py_ssize_t = -1
+ Maximum size of returned data. When it is negative, the size of
+ output buffer is unlimited. When it is nonnegative, returns at
+ most max_length bytes of decompressed data.
+
+Decompress *data*, returning uncompressed bytes if possible, or b'' otherwise.
+
+If *max_length* is nonnegative, returns at most *max_length* bytes of
+decompressed data. If this limit is reached and further output can be
+produced, *self.needs_input* will be set to ``False``. In this case, the next
+call to *decompress()* may provide *data* as b'' to obtain more of the output.
+
+If all of the input data was decompressed and returned (either because this
+was less than *max_length* bytes, or because *max_length* was negative),
+*self.needs_input* will be set to True.
+
+Attempting to decompress data after the end of a frame is reached raises an
+EOFError. Any data found after the end of the frame is ignored and saved in
+the self.unused_data attribute.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
+ Py_buffer *data,
+ Py_ssize_t max_length)
+/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/
+{
+ PyObject *ret;
+ /* Thread-safe code */
+ PyMutex_Lock(&self->lock);
+ ret = stream_decompress_lock_held(self, data, max_length);
+ PyMutex_Unlock(&self->lock);
+ return ret;
+}
+
+static PyMethodDef ZstdDecompressor_methods[] = {
+ _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF
+ {NULL, NULL}
+};
+
+PyDoc_STRVAR(ZstdDecompressor_eof_doc,
+"True means the end of the first frame has been reached. If decompress data\n"
+"after that, an EOFError exception will be raised.");
+
+PyDoc_STRVAR(ZstdDecompressor_needs_input_doc,
+"If the max_length output limit in .decompress() method has been reached,\n"
+"and the decompressor has (or may has) unconsumed input data, it will be set\n"
+"to False. In this case, passing b'' to the .decompress() method may output\n"
+"further data.");
+
+static PyMemberDef ZstdDecompressor_members[] = {
+ {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof),
+ Py_READONLY, ZstdDecompressor_eof_doc},
+ {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input),
+ Py_READONLY, ZstdDecompressor_needs_input_doc},
+ {NULL}
+};
+
+static PyGetSetDef ZstdDecompressor_getset[] = {
+ _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
+ {NULL}
+};
+
+static int
+ZstdDecompressor_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+ ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
+ Py_VISIT(self->dict);
+ return 0;
+}
+
+static int
+ZstdDecompressor_clear(PyObject *ob)
+{
+ ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
+ Py_CLEAR(self->dict);
+ Py_CLEAR(self->unused_data);
+ return 0;
+}
+
+static PyType_Slot ZstdDecompressor_slots[] = {
+ {Py_tp_new, _zstd_ZstdDecompressor_new},
+ {Py_tp_dealloc, ZstdDecompressor_dealloc},
+ {Py_tp_methods, ZstdDecompressor_methods},
+ {Py_tp_members, ZstdDecompressor_members},
+ {Py_tp_getset, ZstdDecompressor_getset},
+ {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__},
+ {Py_tp_traverse, ZstdDecompressor_traverse},
+ {Py_tp_clear, ZstdDecompressor_clear},
+ {0, 0}
+};
+
+PyType_Spec zstd_decompressor_type_spec = {
+ .name = "compression.zstd.ZstdDecompressor",
+ .basicsize = sizeof(ZstdDecompressor),
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE
+ | Py_TPFLAGS_HAVE_GC,
+ .slots = ZstdDecompressor_slots,
+};
diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c
new file mode 100644
index 00000000000..14f74aaed46
--- /dev/null
+++ b/Modules/_zstd/zstddict.c
@@ -0,0 +1,273 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+/* ZstdDict class definitions */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "Python.h"
+
+#include "_zstdmodule.h"
+#include "clinic/zstddict.c.h"
+#include "internal/pycore_lock.h" // PyMutex_IsLocked
+
+#include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict()
+
+#define ZstdDict_CAST(op) ((ZstdDict *)op)
+
+/*[clinic input]
+@classmethod
+_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new
+ dict_content: Py_buffer
+ The content of a Zstandard dictionary as a bytes-like object.
+ /
+ *
+ is_raw: bool = False
+ If true, perform no checks on *dict_content*, useful for some
+ advanced cases. Otherwise, check that the content represents
+ a Zstandard dictionary created by the zstd library or CLI.
+
+Represents a Zstandard dictionary.
+
+The dictionary can be used for compression or decompression, and can be shared
+by multiple ZstdCompressor or ZstdDecompressor objects.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content,
+ int is_raw)
+/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/
+{
+ /* All dictionaries must be at least 8 bytes */
+ if (dict_content->len < 8) {
+ PyErr_SetString(PyExc_ValueError,
+ "Zstandard dictionary content too short "
+ "(must have at least eight bytes)");
+ return NULL;
+ }
+
+ ZstdDict* self = PyObject_GC_New(ZstdDict, type);
+ if (self == NULL) {
+ return NULL;
+ }
+
+ self->d_dict = NULL;
+ self->dict_buffer = NULL;
+ self->dict_id = 0;
+ self->lock = (PyMutex){0};
+
+ /* ZSTD_CDict dict */
+ self->c_dicts = PyDict_New();
+ if (self->c_dicts == NULL) {
+ goto error;
+ }
+
+ self->dict_buffer = PyMem_Malloc(dict_content->len);
+ if (!self->dict_buffer) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ memcpy(self->dict_buffer, dict_content->buf, dict_content->len);
+ self->dict_len = dict_content->len;
+
+ /* Get dict_id, 0 means "raw content" dictionary. */
+ self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len);
+
+ /* Check validity for ordinary dictionary */
+ if (!is_raw && self->dict_id == 0) {
+ PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary");
+ goto error;
+ }
+
+ PyObject_GC_Track(self);
+
+ return (PyObject *)self;
+
+error:
+ Py_XDECREF(self);
+ return NULL;
+}
+
+static void
+ZstdDict_dealloc(PyObject *ob)
+{
+ ZstdDict *self = ZstdDict_CAST(ob);
+
+ PyObject_GC_UnTrack(self);
+
+ /* Free ZSTD_DDict instance */
+ if (self->d_dict) {
+ ZSTD_freeDDict(self->d_dict);
+ }
+
+ assert(!PyMutex_IsLocked(&self->lock));
+
+ /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */
+ PyMem_Free(self->dict_buffer);
+ Py_CLEAR(self->c_dicts);
+
+ PyTypeObject *tp = Py_TYPE(self);
+ tp->tp_free(self);
+ Py_DECREF(tp);
+}
+
+PyDoc_STRVAR(ZstdDict_dictid_doc,
+"the Zstandard dictionary, an int between 0 and 2**32.\n\n"
+"A non-zero value represents an ordinary Zstandard dictionary, "
+"conforming to the standardised format.\n\n"
+"The special value '0' means a 'raw content' dictionary,"
+"without any restrictions on format or content.");
+
+static PyObject *
+ZstdDict_repr(PyObject *ob)
+{
+ ZstdDict *dict = ZstdDict_CAST(ob);
+ return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>",
+ (unsigned int)dict->dict_id, dict->dict_len);
+}
+
+static PyMemberDef ZstdDict_members[] = {
+ {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc},
+ {NULL}
+};
+
+/*[clinic input]
+@getter
+_zstd.ZstdDict.dict_content
+
+The content of a Zstandard dictionary, as a bytes object.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/
+{
+ return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len);
+}
+
+/*[clinic input]
+@getter
+_zstd.ZstdDict.as_digested_dict
+
+Load as a digested dictionary to compressor.
+
+Pass this attribute as zstd_dict argument:
+compress(dat, zstd_dict=zd.as_digested_dict)
+
+1. Some advanced compression parameters of compressor may be overridden
+ by parameters of digested dictionary.
+2. ZstdDict has a digested dictionaries cache for each compression level.
+ It's faster when loading again a digested dictionary with the same
+ compression level.
+3. No need to use this for decompression.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/
+{
+ return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED);
+}
+
+/*[clinic input]
+@getter
+_zstd.ZstdDict.as_undigested_dict
+
+Load as an undigested dictionary to compressor.
+
+Pass this attribute as zstd_dict argument:
+compress(dat, zstd_dict=zd.as_undigested_dict)
+
+1. The advanced compression parameters of compressor will not be overridden.
+2. Loading an undigested dictionary is costly. If load an undigested dictionary
+ multiple times, consider reusing a compressor object.
+3. No need to use this for decompression.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/
+{
+ return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED);
+}
+
+/*[clinic input]
+@getter
+_zstd.ZstdDict.as_prefix
+
+Load as a prefix to compressor/decompressor.
+
+Pass this attribute as zstd_dict argument:
+compress(dat, zstd_dict=zd.as_prefix)
+
+1. Prefix is compatible with long distance matching, while dictionary is not.
+2. It only works for the first frame, then the compressor/decompressor will
+ return to no prefix state.
+3. When decompressing, must use the same prefix as when compressing."
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/
+{
+ return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX);
+}
+
+static PyGetSetDef ZstdDict_getset[] = {
+ _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF
+ _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
+ _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
+ _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
+ {NULL}
+};
+
+static Py_ssize_t
+ZstdDict_length(PyObject *ob)
+{
+ ZstdDict *self = ZstdDict_CAST(ob);
+ return self->dict_len;
+}
+
+static int
+ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+ ZstdDict *self = ZstdDict_CAST(ob);
+ Py_VISIT(self->c_dicts);
+ return 0;
+}
+
+static int
+ZstdDict_clear(PyObject *ob)
+{
+ ZstdDict *self = ZstdDict_CAST(ob);
+ Py_CLEAR(self->c_dicts);
+ return 0;
+}
+
+static PyType_Slot zstddict_slots[] = {
+ {Py_tp_members, ZstdDict_members},
+ {Py_tp_getset, ZstdDict_getset},
+ {Py_tp_new, _zstd_ZstdDict_new},
+ {Py_tp_dealloc, ZstdDict_dealloc},
+ {Py_tp_repr, ZstdDict_repr},
+ {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__},
+ {Py_sq_length, ZstdDict_length},
+ {Py_tp_traverse, ZstdDict_traverse},
+ {Py_tp_clear, ZstdDict_clear},
+ {0, 0}
+};
+
+PyType_Spec zstd_dict_type_spec = {
+ .name = "compression.zstd.ZstdDict",
+ .basicsize = sizeof(ZstdDict),
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE
+ | Py_TPFLAGS_HAVE_GC,
+ .slots = zstddict_slots,
+};
diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h
new file mode 100644
index 00000000000..4a403416dbd
--- /dev/null
+++ b/Modules/_zstd/zstddict.h
@@ -0,0 +1,29 @@
+/* Low level interface to the Zstandard algorthm & the zstd library. */
+
+#ifndef ZSTD_DICT_H
+#define ZSTD_DICT_H
+
+#include <zstd.h> // ZSTD_DDict
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Reusable compress/decompress dictionary, they are created once and
+ can be shared by multiple threads concurrently, since its usage is
+ read-only.
+ c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */
+ ZSTD_DDict *d_dict;
+ PyObject *c_dicts;
+
+ /* Dictionary content. */
+ char *dict_buffer;
+ Py_ssize_t dict_len;
+
+ /* Dictionary id */
+ uint32_t dict_id;
+
+ /* Lock to protect the digested dictionaries */
+ PyMutex lock;
+} ZstdDict;
+
+#endif // !ZSTD_DICT_H