about summary refs log tree commit diff stats homepage
path: root/Modules/_zstd/_zstdmodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_zstd/_zstdmodule.c')
-rw-r--r-- Modules/_zstd/_zstdmodule.c 855
1 files changed, 354 insertions, 501 deletions
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c
index 18dc13b3fd1..d75c0779474 100644
--- a/Modules/_zstd/_zstdmodule.c
+++ b/Modules/_zstd/_zstdmodule.c
@@ -1,14 +1,16 @@
-/*
-Low level interface to Meta's zstd library for use in the compression.zstd
-Python module.
-*/
+/* Low level interface to the Zstandard algorithm & the zstd library. */
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
+#include "Python.h"
+
#include "_zstdmodule.h"
+#include <zstd.h> // ZSTD_*()
+#include <zdict.h> // ZDICT_*()
+
/*[clinic input]
module _zstd
@@ -17,52 +19,91 @@ module _zstd
#include "clinic/_zstdmodule.c.h"
+/* Resolve a zstd_dict argument to the ZstdDict object it designates.
+
+   Two argument shapes are accepted:
+     - an instance of state->ZstdDict_type: returned unchanged, and *ptype
+       is left untouched;
+     - an exact 2-tuple (ZstdDict, int) whose int equals DICT_TYPE_DIGESTED,
+       DICT_TYPE_UNDIGESTED or DICT_TYPE_PREFIX: the int is written to
+       *ptype and the tuple's first item is returned.
+
+   No reference count is modified; the result is either the argument itself
+   or the item stored in the tuple.
+
+   Returns NULL on failure:
+     - with TypeError set when the argument matches neither shape (this
+       includes a 2-tuple whose int is not one of the three constants);
+     - with the exception raised by PyLong_AsInt() when the conversion fails;
+     - with no exception set when state is NULL. */
+ZstdDict *
+_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype)
+{
+    if (state == NULL) {
+        return NULL;
+    }
+
+    /* Shape 1: a plain ZstdDict instance */
+    if (PyObject_TypeCheck(dict, state->ZstdDict_type)) {
+        return (ZstdDict*)dict;
+    }
+
+    /* Shape 2: a (ZstdDict, type) pair */
+    if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) {
+        PyObject *zd_obj = PyTuple_GET_ITEM(dict, 0);
+        PyObject *kind_obj = PyTuple_GET_ITEM(dict, 1);
+
+        if (PyObject_TypeCheck(zd_obj, state->ZstdDict_type)
+            && PyLong_Check(kind_obj))
+        {
+            int type = PyLong_AsInt(kind_obj);
+            if (type == -1 && PyErr_Occurred()) {
+                return NULL;
+            }
+
+            int is_known_type = (type == DICT_TYPE_DIGESTED
+                                 || type == DICT_TYPE_UNDIGESTED
+                                 || type == DICT_TYPE_PREFIX);
+            if (is_known_type) {
+                *ptype = type;
+                return (ZstdDict*)zd_obj;
+            }
+            /* An unknown type value falls through to the TypeError below. */
+        }
+    }
+
+    /* Neither shape matched */
+    PyErr_SetString(PyExc_TypeError,
+                    "zstd_dict argument should be a ZstdDict object.");
+    return NULL;
+}
+
+
/* Format error message and set ZstdError. */
void
-set_zstd_error(const _zstd_state* const state,
- error_type type, size_t zstd_ret)
+set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret)
{
- char *msg;
+ const char *msg;
assert(ZSTD_isError(zstd_ret));
- switch (type)
- {
- case ERR_DECOMPRESS:
- msg = "Unable to decompress zstd data: %s";
- break;
- case ERR_COMPRESS:
- msg = "Unable to compress zstd data: %s";
- break;
- case ERR_SET_PLEDGED_INPUT_SIZE:
- msg = "Unable to set pledged uncompressed content size: %s";
- break;
-
- case ERR_LOAD_D_DICT:
- msg = "Unable to load zstd dictionary or prefix for decompression: %s";
- break;
- case ERR_LOAD_C_DICT:
- msg = "Unable to load zstd dictionary or prefix for compression: %s";
- break;
-
- case ERR_GET_C_BOUNDS:
- msg = "Unable to get zstd compression parameter bounds: %s";
- break;
- case ERR_GET_D_BOUNDS:
- msg = "Unable to get zstd decompression parameter bounds: %s";
- break;
- case ERR_SET_C_LEVEL:
- msg = "Unable to set zstd compression level: %s";
- break;
-
- case ERR_TRAIN_DICT:
- msg = "Unable to train zstd dictionary: %s";
- break;
- case ERR_FINALIZE_DICT:
- msg = "Unable to finalize zstd dictionary: %s";
- break;
-
- default:
- Py_UNREACHABLE();
+ if (state == NULL) {
+ return;
+ }
+ switch (type) {
+ case ERR_DECOMPRESS:
+ msg = "Unable to decompress Zstandard data: %s";
+ break;
+ case ERR_COMPRESS:
+ msg = "Unable to compress Zstandard data: %s";
+ break;
+ case ERR_SET_PLEDGED_INPUT_SIZE:
+ msg = "Unable to set pledged uncompressed content size: %s";
+ break;
+
+ case ERR_LOAD_D_DICT:
+ msg = "Unable to load Zstandard dictionary or prefix for "
+ "decompression: %s";
+ break;
+ case ERR_LOAD_C_DICT:
+ msg = "Unable to load Zstandard dictionary or prefix for "
+ "compression: %s";
+ break;
+
+ case ERR_GET_C_BOUNDS:
+ msg = "Unable to get zstd compression parameter bounds: %s";
+ break;
+ case ERR_GET_D_BOUNDS:
+ msg = "Unable to get zstd decompression parameter bounds: %s";
+ break;
+ case ERR_SET_C_LEVEL:
+ msg = "Unable to set zstd compression level: %s";
+ break;
+
+ case ERR_TRAIN_DICT:
+ msg = "Unable to train the Zstandard dictionary: %s";
+ break;
+ case ERR_FINALIZE_DICT:
+ msg = "Unable to finalize the Zstandard dictionary: %s";
+ break;
+
+ default:
+ Py_UNREACHABLE();
}
PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret));
}
@@ -72,48 +113,43 @@ typedef struct {
char parameter_name[32];
} ParameterInfo;
-static const ParameterInfo cp_list[] =
-{
- {ZSTD_c_compressionLevel, "compressionLevel"},
- {ZSTD_c_windowLog, "windowLog"},
- {ZSTD_c_hashLog, "hashLog"},
- {ZSTD_c_chainLog, "chainLog"},
- {ZSTD_c_searchLog, "searchLog"},
- {ZSTD_c_minMatch, "minMatch"},
- {ZSTD_c_targetLength, "targetLength"},
+static const ParameterInfo cp_list[] = {
+ {ZSTD_c_compressionLevel, "compression_level"},
+ {ZSTD_c_windowLog, "window_log"},
+ {ZSTD_c_hashLog, "hash_log"},
+ {ZSTD_c_chainLog, "chain_log"},
+ {ZSTD_c_searchLog, "search_log"},
+ {ZSTD_c_minMatch, "min_match"},
+ {ZSTD_c_targetLength, "target_length"},
{ZSTD_c_strategy, "strategy"},
- {ZSTD_c_enableLongDistanceMatching, "enableLongDistanceMatching"},
- {ZSTD_c_ldmHashLog, "ldmHashLog"},
- {ZSTD_c_ldmMinMatch, "ldmMinMatch"},
- {ZSTD_c_ldmBucketSizeLog, "ldmBucketSizeLog"},
- {ZSTD_c_ldmHashRateLog, "ldmHashRateLog"},
+ {ZSTD_c_enableLongDistanceMatching, "enable_long_distance_matching"},
+ {ZSTD_c_ldmHashLog, "ldm_hash_log"},
+ {ZSTD_c_ldmMinMatch, "ldm_min_match"},
+ {ZSTD_c_ldmBucketSizeLog, "ldm_bucket_size_log"},
+ {ZSTD_c_ldmHashRateLog, "ldm_hash_rate_log"},
- {ZSTD_c_contentSizeFlag, "contentSizeFlag"},
- {ZSTD_c_checksumFlag, "checksumFlag"},
- {ZSTD_c_dictIDFlag, "dictIDFlag"},
+ {ZSTD_c_contentSizeFlag, "content_size_flag"},
+ {ZSTD_c_checksumFlag, "checksum_flag"},
+ {ZSTD_c_dictIDFlag, "dict_id_flag"},
- {ZSTD_c_nbWorkers, "nbWorkers"},
- {ZSTD_c_jobSize, "jobSize"},
- {ZSTD_c_overlapLog, "overlapLog"}
+ {ZSTD_c_nbWorkers, "nb_workers"},
+ {ZSTD_c_jobSize, "job_size"},
+ {ZSTD_c_overlapLog, "overlap_log"}
};
-static const ParameterInfo dp_list[] =
-{
- {ZSTD_d_windowLogMax, "windowLogMax"}
+static const ParameterInfo dp_list[] = {
+ {ZSTD_d_windowLogMax, "window_log_max"}
};
void
-set_parameter_error(const _zstd_state* const state, int is_compress,
- int key_v, int value_v)
+set_parameter_error(int is_compress, int key_v, int value_v)
{
ParameterInfo const *list;
int list_size;
- char const *name;
char *type;
ZSTD_bounds bounds;
- int i;
- char pos_msg[128];
+ char pos_msg[64];
if (is_compress) {
list = cp_list;
@@ -127,8 +163,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
}
/* Find parameter's name */
- name = NULL;
- for (i = 0; i < list_size; i++) {
+ char const *name = NULL;
+ for (int i = 0; i < list_size; i++) {
if (key_v == (list+i)->parameter) {
name = (list+i)->parameter_name;
break;
@@ -150,20 +186,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
bounds = ZSTD_dParam_getBounds(key_v);
}
if (ZSTD_isError(bounds.error)) {
- PyErr_Format(state->ZstdError,
- "Zstd %s parameter \"%s\" is invalid. (zstd v%s)",
- type, name, ZSTD_versionString());
+ PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'",
+ type, name);
return;
}
/* Error message */
- PyErr_Format(state->ZstdError,
- "Error when setting zstd %s parameter \"%s\", it "
- "should %d <= value <= %d, provided value is %d. "
- "(zstd v%s, %d-bit build)",
- type, name,
- bounds.lowerBound, bounds.upperBound, value_v,
- ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t));
+ PyErr_Format(PyExc_ValueError,
+ "%s parameter '%s' received an illegal value %d; "
+ "the valid range is [%d, %d]",
+ type, name, value_v, bounds.lowerBound, bounds.upperBound);
}
static inline _zstd_state*
@@ -174,72 +206,90 @@ get_zstd_state(PyObject *module)
return (_zstd_state *)state;
}
+/* Validate a tuple of sample sizes against the concatenated samples buffer
+   and convert it to a C array.
+
+   samples_bytes  bytes object holding all samples back to back.
+   samples_sizes  tuple whose items are converted with PyLong_AsSize_t().
+   chunk_sizes    out: receives an array allocated with PyMem_New() holding
+                  one size_t per tuple item. It is assigned as soon as the
+                  allocation is attempted and is NOT freed here on later
+                  failures, so the caller has to release it on every path.
+
+   Returns the number of samples on success. Returns -1 with an exception
+   set when:
+     - the tuple has more than UINT32_MAX items (ValueError);
+     - the allocation fails (MemoryError);
+     - an item cannot be converted for a reason other than overflow (the
+       conversion's own exception is kept);
+     - an item overflows size_t, or the sizes do not add up to exactly
+       the length of samples_bytes (ValueError). */
+static Py_ssize_t
+calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes,
+                        size_t **chunk_sizes)
+{
+    const Py_ssize_t count = PyTuple_GET_SIZE(samples_sizes);
+    if ((size_t) count > UINT32_MAX) {
+        PyErr_Format(PyExc_ValueError,
+                     "The number of samples should be <= %u.", UINT32_MAX);
+        return -1;
+    }
+
+    /* Allocate the output array; publish it to the caller right away */
+    size_t *sizes = PyMem_New(size_t, count);
+    *chunk_sizes = sizes;
+    if (sizes == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* Count down from the buffer length: every sample consumes its size,
+       and a valid tuple leaves exactly zero bytes unaccounted for. */
+    size_t remaining = (size_t)PyBytes_GET_SIZE(samples_bytes);
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, idx));
+        sizes[idx] = size;
+        if (size == (size_t)-1 && PyErr_Occurred()) {
+            if (!PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                return -1;
+            }
+            /* An out-of-range size can never match the buffer length;
+               the ValueError below replaces the OverflowError. */
+            goto mismatch;
+        }
+        if (remaining < size) {
+            goto mismatch;
+        }
+        remaining -= size;
+    }
+
+    if (remaining == 0) {
+        return count;
+    }
+
+mismatch:
+    PyErr_SetString(PyExc_ValueError,
+                    "The samples size tuple doesn't match the "
+                    "concatenation's size.");
+    return -1;
+}
+
+
/*[clinic input]
-_zstd._train_dict
+_zstd.train_dict
samples_bytes: PyBytesObject
Concatenation of samples.
- samples_size_list: object(subclass_of='&PyList_Type')
- List of samples' sizes.
+ samples_sizes: object(subclass_of='&PyTuple_Type')
+ Tuple of samples' sizes.
dict_size: Py_ssize_t
The size of the dictionary.
/
-Internal function, train a zstd dictionary on sample data.
+Train a Zstandard dictionary on sample data.
[clinic start generated code]*/
static PyObject *
-_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
- PyObject *samples_size_list, Py_ssize_t dict_size)
-/*[clinic end generated code: output=ee53c34c8f77886b input=b21d092c695a3a81]*/
+_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
+ PyObject *samples_sizes, Py_ssize_t dict_size)
+/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/
{
- // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict
- // are pretty similar. We should see if we can refactor them to share that code.
- Py_ssize_t chunks_number;
- size_t *chunk_sizes = NULL;
PyObject *dst_dict_bytes = NULL;
+ size_t *chunk_sizes = NULL;
+ Py_ssize_t chunks_number;
size_t zstd_ret;
- Py_ssize_t sizes_sum;
- Py_ssize_t i;
/* Check arguments */
if (dict_size <= 0) {
- PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
- return NULL;
- }
-
- chunks_number = Py_SIZE(samples_size_list);
- if ((size_t) chunks_number > UINT32_MAX) {
- PyErr_Format(PyExc_ValueError,
- "The number of samples should be <= %u.", UINT32_MAX);
+ PyErr_SetString(PyExc_ValueError,
+ "dict_size argument should be positive number.");
return NULL;
}
- /* Prepare chunk_sizes */
- chunk_sizes = PyMem_New(size_t, chunks_number);
- if (chunk_sizes == NULL) {
- PyErr_NoMemory();
- goto error;
- }
-
- sizes_sum = 0;
- for (i = 0; i < chunks_number; i++) {
- PyObject *size = PyList_GetItemRef(samples_size_list, i);
- chunk_sizes[i] = PyLong_AsSize_t(size);
- Py_DECREF(size);
- if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
- PyErr_Format(PyExc_ValueError,
- "Items in samples_size_list should be an int "
- "object, with a value between 0 and %u.", SIZE_MAX);
- goto error;
- }
- sizes_sum += chunk_sizes[i];
- }
-
- if (sizes_sum != Py_SIZE(samples_bytes)) {
- PyErr_SetString(PyExc_ValueError,
- "The samples size list doesn't match the concatenation's size.");
+ /* Check that the samples are valid and get their sizes */
+ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes,
+ &chunk_sizes);
+ if (chunks_number < 0) {
goto error;
}
@@ -251,16 +301,16 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
/* Train the dictionary */
char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes);
- char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
+ const char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
Py_BEGIN_ALLOW_THREADS
zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size,
samples_buffer,
chunk_sizes, (uint32_t)chunks_number);
Py_END_ALLOW_THREADS
- /* Check zstd dict error */
+ /* Check Zstandard dict error */
if (ZDICT_isError(zstd_ret)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret);
goto error;
}
@@ -281,74 +331,47 @@ success:
}
/*[clinic input]
-_zstd._finalize_dict
+_zstd.finalize_dict
custom_dict_bytes: PyBytesObject
Custom dictionary content.
samples_bytes: PyBytesObject
Concatenation of samples.
- samples_size_list: object(subclass_of='&PyList_Type')
- List of samples' sizes.
+ samples_sizes: object(subclass_of='&PyTuple_Type')
+ Tuple of samples' sizes.
dict_size: Py_ssize_t
The size of the dictionary.
compression_level: int
- Optimize for a specific zstd compression level, 0 means default.
+ Optimize for a specific Zstandard compression level, 0 means default.
/
-Internal function, finalize a zstd dictionary.
+Finalize a Zstandard dictionary.
[clinic start generated code]*/
static PyObject *
-_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
- PyBytesObject *samples_bytes,
- PyObject *samples_size_list, Py_ssize_t dict_size,
- int compression_level)
-/*[clinic end generated code: output=9c2a7d8c845cee93 input=08531a803d87c56f]*/
+_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
+ PyBytesObject *samples_bytes,
+ PyObject *samples_sizes, Py_ssize_t dict_size,
+ int compression_level)
+/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/
{
Py_ssize_t chunks_number;
size_t *chunk_sizes = NULL;
PyObject *dst_dict_bytes = NULL;
size_t zstd_ret;
ZDICT_params_t params;
- Py_ssize_t sizes_sum;
- Py_ssize_t i;
/* Check arguments */
if (dict_size <= 0) {
- PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
- return NULL;
- }
-
- chunks_number = Py_SIZE(samples_size_list);
- if ((size_t) chunks_number > UINT32_MAX) {
- PyErr_Format(PyExc_ValueError,
- "The number of samples should be <= %u.", UINT32_MAX);
+ PyErr_SetString(PyExc_ValueError,
+ "dict_size argument should be positive number.");
return NULL;
}
- /* Prepare chunk_sizes */
- chunk_sizes = PyMem_New(size_t, chunks_number);
- if (chunk_sizes == NULL) {
- PyErr_NoMemory();
- goto error;
- }
-
- sizes_sum = 0;
- for (i = 0; i < chunks_number; i++) {
- PyObject *size = PyList_GET_ITEM(samples_size_list, i);
- chunk_sizes[i] = PyLong_AsSize_t(size);
- if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
- PyErr_Format(PyExc_ValueError,
- "Items in samples_size_list should be an int "
- "object, with a value between 0 and %u.", SIZE_MAX);
- goto error;
- }
- sizes_sum += chunk_sizes[i];
- }
-
- if (sizes_sum != Py_SIZE(samples_bytes)) {
- PyErr_SetString(PyExc_ValueError,
- "The samples size list doesn't match the concatenation's size.");
+ /* Check that the samples are valid and get their sizes */
+ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes,
+ &chunk_sizes);
+ if (chunks_number < 0) {
goto error;
}
@@ -360,7 +383,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
/* Parameters */
- /* Optimize for a specific zstd compression level, 0 means default. */
+ /* Optimize for a specific Zstandard compression level, 0 means default. */
params.compressionLevel = compression_level;
/* Write log to stderr, 0 = none. */
params.notificationLevel = 0;
@@ -371,14 +394,15 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
Py_BEGIN_ALLOW_THREADS
zstd_ret = ZDICT_finalizeDictionary(
PyBytes_AS_STRING(dst_dict_bytes), dict_size,
- PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes),
+ PyBytes_AS_STRING(custom_dict_bytes),
+ Py_SIZE(custom_dict_bytes),
PyBytes_AS_STRING(samples_bytes), chunk_sizes,
(uint32_t)chunks_number, params);
Py_END_ALLOW_THREADS
- /* Check zstd dict error */
+ /* Check Zstandard dict error */
if (ZDICT_isError(zstd_ret)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret);
goto error;
}
@@ -400,26 +424,25 @@ success:
/*[clinic input]
-_zstd._get_param_bounds
+_zstd.get_param_bounds
- is_compress: bool
- True for CParameter, False for DParameter.
parameter: int
The parameter to get bounds.
+ is_compress: bool
+ True for CompressionParameter, False for DecompressionParameter.
-Internal function, get CParameter/DParameter bounds.
+Get CompressionParameter/DecompressionParameter bounds.
[clinic start generated code]*/
static PyObject *
-_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
- int parameter)
-/*[clinic end generated code: output=b751dc710f89ef55 input=fb21ff96aff65df1]*/
+_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress)
+/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/
{
ZSTD_bounds bound;
if (is_compress) {
bound = ZSTD_cParam_getBounds(parameter);
if (ZSTD_isError(bound.error)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error);
return NULL;
}
@@ -427,7 +450,7 @@ _zstd__get_param_bounds_impl(PyObject *module, int is_compress,
else {
bound = ZSTD_dParam_getBounds(parameter);
if (ZSTD_isError(bound.error)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error);
return NULL;
}
@@ -443,24 +466,23 @@ _zstd.get_frame_size
A bytes-like object, it should start from the beginning of a frame,
and contains at least one complete frame.
-Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.
-
-It will iterate all blocks' headers within a frame, to accumulate the frame size.
+Get the size of a Zstandard frame, including the header and optional checksum.
[clinic start generated code]*/
static PyObject *
_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
-/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/
+/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/
{
size_t frame_size;
- frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len);
+ frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf,
+ frame_buffer->len);
if (ZSTD_isError(frame_size)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
PyErr_Format(mod_state->ZstdError,
- "Error when finding the compressed size of a zstd frame. "
- "Make sure the frame_buffer argument starts from the "
- "beginning of a frame, and its length not less than this "
+ "Error when finding the compressed size of a Zstandard frame. "
+ "Ensure the frame_buffer argument starts from the "
+ "beginning of a frame, and its length is not less than this "
"complete frame. Zstd error message: %s.",
ZSTD_getErrorName(frame_size));
return NULL;
@@ -470,17 +492,17 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
}
/*[clinic input]
-_zstd._get_frame_info
+_zstd.get_frame_info
frame_buffer: Py_buffer
- A bytes-like object, containing the header of a zstd frame.
+ A bytes-like object, containing the header of a Zstandard frame.
-Internal function, get zstd frame infomation from a frame header.
+Get Zstandard frame information from a frame header.
[clinic start generated code]*/
static PyObject *
-_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
-/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/
+_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
+/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/
{
uint64_t decompressed_size;
uint32_t dict_id;
@@ -492,12 +514,12 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
/* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */
if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
PyErr_SetString(mod_state->ZstdError,
"Error when getting information from the header of "
- "a zstd frame. Make sure the frame_buffer argument "
+ "a Zstandard frame. Ensure the frame_buffer argument "
"starts from the beginning of a frame, and its length "
- "not less than the frame header (6~18 bytes).");
+ "is not less than the frame header (6~18 bytes).");
return NULL;
}
@@ -512,325 +534,169 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
}
/*[clinic input]
-_zstd._set_parameter_types
+_zstd.set_parameter_types
c_parameter_type: object(subclass_of='&PyType_Type')
- CParameter IntEnum type object
+ CompressionParameter IntEnum type object
d_parameter_type: object(subclass_of='&PyType_Type')
- DParameter IntEnum type object
+ DecompressionParameter IntEnum type object
-Internal function, set CParameter/DParameter types for validity check.
+Set CompressionParameter and DecompressionParameter types for validity check.
[clinic start generated code]*/
static PyObject *
-_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
- PyObject *d_parameter_type)
-/*[clinic end generated code: output=a13d4890ccbd2873 input=3e7d0d37c3a1045a]*/
+_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
+ PyObject *d_parameter_type)
+/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/
{
- _zstd_state* const mod_state = get_zstd_state(module);
-
- if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) {
- PyErr_SetString(PyExc_ValueError,
- "The two arguments should be CParameter and "
- "DParameter types.");
- return NULL;
- }
+ _zstd_state* mod_state = get_zstd_state(module);
- Py_XDECREF(mod_state->CParameter_type);
Py_INCREF(c_parameter_type);
- mod_state->CParameter_type = (PyTypeObject*) c_parameter_type;
-
- Py_XDECREF(mod_state->DParameter_type);
+ Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type);
Py_INCREF(d_parameter_type);
- mod_state->DParameter_type = (PyTypeObject*)d_parameter_type;
+ Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type);
Py_RETURN_NONE;
}
static PyMethodDef _zstd_methods[] = {
- _ZSTD__TRAIN_DICT_METHODDEF
- _ZSTD__FINALIZE_DICT_METHODDEF
- _ZSTD__GET_PARAM_BOUNDS_METHODDEF
+ _ZSTD_TRAIN_DICT_METHODDEF
+ _ZSTD_FINALIZE_DICT_METHODDEF
+ _ZSTD_GET_PARAM_BOUNDS_METHODDEF
_ZSTD_GET_FRAME_SIZE_METHODDEF
- _ZSTD__GET_FRAME_INFO_METHODDEF
- _ZSTD__SET_PARAMETER_TYPES_METHODDEF
-
- {0}
+ _ZSTD_GET_FRAME_INFO_METHODDEF
+ _ZSTD_SET_PARAMETER_TYPES_METHODDEF
+ {NULL, NULL}
};
-
-#define ADD_INT_PREFIX_MACRO(module, macro) \
- do { \
- if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \
- return -1; \
- } \
- } while(0)
-
static int
-add_parameters(PyObject *module)
-{
- /* If add new parameters, please also add to cp_list/dp_list above. */
-
- /* Compression parameters */
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy);
-
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog);
-
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag);
-
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize);
- ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog);
-
- /* Decompression parameters */
- ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax);
-
- /* ZSTD_strategy enum */
- ADD_INT_PREFIX_MACRO(module, ZSTD_fast);
- ADD_INT_PREFIX_MACRO(module, ZSTD_dfast);
- ADD_INT_PREFIX_MACRO(module, ZSTD_greedy);
- ADD_INT_PREFIX_MACRO(module, ZSTD_lazy);
- ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2);
- ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2);
- ADD_INT_PREFIX_MACRO(module, ZSTD_btopt);
- ADD_INT_PREFIX_MACRO(module, ZSTD_btultra);
- ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2);
-
- return 0;
-}
-
-static inline PyObject *
-get_zstd_version_info(void)
-{
- uint32_t ver = ZSTD_versionNumber();
- uint32_t major, minor, release;
-
- major = ver / 10000;
- minor = (ver / 100) % 100;
- release = ver % 100;
-
- return Py_BuildValue("III", major, minor, release);
-}
-
-static inline int
-add_vars_to_module(PyObject *module)
-{
- PyObject *obj;
-
- /* zstd_version, a str. */
- if (PyModule_AddStringConstant(module, "zstd_version",
- ZSTD_versionString()) < 0) {
- return -1;
- }
-
- /* zstd_version_info, a tuple. */
- obj = get_zstd_version_info();
- if (PyModule_AddObjectRef(module, "zstd_version_info", obj) < 0) {
- Py_XDECREF(obj);
- return -1;
- }
- Py_DECREF(obj);
-
- /* Add zstd parameters */
- if (add_parameters(module) < 0) {
- return -1;
- }
-
- /* _compressionLevel_values: (default, min, max)
- ZSTD_defaultCLevel() was added in zstd v1.5.0 */
- obj = Py_BuildValue("iii",
-#if ZSTD_VERSION_NUMBER < 10500
- ZSTD_CLEVEL_DEFAULT,
-#else
- ZSTD_defaultCLevel(),
-#endif
- ZSTD_minCLevel(),
- ZSTD_maxCLevel());
- if (PyModule_AddObjectRef(module,
- "_compressionLevel_values",
- obj) < 0) {
- Py_XDECREF(obj);
- return -1;
- }
- Py_DECREF(obj);
-
- /* _ZSTD_CStreamSizes */
- obj = Py_BuildValue("II",
- (uint32_t)ZSTD_CStreamInSize(),
- (uint32_t)ZSTD_CStreamOutSize());
- if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", obj) < 0) {
- Py_XDECREF(obj);
- return -1;
- }
- Py_DECREF(obj);
-
- /* _ZSTD_DStreamSizes */
- obj = Py_BuildValue("II",
- (uint32_t)ZSTD_DStreamInSize(),
- (uint32_t)ZSTD_DStreamOutSize());
- if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", obj) < 0) {
- Py_XDECREF(obj);
- return -1;
- }
- Py_DECREF(obj);
-
- /* _ZSTD_CONFIG */
- obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c",
- Py_False,
- Py_True,
-/* User mremap output buffer */
-#if defined(HAVE_MREMAP)
- Py_True
-#else
- Py_False
-#endif
- );
- if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", obj) < 0) {
- Py_XDECREF(obj);
- return -1;
- }
- Py_DECREF(obj);
-
- return 0;
-}
-
-#define ADD_STR_TO_STATE_MACRO(STR) \
- do { \
- mod_state->str_##STR = PyUnicode_FromString(#STR); \
- if (mod_state->str_##STR == NULL) { \
- return -1; \
- } \
- } while(0)
-
-static inline int
-add_type_to_module(PyObject *module, const char *name,
- PyType_Spec *type_spec, PyTypeObject **dest)
-{
- PyObject *temp = PyType_FromModuleAndSpec(module, type_spec, NULL);
-
- if (PyModule_AddObjectRef(module, name, temp) < 0) {
- Py_XDECREF(temp);
- return -1;
- }
-
- *dest = (PyTypeObject*) temp;
-
- return 0;
-}
-
-static inline int
-add_constant_to_type(PyTypeObject *type, const char *name, long value)
+_zstd_exec(PyObject *m)
{
- PyObject *temp;
-
- temp = PyLong_FromLong(value);
- if (temp == NULL) {
- return -1;
- }
-
- int rc = PyObject_SetAttrString((PyObject*) type, name, temp);
- Py_DECREF(temp);
- return rc;
-}
-
-static int _zstd_exec(PyObject *module) {
- _zstd_state* const mod_state = get_zstd_state(module);
+#define ADD_TYPE(TYPE, SPEC) \
+do { \
+ TYPE = (PyTypeObject *)PyType_FromModuleAndSpec(m, &(SPEC), NULL); \
+ if (TYPE == NULL) { \
+ return -1; \
+ } \
+ if (PyModule_AddType(m, TYPE) < 0) { \
+ return -1; \
+ } \
+} while (0)
+
+#define ADD_INT_MACRO(MACRO) \
+ if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \
+ return -1; \
+ }
+
+#define ADD_INT_CONST_TO_TYPE(TYPE, NAME, VALUE) \
+do { \
+ PyObject *v = PyLong_FromLong((VALUE)); \
+ if (v == NULL || PyObject_SetAttrString((PyObject *)(TYPE), \
+ (NAME), v) < 0) { \
+ Py_XDECREF(v); \
+ return -1; \
+ } \
+ Py_DECREF(v); \
+} while (0)
+
+ _zstd_state* mod_state = get_zstd_state(m);
/* Reusable objects & variables */
- mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0);
- if (mod_state->empty_bytes == NULL) {
- return -1;
- }
-
- mod_state->empty_readonly_memoryview =
- PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ);
- if (mod_state->empty_readonly_memoryview == NULL) {
- return -1;
- }
-
- /* Add str to module state */
- ADD_STR_TO_STATE_MACRO(read);
- ADD_STR_TO_STATE_MACRO(readinto);
- ADD_STR_TO_STATE_MACRO(write);
- ADD_STR_TO_STATE_MACRO(flush);
-
mod_state->CParameter_type = NULL;
mod_state->DParameter_type = NULL;
- /* Add variables to module */
- if (add_vars_to_module(module) < 0) {
- return -1;
- }
-
- /* ZstdError */
+ /* Create and add heap types */
+ ADD_TYPE(mod_state->ZstdDict_type, zstd_dict_type_spec);
+ ADD_TYPE(mod_state->ZstdCompressor_type, zstd_compressor_type_spec);
+ ADD_TYPE(mod_state->ZstdDecompressor_type, zstd_decompressor_type_spec);
mod_state->ZstdError = PyErr_NewExceptionWithDoc(
- "_zstd.ZstdError",
- "Call to the underlying zstd library failed.",
- NULL, NULL);
+ "compression.zstd.ZstdError",
+ "An error occurred in the zstd library.",
+ NULL, NULL);
if (mod_state->ZstdError == NULL) {
return -1;
}
-
- if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) {
- Py_DECREF(mod_state->ZstdError);
+ if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) {
return -1;
}
- /* ZstdDict */
- if (add_type_to_module(module,
- "ZstdDict",
- &zstddict_type_spec,
- &mod_state->ZstdDict_type) < 0) {
+ /* Add constants */
+ if (PyModule_AddIntConstant(m, "zstd_version_number",
+ ZSTD_versionNumber()) < 0) {
return -1;
}
- // ZstdCompressor
- if (add_type_to_module(module,
- "ZstdCompressor",
- &zstdcompressor_type_spec,
- &mod_state->ZstdCompressor_type) < 0) {
+ if (PyModule_AddStringConstant(m, "zstd_version",
+ ZSTD_versionString()) < 0) {
return -1;
}
- // Add EndDirective enum to ZstdCompressor
- if (add_constant_to_type(mod_state->ZstdCompressor_type,
- "CONTINUE",
- ZSTD_e_continue) < 0) {
+#if ZSTD_VERSION_NUMBER >= 10500
+ if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT",
+ ZSTD_defaultCLevel()) < 0) {
return -1;
}
+#else
+ ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT);
+#endif
- if (add_constant_to_type(mod_state->ZstdCompressor_type,
- "FLUSH_BLOCK",
- ZSTD_e_flush) < 0) {
+ if (PyModule_Add(m, "ZSTD_DStreamOutSize",
+ PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) {
return -1;
}
- if (add_constant_to_type(mod_state->ZstdCompressor_type,
- "FLUSH_FRAME",
- ZSTD_e_end) < 0) {
- return -1;
- }
-
- // ZstdDecompressor
- if (add_type_to_module(module,
- "ZstdDecompressor",
- &ZstdDecompressor_type_spec,
- &mod_state->ZstdDecompressor_type) < 0) {
- return -1;
- }
+ /* Add zstd compression parameters. All should also be in cp_list. */
+ ADD_INT_MACRO(ZSTD_c_compressionLevel);
+ ADD_INT_MACRO(ZSTD_c_windowLog);
+ ADD_INT_MACRO(ZSTD_c_hashLog);
+ ADD_INT_MACRO(ZSTD_c_chainLog);
+ ADD_INT_MACRO(ZSTD_c_searchLog);
+ ADD_INT_MACRO(ZSTD_c_minMatch);
+ ADD_INT_MACRO(ZSTD_c_targetLength);
+ ADD_INT_MACRO(ZSTD_c_strategy);
+
+ ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching);
+ ADD_INT_MACRO(ZSTD_c_ldmHashLog);
+ ADD_INT_MACRO(ZSTD_c_ldmMinMatch);
+ ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog);
+ ADD_INT_MACRO(ZSTD_c_ldmHashRateLog);
+
+ ADD_INT_MACRO(ZSTD_c_contentSizeFlag);
+ ADD_INT_MACRO(ZSTD_c_checksumFlag);
+ ADD_INT_MACRO(ZSTD_c_dictIDFlag);
+
+ ADD_INT_MACRO(ZSTD_c_nbWorkers);
+ ADD_INT_MACRO(ZSTD_c_jobSize);
+ ADD_INT_MACRO(ZSTD_c_overlapLog);
+
+ /* Add zstd decompression parameters. All should also be in dp_list. */
+ ADD_INT_MACRO(ZSTD_d_windowLogMax);
+
+ /* Add ZSTD_strategy enum members */
+ ADD_INT_MACRO(ZSTD_fast);
+ ADD_INT_MACRO(ZSTD_dfast);
+ ADD_INT_MACRO(ZSTD_greedy);
+ ADD_INT_MACRO(ZSTD_lazy);
+ ADD_INT_MACRO(ZSTD_lazy2);
+ ADD_INT_MACRO(ZSTD_btlazy2);
+ ADD_INT_MACRO(ZSTD_btopt);
+ ADD_INT_MACRO(ZSTD_btultra);
+ ADD_INT_MACRO(ZSTD_btultra2);
+
+ /* Add ZSTD_EndDirective enum members to ZstdCompressor */
+ ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type,
+ "CONTINUE", ZSTD_e_continue);
+ ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type,
+ "FLUSH_BLOCK", ZSTD_e_flush);
+ ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type,
+ "FLUSH_FRAME", ZSTD_e_end);
+
+    /* Make ZstdCompressor immutable (set Py_TPFLAGS_IMMUTABLETYPE).
+       PyType_Freeze() returns -1 with an exception set on failure, so
+       propagate that instead of reporting a successful module init. */
+    if (PyType_Freeze(mod_state->ZstdCompressor_type) < 0) {
+        return -1;
+    }
+
+    /* Drop every helper macro defined at the top of this function so that
+       none of them leaks into the rest of the translation unit. */
+#undef ADD_TYPE
+#undef ADD_INT_MACRO
+#undef ADD_INT_CONST_TO_TYPE
return 0;
}
@@ -838,14 +704,7 @@ static int _zstd_exec(PyObject *module) {
static int
_zstd_traverse(PyObject *module, visitproc visit, void *arg)
{
- _zstd_state* const mod_state = get_zstd_state(module);
-
- Py_VISIT(mod_state->empty_bytes);
- Py_VISIT(mod_state->empty_readonly_memoryview);
- Py_VISIT(mod_state->str_read);
- Py_VISIT(mod_state->str_readinto);
- Py_VISIT(mod_state->str_write);
- Py_VISIT(mod_state->str_flush);
+ _zstd_state* mod_state = get_zstd_state(module);
Py_VISIT(mod_state->ZstdDict_type);
Py_VISIT(mod_state->ZstdCompressor_type);
@@ -862,14 +721,7 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg)
static int
_zstd_clear(PyObject *module)
{
- _zstd_state* const mod_state = get_zstd_state(module);
-
- Py_CLEAR(mod_state->empty_bytes);
- Py_CLEAR(mod_state->empty_readonly_memoryview);
- Py_CLEAR(mod_state->str_read);
- Py_CLEAR(mod_state->str_readinto);
- Py_CLEAR(mod_state->str_write);
- Py_CLEAR(mod_state->str_flush);
+ _zstd_state* mod_state = get_zstd_state(module);
Py_CLEAR(mod_state->ZstdDict_type);
Py_CLEAR(mod_state->ZstdCompressor_type);
@@ -891,20 +743,21 @@ _zstd_free(void *module)
static struct PyModuleDef_Slot _zstd_slots[] = {
{Py_mod_exec, _zstd_exec},
+ {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
-
- {0}
+ {0, NULL},
};
-struct PyModuleDef _zstdmodule = {
- PyModuleDef_HEAD_INIT,
+static struct PyModuleDef _zstdmodule = {
+ .m_base = PyModuleDef_HEAD_INIT,
.m_name = "_zstd",
+ .m_doc = "Implementation module for Zstandard compression.",
.m_size = sizeof(_zstd_state),
.m_slots = _zstd_slots,
.m_methods = _zstd_methods,
.m_traverse = _zstd_traverse,
.m_clear = _zstd_clear,
- .m_free = _zstd_free
+ .m_free = _zstd_free,
};
PyMODINIT_FUNC