aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_zstd/_zstdmodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_zstd/_zstdmodule.c')
-rw-r--r--Modules/_zstd/_zstdmodule.c367
1 files changed, 188 insertions, 179 deletions
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c
index c3852fe8973..b0e50f873f4 100644
--- a/Modules/_zstd/_zstdmodule.c
+++ b/Modules/_zstd/_zstdmodule.c
@@ -1,7 +1,4 @@
-/*
-Low level interface to Meta's zstd library for use in the compression.zstd
-Python module.
-*/
+/* Low level interface to the Zstandard algorthm & the zstd library. */
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
@@ -10,7 +7,6 @@ Python module.
#include "Python.h"
#include "_zstdmodule.h"
-#include "zstddict.h"
#include <zstd.h> // ZSTD_*()
#include <zdict.h> // ZDICT_*()
@@ -23,49 +19,88 @@ module _zstd
#include "clinic/_zstdmodule.c.h"
+ZstdDict *
+_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype)
+{
+ if (state == NULL) {
+ return NULL;
+ }
+
+ /* Check ZstdDict */
+ if (PyObject_TypeCheck(dict, state->ZstdDict_type)) {
+ return (ZstdDict*)dict;
+ }
+
+ /* Check (ZstdDict, type) */
+ if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2
+ && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type)
+ && PyLong_Check(PyTuple_GET_ITEM(dict, 1)))
+ {
+ int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1));
+ if (type == -1 && PyErr_Occurred()) {
+ return NULL;
+ }
+ if (type == DICT_TYPE_DIGESTED
+ || type == DICT_TYPE_UNDIGESTED
+ || type == DICT_TYPE_PREFIX)
+ {
+ *ptype = type;
+ return (ZstdDict*)PyTuple_GET_ITEM(dict, 0);
+ }
+ }
+
+ /* Wrong type */
+ PyErr_SetString(PyExc_TypeError,
+ "zstd_dict argument should be a ZstdDict object.");
+ return NULL;
+}
+
/* Format error message and set ZstdError. */
void
-set_zstd_error(const _zstd_state* const state,
- error_type type, size_t zstd_ret)
+set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret)
{
- char *msg;
+ const char *msg;
assert(ZSTD_isError(zstd_ret));
- switch (type)
- {
- case ERR_DECOMPRESS:
- msg = "Unable to decompress zstd data: %s";
- break;
- case ERR_COMPRESS:
- msg = "Unable to compress zstd data: %s";
- break;
-
- case ERR_LOAD_D_DICT:
- msg = "Unable to load zstd dictionary or prefix for decompression: %s";
- break;
- case ERR_LOAD_C_DICT:
- msg = "Unable to load zstd dictionary or prefix for compression: %s";
- break;
-
- case ERR_GET_C_BOUNDS:
- msg = "Unable to get zstd compression parameter bounds: %s";
- break;
- case ERR_GET_D_BOUNDS:
- msg = "Unable to get zstd decompression parameter bounds: %s";
- break;
- case ERR_SET_C_LEVEL:
- msg = "Unable to set zstd compression level: %s";
- break;
-
- case ERR_TRAIN_DICT:
- msg = "Unable to train zstd dictionary: %s";
- break;
- case ERR_FINALIZE_DICT:
- msg = "Unable to finalize zstd dictionary: %s";
- break;
-
- default:
- Py_UNREACHABLE();
+ if (state == NULL) {
+ return;
+ }
+ switch (type) {
+ case ERR_DECOMPRESS:
+ msg = "Unable to decompress Zstandard data: %s";
+ break;
+ case ERR_COMPRESS:
+ msg = "Unable to compress Zstandard data: %s";
+ break;
+
+ case ERR_LOAD_D_DICT:
+ msg = "Unable to load Zstandard dictionary or prefix for "
+ "decompression: %s";
+ break;
+ case ERR_LOAD_C_DICT:
+ msg = "Unable to load Zstandard dictionary or prefix for "
+ "compression: %s";
+ break;
+
+ case ERR_GET_C_BOUNDS:
+ msg = "Unable to get zstd compression parameter bounds: %s";
+ break;
+ case ERR_GET_D_BOUNDS:
+ msg = "Unable to get zstd decompression parameter bounds: %s";
+ break;
+ case ERR_SET_C_LEVEL:
+ msg = "Unable to set zstd compression level: %s";
+ break;
+
+ case ERR_TRAIN_DICT:
+ msg = "Unable to train the Zstandard dictionary: %s";
+ break;
+ case ERR_FINALIZE_DICT:
+ msg = "Unable to finalize the Zstandard dictionary: %s";
+ break;
+
+ default:
+ Py_UNREACHABLE();
}
PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret));
}
@@ -105,16 +140,13 @@ static const ParameterInfo dp_list[] = {
};
void
-set_parameter_error(const _zstd_state* const state, int is_compress,
- int key_v, int value_v)
+set_parameter_error(int is_compress, int key_v, int value_v)
{
ParameterInfo const *list;
int list_size;
- char const *name;
char *type;
ZSTD_bounds bounds;
- int i;
- char pos_msg[128];
+ char pos_msg[64];
if (is_compress) {
list = cp_list;
@@ -128,8 +160,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
}
/* Find parameter's name */
- name = NULL;
- for (i = 0; i < list_size; i++) {
+ char const *name = NULL;
+ for (int i = 0; i < list_size; i++) {
if (key_v == (list+i)->parameter) {
name = (list+i)->parameter_name;
break;
@@ -151,20 +183,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
bounds = ZSTD_dParam_getBounds(key_v);
}
if (ZSTD_isError(bounds.error)) {
- PyErr_Format(state->ZstdError,
- "Zstd %s parameter \"%s\" is invalid.",
+ PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'",
type, name);
return;
}
/* Error message */
- PyErr_Format(state->ZstdError,
- "Error when setting zstd %s parameter \"%s\", it "
- "should %d <= value <= %d, provided value is %d. "
- "(%d-bit build)",
- type, name,
- bounds.lowerBound, bounds.upperBound, value_v,
- 8*(int)sizeof(Py_ssize_t));
+ PyErr_Format(PyExc_ValueError,
+ "%s parameter '%s' received an illegal value %d; "
+ "the valid range is [%d, %d]",
+ type, name, value_v, bounds.lowerBound, bounds.upperBound);
}
static inline _zstd_state*
@@ -175,6 +203,54 @@ get_zstd_state(PyObject *module)
return (_zstd_state *)state;
}
+static Py_ssize_t
+calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes,
+ size_t **chunk_sizes)
+{
+ Py_ssize_t chunks_number;
+ Py_ssize_t sizes_sum;
+ Py_ssize_t i;
+
+ chunks_number = PyTuple_GET_SIZE(samples_sizes);
+ if ((size_t) chunks_number > UINT32_MAX) {
+ PyErr_Format(PyExc_ValueError,
+ "The number of samples should be <= %u.", UINT32_MAX);
+ return -1;
+ }
+
+ /* Prepare chunk_sizes */
+ *chunk_sizes = PyMem_New(size_t, chunks_number);
+ if (*chunk_sizes == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ sizes_sum = PyBytes_GET_SIZE(samples_bytes);
+ for (i = 0; i < chunks_number; i++) {
+ size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i));
+ (*chunk_sizes)[i] = size;
+ if (size == (size_t)-1 && PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ goto sum_error;
+ }
+ return -1;
+ }
+ if ((size_t)sizes_sum < size) {
+ goto sum_error;
+ }
+ sizes_sum -= size;
+ }
+
+ if (sizes_sum != 0) {
+sum_error:
+ PyErr_SetString(PyExc_ValueError,
+ "The samples size tuple doesn't match the "
+ "concatenation's size.");
+ return -1;
+ }
+ return chunks_number;
+}
+
/*[clinic input]
_zstd.train_dict
@@ -187,59 +263,30 @@ _zstd.train_dict
The size of the dictionary.
/
-Internal function, train a zstd dictionary on sample data.
+Train a Zstandard dictionary on sample data.
[clinic start generated code]*/
static PyObject *
_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
PyObject *samples_sizes, Py_ssize_t dict_size)
-/*[clinic end generated code: output=8e87fe43935e8f77 input=70fcd8937f2528b6]*/
+/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/
{
- // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict
- // are pretty similar. We should see if we can refactor them to share that code.
- Py_ssize_t chunks_number;
- size_t *chunk_sizes = NULL;
PyObject *dst_dict_bytes = NULL;
+ size_t *chunk_sizes = NULL;
+ Py_ssize_t chunks_number;
size_t zstd_ret;
- Py_ssize_t sizes_sum;
- Py_ssize_t i;
/* Check arguments */
if (dict_size <= 0) {
- PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
- return NULL;
- }
-
- chunks_number = Py_SIZE(samples_sizes);
- if ((size_t) chunks_number > UINT32_MAX) {
- PyErr_Format(PyExc_ValueError,
- "The number of samples should be <= %u.", UINT32_MAX);
+ PyErr_SetString(PyExc_ValueError,
+ "dict_size argument should be positive number.");
return NULL;
}
- /* Prepare chunk_sizes */
- chunk_sizes = PyMem_New(size_t, chunks_number);
- if (chunk_sizes == NULL) {
- PyErr_NoMemory();
- goto error;
- }
-
- sizes_sum = 0;
- for (i = 0; i < chunks_number; i++) {
- PyObject *size = PyTuple_GetItem(samples_sizes, i);
- chunk_sizes[i] = PyLong_AsSize_t(size);
- if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
- PyErr_Format(PyExc_ValueError,
- "Items in samples_sizes should be an int "
- "object, with a value between 0 and %u.", SIZE_MAX);
- goto error;
- }
- sizes_sum += chunk_sizes[i];
- }
-
- if (sizes_sum != Py_SIZE(samples_bytes)) {
- PyErr_SetString(PyExc_ValueError,
- "The samples size tuple doesn't match the concatenation's size.");
+ /* Check that the samples are valid and get their sizes */
+ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes,
+ &chunk_sizes);
+ if (chunks_number < 0) {
goto error;
}
@@ -251,16 +298,16 @@ _zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
/* Train the dictionary */
char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes);
- char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
+ const char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
Py_BEGIN_ALLOW_THREADS
zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size,
samples_buffer,
chunk_sizes, (uint32_t)chunks_number);
Py_END_ALLOW_THREADS
- /* Check zstd dict error */
+ /* Check Zstandard dict error */
if (ZDICT_isError(zstd_ret)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret);
goto error;
}
@@ -292,10 +339,10 @@ _zstd.finalize_dict
dict_size: Py_ssize_t
The size of the dictionary.
compression_level: int
- Optimize for a specific zstd compression level, 0 means default.
+ Optimize for a specific Zstandard compression level, 0 means default.
/
-Internal function, finalize a zstd dictionary.
+Finalize a Zstandard dictionary.
[clinic start generated code]*/
static PyObject *
@@ -303,52 +350,25 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
PyBytesObject *samples_bytes,
PyObject *samples_sizes, Py_ssize_t dict_size,
int compression_level)
-/*[clinic end generated code: output=f91821ba5ae85bda input=130d1508adb55ba1]*/
+/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/
{
Py_ssize_t chunks_number;
size_t *chunk_sizes = NULL;
PyObject *dst_dict_bytes = NULL;
size_t zstd_ret;
ZDICT_params_t params;
- Py_ssize_t sizes_sum;
- Py_ssize_t i;
/* Check arguments */
if (dict_size <= 0) {
- PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
- return NULL;
- }
-
- chunks_number = Py_SIZE(samples_sizes);
- if ((size_t) chunks_number > UINT32_MAX) {
- PyErr_Format(PyExc_ValueError,
- "The number of samples should be <= %u.", UINT32_MAX);
+ PyErr_SetString(PyExc_ValueError,
+ "dict_size argument should be positive number.");
return NULL;
}
- /* Prepare chunk_sizes */
- chunk_sizes = PyMem_New(size_t, chunks_number);
- if (chunk_sizes == NULL) {
- PyErr_NoMemory();
- goto error;
- }
-
- sizes_sum = 0;
- for (i = 0; i < chunks_number; i++) {
- PyObject *size = PyTuple_GetItem(samples_sizes, i);
- chunk_sizes[i] = PyLong_AsSize_t(size);
- if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
- PyErr_Format(PyExc_ValueError,
- "Items in samples_sizes should be an int "
- "object, with a value between 0 and %u.", SIZE_MAX);
- goto error;
- }
- sizes_sum += chunk_sizes[i];
- }
-
- if (sizes_sum != Py_SIZE(samples_bytes)) {
- PyErr_SetString(PyExc_ValueError,
- "The samples size tuple doesn't match the concatenation's size.");
+ /* Check that the samples are valid and get their sizes */
+ chunks_number = calculate_samples_stats(samples_bytes, samples_sizes,
+ &chunk_sizes);
+ if (chunks_number < 0) {
goto error;
}
@@ -360,7 +380,7 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
/* Parameters */
- /* Optimize for a specific zstd compression level, 0 means default. */
+ /* Optimize for a specific Zstandard compression level, 0 means default. */
params.compressionLevel = compression_level;
/* Write log to stderr, 0 = none. */
params.notificationLevel = 0;
@@ -371,14 +391,15 @@ _zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
Py_BEGIN_ALLOW_THREADS
zstd_ret = ZDICT_finalizeDictionary(
PyBytes_AS_STRING(dst_dict_bytes), dict_size,
- PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes),
+ PyBytes_AS_STRING(custom_dict_bytes),
+ Py_SIZE(custom_dict_bytes),
PyBytes_AS_STRING(samples_bytes), chunk_sizes,
(uint32_t)chunks_number, params);
Py_END_ALLOW_THREADS
- /* Check zstd dict error */
+ /* Check Zstandard dict error */
if (ZDICT_isError(zstd_ret)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret);
goto error;
}
@@ -407,18 +428,18 @@ _zstd.get_param_bounds
is_compress: bool
True for CompressionParameter, False for DecompressionParameter.
-Internal function, get CompressionParameter/DecompressionParameter bounds.
+Get CompressionParameter/DecompressionParameter bounds.
[clinic start generated code]*/
static PyObject *
_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress)
-/*[clinic end generated code: output=4acf5a876f0620ca input=84e669591e487008]*/
+/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/
{
ZSTD_bounds bound;
if (is_compress) {
bound = ZSTD_cParam_getBounds(parameter);
if (ZSTD_isError(bound.error)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error);
return NULL;
}
@@ -426,7 +447,7 @@ _zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress)
else {
bound = ZSTD_dParam_getBounds(parameter);
if (ZSTD_isError(bound.error)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error);
return NULL;
}
@@ -442,24 +463,23 @@ _zstd.get_frame_size
A bytes-like object, it should start from the beginning of a frame,
and contains at least one complete frame.
-Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.
-
-It will iterate all blocks' headers within a frame, to accumulate the frame size.
+Get the size of a Zstandard frame, including the header and optional checksum.
[clinic start generated code]*/
static PyObject *
_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
-/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/
+/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/
{
size_t frame_size;
- frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len);
+ frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf,
+ frame_buffer->len);
if (ZSTD_isError(frame_size)) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
PyErr_Format(mod_state->ZstdError,
- "Error when finding the compressed size of a zstd frame. "
- "Make sure the frame_buffer argument starts from the "
- "beginning of a frame, and its length not less than this "
+ "Error when finding the compressed size of a Zstandard frame. "
+ "Ensure the frame_buffer argument starts from the "
+ "beginning of a frame, and its length is not less than this "
"complete frame. Zstd error message: %s.",
ZSTD_getErrorName(frame_size));
return NULL;
@@ -472,14 +492,14 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
_zstd.get_frame_info
frame_buffer: Py_buffer
- A bytes-like object, containing the header of a zstd frame.
+ A bytes-like object, containing the header of a Zstandard frame.
-Internal function, get zstd frame infomation from a frame header.
+Get Zstandard frame infomation from a frame header.
[clinic start generated code]*/
static PyObject *
_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
-/*[clinic end generated code: output=56e033cf48001929 input=1816f14656b6aa22]*/
+/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/
{
uint64_t decompressed_size;
uint32_t dict_id;
@@ -491,12 +511,12 @@ _zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
/* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */
if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) {
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
PyErr_SetString(mod_state->ZstdError,
"Error when getting information from the header of "
- "a zstd frame. Make sure the frame_buffer argument "
+ "a Zstandard frame. Ensure the frame_buffer argument "
"starts from the beginning of a frame, and its length "
- "not less than the frame header (6~18 bytes).");
+ "is not less than the frame header (6~18 bytes).");
return NULL;
}
@@ -518,30 +538,20 @@ _zstd.set_parameter_types
d_parameter_type: object(subclass_of='&PyType_Type')
DecompressionParameter IntEnum type object
-Internal function, set CompressionParameter/DecompressionParameter types for validity check.
+Set CompressionParameter and DecompressionParameter types for validity check.
[clinic start generated code]*/
static PyObject *
_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
PyObject *d_parameter_type)
-/*[clinic end generated code: output=f3313b1294f19502 input=30402523871b8280]*/
+/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/
{
- _zstd_state* const mod_state = get_zstd_state(module);
-
- if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) {
- PyErr_SetString(PyExc_ValueError,
- "The two arguments should be CompressionParameter and "
- "DecompressionParameter types.");
- return NULL;
- }
+ _zstd_state* mod_state = get_zstd_state(module);
- Py_XDECREF(mod_state->CParameter_type);
Py_INCREF(c_parameter_type);
- mod_state->CParameter_type = (PyTypeObject*)c_parameter_type;
-
- Py_XDECREF(mod_state->DParameter_type);
+ Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type);
Py_INCREF(d_parameter_type);
- mod_state->DParameter_type = (PyTypeObject*)d_parameter_type;
+ Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type);
Py_RETURN_NONE;
}
@@ -586,7 +596,7 @@ do { \
Py_DECREF(v); \
} while (0)
- _zstd_state* const mod_state = get_zstd_state(m);
+ _zstd_state* mod_state = get_zstd_state(m);
/* Reusable objects & variables */
mod_state->CParameter_type = NULL;
@@ -604,7 +614,6 @@ do { \
return -1;
}
if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) {
- Py_DECREF(mod_state->ZstdError);
return -1;
}
@@ -692,7 +701,7 @@ do { \
static int
_zstd_traverse(PyObject *module, visitproc visit, void *arg)
{
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
Py_VISIT(mod_state->ZstdDict_type);
Py_VISIT(mod_state->ZstdCompressor_type);
@@ -709,7 +718,7 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg)
static int
_zstd_clear(PyObject *module)
{
- _zstd_state* const mod_state = get_zstd_state(module);
+ _zstd_state* mod_state = get_zstd_state(module);
Py_CLEAR(mod_state->ZstdDict_type);
Py_CLEAR(mod_state->ZstdCompressor_type);