diff options
Diffstat (limited to 'Modules/_zstd/zstddict.c')
-rw-r--r-- | Modules/_zstd/zstddict.c | 108 |
1 files changed, 60 insertions, 48 deletions
diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 7df187a6fa6..14f74aaed46 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -15,8 +15,8 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" #include "Python.h" #include "_zstdmodule.h" -#include "zstddict.h" #include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() @@ -25,7 +25,7 @@ class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" /*[clinic input] @classmethod _zstd.ZstdDict.__new__ as _zstd_ZstdDict_new - dict_content: object + dict_content: Py_buffer The content of a Zstandard dictionary as a bytes-like object. / * @@ -41,18 +41,27 @@ by multiple ZstdCompressor or ZstdDecompressor objects. [clinic start generated code]*/ static PyObject * -_zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, int is_raw) -/*[clinic end generated code: output=3ebff839cb3be6d7 input=6b5de413869ae878]*/ +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ { + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { - goto error; + return NULL; } - self->dict_content = NULL; self->d_dict = NULL; + self->dict_buffer = NULL; self->dict_id = 0; + self->lock = (PyMutex){0}; /* ZSTD_CDict dict */ self->c_dicts = PyDict_New(); @@ -60,37 +69,26 @@ _zstd_ZstdDict_new_impl(PyTypeObject *type, PyObject *dict_content, goto error; } - /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { - PyErr_SetString(PyExc_TypeError, - "dict_content argument should be bytes-like object."); - goto error; - } - - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { - PyErr_SetString(PyExc_ValueError, - "Zstandard dictionary content should at least 8 bytes."); + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); goto error; } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); /* Check validity for ordinary dictionary */ if (!is_raw && self->dict_id == 0) { - char *msg = "Invalid Zstandard dictionary and is_raw not set.\n"; - PyErr_SetString(PyExc_ValueError, msg); + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); goto error; } - // Can only track self once self->dict_content is included PyObject_GC_Track(self); - return (PyObject*)self; + return (PyObject *)self; error: Py_XDECREF(self); @@ -109,12 +107,14 @@ ZstdDict_dealloc(PyObject *ob) ZSTD_freeDDict(self->d_dict); } - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); + assert(!PyMutex_IsLocked(&self->lock)); + + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); Py_CLEAR(self->c_dicts); PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); + tp->tp_free(self); Py_DECREF(tp); } @@ -125,31 +125,42 @@ PyDoc_STRVAR(ZstdDict_dictid_doc, "The special value '0' means a 'raw content' dictionary," "without any restrictions on format or content."); -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of a Zstandard dictionary, as a bytes object."); - static PyObject * -ZstdDict_str(PyObject *ob) +ZstdDict_repr(PyObject *ob) { ZstdDict *dict = ZstdDict_CAST(ob); return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>", - dict->dict_id, Py_SIZE(dict->dict_content)); + (unsigned int)dict->dict_id, dict->dict_len); } static PyMemberDef ZstdDict_members[] = { {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, {NULL} }; /*[clinic input] -@critical_section +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + +/*[clinic input] @getter _zstd.ZstdDict.as_digested_dict Load as a digested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + 1. Some advanced compression parameters of compressor may be overridden by parameters of digested dictionary. 2. ZstdDict has a digested dictionaries cache for each compression level. @@ -160,19 +171,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digeste static PyObject * _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/ +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_undigested_dict Load as an undigested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + 1. The advanced compression parameters of compressor will not be overridden. 2. Loading an undigested dictionary is costly. If load an undigested dictionary multiple times, consider reusing a compressor object. @@ -181,19 +193,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undiges static PyObject * _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/ +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_prefix Load as a prefix to compressor/decompressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + 1. Prefix is compatible with long distance matching, while dictionary is not. 2. It only works for the first frame, then the compressor/decompressor will return to no prefix state. @@ -202,12 +215,13 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) static PyObject * _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) -/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/ +/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); } static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF @@ -218,8 +232,7 @@ static Py_ssize_t ZstdDict_length(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); + return self->dict_len; } static int @@ -227,7 +240,6 @@ ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) { ZstdDict *self = ZstdDict_CAST(ob); Py_VISIT(self->c_dicts); - Py_VISIT(self->dict_content); return 0; } @@ -235,7 +247,7 @@ static int ZstdDict_clear(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - Py_CLEAR(self->dict_content); + Py_CLEAR(self->c_dicts); return 0; } @@ -244,7 +256,7 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_getset, ZstdDict_getset}, {Py_tp_new, _zstd_ZstdDict_new}, {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_str, ZstdDict_str}, + {Py_tp_repr, ZstdDict_repr}, {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, |