diff options
Diffstat (limited to 'Modules/_zstd/zstddict.c')
-rw-r--r-- | Modules/_zstd/zstddict.c | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c new file mode 100644 index 00000000000..14f74aaed46 --- /dev/null +++ b/Modules/_zstd/zstddict.c @@ -0,0 +1,273 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +/* ZstdDict class definitions */ + +/*[clinic input] +module _zstd +class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "Python.h" + +#include "_zstdmodule.h" +#include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked + +#include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() + +#define ZstdDict_CAST(op) ((ZstdDict *)op) + +/*[clinic input] +@classmethod +_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new + dict_content: Py_buffer + The content of a Zstandard dictionary as a bytes-like object. + / + * + is_raw: bool = False + If true, perform no checks on *dict_content*, useful for some + advanced cases. Otherwise, check that the content represents + a Zstandard dictionary created by the zstd library or CLI. + +Represents a Zstandard dictionary. + +The dictionary can be used for compression or decompression, and can be shared +by multiple ZstdCompressor or ZstdDecompressor objects. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw) +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ +{ + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + + ZstdDict* self = PyObject_GC_New(ZstdDict, type); + if (self == NULL) { + return NULL; + } + + self->d_dict = NULL; + self->dict_buffer = NULL; + self->dict_id = 0; + self->lock = (PyMutex){0}; + + /* ZSTD_CDict dict */ + self->c_dicts = PyDict_New(); + if (self->c_dicts == NULL) { + goto error; + } + + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); + goto error; + } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; + + /* Get dict_id, 0 means "raw content" dictionary. */ + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); + + /* Check validity for ordinary dictionary */ + if (!is_raw && self->dict_id == 0) { + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); + goto error; + } + + PyObject_GC_Track(self); + + return (PyObject *)self; + +error: + Py_XDECREF(self); + return NULL; +} + +static void +ZstdDict_dealloc(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free ZSTD_DDict instance */ + if (self->d_dict) { + ZSTD_freeDDict(self->d_dict); + } + + assert(!PyMutex_IsLocked(&self->lock)); + + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); + Py_CLEAR(self->c_dicts); + + PyTypeObject *tp = Py_TYPE(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +PyDoc_STRVAR(ZstdDict_dictid_doc, +"the Zstandard dictionary, an int between 0 and 2**32.\n\n" +"A non-zero value represents an ordinary Zstandard dictionary, " +"conforming to the standardised format.\n\n" +"The special value '0' means a 'raw content' dictionary," +"without any restrictions on format or content."); + +static PyObject * +ZstdDict_repr(PyObject *ob) +{ + ZstdDict *dict = ZstdDict_CAST(ob); + return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>", + (unsigned int)dict->dict_id, dict->dict_len); +} + +static PyMemberDef ZstdDict_members[] = { + {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, + {NULL} +}; + +/*[clinic input] +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + +/*[clinic input] +@getter +_zstd.ZstdDict.as_digested_dict + +Load as a digested dictionary to compressor. + +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + +1. Some advanced compression parameters of compressor may be overridden + by parameters of digested dictionary. +2. ZstdDict has a digested dictionaries cache for each compression level. + It's faster when loading again a digested dictionary with the same + compression level. +3. No need to use this for decompression. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ +{ + return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); +} + +/*[clinic input] +@getter +_zstd.ZstdDict.as_undigested_dict + +Load as an undigested dictionary to compressor. + +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + +1. The advanced compression parameters of compressor will not be overridden. +2. Loading an undigested dictionary is costly. If load an undigested dictionary + multiple times, consider reusing a compressor object. +3. No need to use this for decompression. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ +{ + return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); +} + +/*[clinic input] +@getter +_zstd.ZstdDict.as_prefix + +Load as a prefix to compressor/decompressor. + +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + +1. Prefix is compatible with long distance matching, while dictionary is not. +2. It only works for the first frame, then the compressor/decompressor will + return to no prefix state. +3. When decompressing, must use the same prefix as when compressing." +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) +/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/ +{ + return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); +} + +static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF + _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF + _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF + _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF + {NULL} +}; + +static Py_ssize_t +ZstdDict_length(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + return self->dict_len; +} + +static int +ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) +{ + ZstdDict *self = ZstdDict_CAST(ob); + Py_VISIT(self->c_dicts); + return 0; +} + +static int +ZstdDict_clear(PyObject *ob) +{ + ZstdDict *self = ZstdDict_CAST(ob); + Py_CLEAR(self->c_dicts); + return 0; +} + +static PyType_Slot zstddict_slots[] = { + {Py_tp_members, ZstdDict_members}, + {Py_tp_getset, ZstdDict_getset}, + {Py_tp_new, _zstd_ZstdDict_new}, + {Py_tp_dealloc, ZstdDict_dealloc}, + {Py_tp_repr, ZstdDict_repr}, + {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, + {Py_sq_length, ZstdDict_length}, + {Py_tp_traverse, ZstdDict_traverse}, + {Py_tp_clear, ZstdDict_clear}, + {0, 0} +}; + +PyType_Spec zstd_dict_type_spec = { + .name = "compression.zstd.ZstdDict", + .basicsize = sizeof(ZstdDict), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, + .slots = zstddict_slots, +}; |