diff options
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/Setup | 3 | ||||
-rw-r--r-- | Modules/Setup.stdlib.in | 5 | ||||
-rw-r--r-- | Modules/_cursesmodule.c | 63 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 9 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 3 | ||||
-rw-r--r-- | Modules/_io/textio.c | 3 | ||||
-rw-r--r-- | Modules/_remotedebuggingmodule.c (renamed from Modules/_testexternalinspection.c) | 479 | ||||
-rw-r--r-- | Modules/_zstd/_zstdmodule.c | 914 | ||||
-rw-r--r-- | Modules/_zstd/_zstdmodule.h | 204 | ||||
-rw-r--r-- | Modules/_zstd/buffer.h | 104 | ||||
-rw-r--r-- | Modules/_zstd/clinic/_zstdmodule.c.h | 432 | ||||
-rw-r--r-- | Modules/_zstd/clinic/compressor.c.h | 255 | ||||
-rw-r--r-- | Modules/_zstd/clinic/decompressor.c.h | 230 | ||||
-rw-r--r-- | Modules/_zstd/clinic/zdict.c.h | 207 | ||||
-rw-r--r-- | Modules/_zstd/compressor.c | 707 | ||||
-rw-r--r-- | Modules/_zstd/decompressor.c | 891 | ||||
-rw-r--r-- | Modules/_zstd/zdict.c | 286 | ||||
-rw-r--r-- | Modules/clinic/_cursesmodule.c.h | 56 | ||||
-rw-r--r-- | Modules/clinic/posixmodule.c.h | 6 | ||||
-rw-r--r-- | Modules/posixmodule.c | 105 |
20 files changed, 4751 insertions, 211 deletions
diff --git a/Modules/Setup b/Modules/Setup index 65c22d48ba0..f23f082d9ef 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -200,6 +200,7 @@ PYTHONPATH=$(COREPYTHONPATH) #_dbm _dbmmodule.c -lgdbm_compat -DUSE_GDBM_COMPAT #_gdbm _gdbmmodule.c -lgdbm #_lzma _lzmamodule.c -llzma +#_zstd _zstd/_zstdmodule.c -lzstd -I$(srcdir)/Modules/_zstd #_uuid _uuidmodule.c -luuid #zlib zlibmodule.c -lz @@ -286,7 +287,7 @@ PYTHONPATH=$(COREPYTHONPATH) #_testcapi _testcapimodule.c #_testimportmultiple _testimportmultiple.c #_testmultiphase _testmultiphase.c -#_testexternalinspection _testexternalinspection.c +#_remotedebugging _remotedebuggingmodule.c #_testsinglephase _testsinglephase.c # --- diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 33e60f37d19..1512187bb09 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -33,6 +33,7 @@ # Modules that should always be present (POSIX and Windows): @MODULE_ARRAY_TRUE@array arraymodule.c @MODULE__ASYNCIO_TRUE@_asyncio _asynciomodule.c +@MODULE__REMOTEDEBUGGING_TRUE@_remotedebugging _remotedebuggingmodule.c @MODULE__BISECT_TRUE@_bisect _bisectmodule.c @MODULE__CSV_TRUE@_csv _csv.c @MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c @@ -64,10 +65,11 @@ @MODULE__DECIMAL_TRUE@_decimal _decimal/_decimal.c # compression libs and binascii (optional CRC32 from zlib) -# bindings need -lbz2, -lz, or -llzma, respectively +# bindings need -lbz2, -llzma, -lzstd, or -lz, respectively @MODULE_BINASCII_TRUE@binascii binascii.c @MODULE__BZ2_TRUE@_bz2 _bz2module.c @MODULE__LZMA_TRUE@_lzma _lzmamodule.c +@MODULE__ZSTD_TRUE@_zstd _zstd/_zstdmodule.c _zstd/zdict.c _zstd/compressor.c _zstd/decompressor.c @MODULE_ZLIB_TRUE@zlib zlibmodule.c # dbm/gdbm @@ -186,7 +188,6 @@ @MODULE__TESTIMPORTMULTIPLE_TRUE@_testimportmultiple _testimportmultiple.c @MODULE__TESTMULTIPHASE_TRUE@_testmultiphase _testmultiphase.c @MODULE__TESTSINGLEPHASE_TRUE@_testsinglephase _testsinglephase.c -@MODULE__TESTEXTERNALINSPECTION_TRUE@_testexternalinspection 
_testexternalinspection.c @MODULE__CTYPES_TEST_TRUE@_ctypes_test _ctypes/_ctypes_test.c # Limited API template modules; must be built as shared modules. diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index bf18cb51605..cd185bc2b02 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -787,7 +787,8 @@ Window_TwoArgNoReturnFunction(wresize, int, "ii;lines,columns") static PyObject * PyCursesWindow_New(cursesmodule_state *state, - WINDOW *win, const char *encoding) + WINDOW *win, const char *encoding, + PyCursesWindowObject *orig) { if (encoding == NULL) { #if defined(MS_WINDOWS) @@ -821,6 +822,8 @@ PyCursesWindow_New(cursesmodule_state *state, PyErr_NoMemory(); return NULL; } + wo->orig = orig; + Py_XINCREF(orig); PyObject_GC_Track((PyObject *)wo); return (PyObject *)wo; } @@ -838,6 +841,7 @@ PyCursesWindow_dealloc(PyObject *self) if (wo->encoding != NULL) { PyMem_Free(wo->encoding); } + Py_XDECREF(wo->orig); window_type->tp_free(self); Py_DECREF(window_type); } @@ -846,6 +850,8 @@ static int PyCursesWindow_traverse(PyObject *self, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(self)); + PyCursesWindowObject *wo = (PyCursesWindowObject *)self; + Py_VISIT(wo->orig); return 0; } @@ -1453,7 +1459,7 @@ _curses_window_derwin_impl(PyCursesWindowObject *self, int group_left_1, } cursesmodule_state *state = get_cursesmodule_state_by_win(self); - return PyCursesWindow_New(state, win, NULL); + return PyCursesWindow_New(state, win, NULL, self); } /*[clinic input] @@ -2493,7 +2499,7 @@ _curses_window_subwin_impl(PyCursesWindowObject *self, int group_left_1, } cursesmodule_state *state = get_cursesmodule_state_by_win(self); - return PyCursesWindow_New(state, win, self->encoding); + return PyCursesWindow_New(state, win, self->encoding, self); } /*[clinic input] @@ -3237,7 +3243,7 @@ _curses_getwin(PyObject *module, PyObject *file) goto error; } cursesmodule_state *state = get_cursesmodule_state(module); - res = PyCursesWindow_New(state, win, NULL); 
+ res = PyCursesWindow_New(state, win, NULL, NULL); error: fclose(fp); @@ -3410,7 +3416,7 @@ _curses_initscr_impl(PyObject *module) if (curses_initscr_called) { wrefresh(stdscr); cursesmodule_state *state = get_cursesmodule_state(module); - return PyCursesWindow_New(state, stdscr, NULL); + return PyCursesWindow_New(state, stdscr, NULL, NULL); } win = initscr(); @@ -3514,7 +3520,7 @@ _curses_initscr_impl(PyObject *module) #undef SetDictInt cursesmodule_state *state = get_cursesmodule_state(module); - PyObject *winobj = PyCursesWindow_New(state, win, NULL); + PyObject *winobj = PyCursesWindow_New(state, win, NULL, NULL); if (winobj == NULL) { return NULL; } @@ -3898,7 +3904,7 @@ _curses_newpad_impl(PyObject *module, int nlines, int ncols) } cursesmodule_state *state = get_cursesmodule_state(module); - return PyCursesWindow_New(state, win, NULL); + return PyCursesWindow_New(state, win, NULL, NULL); } /*[clinic input] @@ -3939,7 +3945,7 @@ _curses_newwin_impl(PyObject *module, int nlines, int ncols, } cursesmodule_state *state = get_cursesmodule_state(module); - return PyCursesWindow_New(state, win, NULL); + return PyCursesWindow_New(state, win, NULL, NULL); } /*[clinic input] @@ -4720,15 +4726,12 @@ _curses_use_env_impl(PyObject *module, int flag) /*[clinic input] _curses.use_default_colors -Allow use of default values for colors on terminals supporting this feature. - -Use this to support transparency in your application. The default color -is assigned to the color number -1. +Equivalent to assume_default_colors(-1, -1). 
[clinic start generated code]*/ static PyObject * _curses_use_default_colors_impl(PyObject *module) -/*[clinic end generated code: output=a3b81ff71dd901be input=656844367470e8fc]*/ +/*[clinic end generated code: output=a3b81ff71dd901be input=99ff0b7c69834d1f]*/ { int code; @@ -4744,6 +4747,39 @@ _curses_use_default_colors_impl(PyObject *module) return NULL; } } + +/*[clinic input] +_curses.assume_default_colors + fg: int + bg: int + / + +Allow use of default values for colors on terminals supporting this feature. + +Assign terminal default foreground/background colors to color number -1. +Change the definition of the color-pair 0 to (fg, bg). + +Use this to support transparency in your application. +[clinic start generated code]*/ + +static PyObject * +_curses_assume_default_colors_impl(PyObject *module, int fg, int bg) +/*[clinic end generated code: output=54985397a7d2b3a5 input=7fe301712ef3e9fb]*/ +{ + int code; + + PyCursesStatefulInitialised(module); + PyCursesStatefulInitialisedColor(module); + + code = assume_default_colors(fg, bg); + if (code != ERR) { + Py_RETURN_NONE; + } else { + cursesmodule_state *state = get_cursesmodule_state(module); + PyErr_SetString(state->error, "assume_default_colors() returned ERR"); + return NULL; + } +} #endif /* STRICT_SYSV_CURSES */ @@ -4902,6 +4938,7 @@ static PyMethodDef cursesmodule_methods[] = { _CURSES_UNGET_WCH_METHODDEF _CURSES_USE_ENV_METHODDEF _CURSES_USE_DEFAULT_COLORS_METHODDEF + _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 2189b1f3800..4724e97982f 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -2555,8 +2555,7 @@ static PyMethodDef bufferedreader_methods[] = { _IO__BUFFERED_TRUNCATE_METHODDEF _IO__BUFFERED___SIZEOF___METHODDEF - {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS}, - {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O}, + {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS}, 
{NULL, NULL} }; @@ -2615,8 +2614,7 @@ static PyMethodDef bufferedwriter_methods[] = { _IO__BUFFERED_TELL_METHODDEF _IO__BUFFERED___SIZEOF___METHODDEF - {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS}, - {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O}, + {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS}, {NULL, NULL} }; @@ -2733,8 +2731,7 @@ static PyMethodDef bufferedrandom_methods[] = { _IO_BUFFEREDWRITER_WRITE_METHODDEF _IO__BUFFERED___SIZEOF___METHODDEF - {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS}, - {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O}, + {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS}, {NULL, NULL} }; diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 0c5424954be..8fcb27049d6 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -1262,8 +1262,7 @@ static PyMethodDef fileio_methods[] = { _IO_FILEIO_ISATTY_METHODDEF {"_isatty_open_only", _io_FileIO_isatty_open_only, METH_NOARGS}, {"_dealloc_warn", fileio_dealloc_warn, METH_O, NULL}, - {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS}, - {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O}, + {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index a5b2ca7240a..86328e46a7b 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -3366,8 +3366,7 @@ static PyMethodDef textiowrapper_methods[] = { _IO_TEXTIOWRAPPER_TELL_METHODDEF _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF - {"__reduce__", _PyIOBase_cannot_pickle, METH_NOARGS}, - {"__reduce_ex__", _PyIOBase_cannot_pickle, METH_O}, + {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS}, {NULL, NULL} }; diff --git a/Modules/_testexternalinspection.c b/Modules/_remotedebuggingmodule.c index b65c5821443..cffa9a38331 100644 --- a/Modules/_testexternalinspection.c +++ b/Modules/_remotedebuggingmodule.c @@ -80,37 +80,6 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) return address; } -static int 
-read_string( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, - uintptr_t address, - char* buffer, - Py_ssize_t size -) { - Py_ssize_t len; - int result = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->unicode_object.length, - sizeof(Py_ssize_t), - &len - ); - if (result < 0) { - return -1; - } - if (len >= size) { - PyErr_SetString(PyExc_RuntimeError, "Buffer too small"); - return -1; - } - size_t offset = debug_offsets->unicode_object.asciiobject_size; - result = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buffer); - if (result < 0) { - return -1; - } - buffer[len] = '\0'; - return 0; -} - static inline int read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) { @@ -152,9 +121,9 @@ read_char(proc_handle_t *handle, uintptr_t address, char *result) } static int -read_int(proc_handle_t *handle, uintptr_t address, int *result) +read_sized_int(proc_handle_t *handle, uintptr_t address, void *result, size_t size) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(int), result); + int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, size, result); if (res < 0) { return -1; } @@ -188,20 +157,34 @@ read_py_str( uintptr_t address, Py_ssize_t max_len ) { - assert(max_len > 0); - PyObject *result = NULL; + char *buf = NULL; + + Py_ssize_t len; + int res = _Py_RemoteDebug_ReadRemoteMemory( + handle, + address + debug_offsets->unicode_object.length, + sizeof(Py_ssize_t), + &len + ); + if (res < 0) { + goto err; + } - char *buf = (char *)PyMem_RawMalloc(max_len); + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); return NULL; } - if (read_string(handle, debug_offsets, address, buf, max_len)) { + + size_t offset = debug_offsets->unicode_object.asciiobject_size; + res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + if (res < 0) { goto err; } + buf[len] = '\0'; - result = PyUnicode_FromString(buf); + result = 
PyUnicode_FromStringAndSize(buf, len); if (result == NULL) { goto err; } @@ -211,10 +194,63 @@ read_py_str( return result; err: + if (buf != NULL) { + PyMem_RawFree(buf); + } + return NULL; +} + +static PyObject * +read_py_bytes( + proc_handle_t *handle, + _Py_DebugOffsets* debug_offsets, + uintptr_t address +) { + PyObject *result = NULL; + char *buf = NULL; + + Py_ssize_t len; + int res = _Py_RemoteDebug_ReadRemoteMemory( + handle, + address + debug_offsets->bytes_object.ob_size, + sizeof(Py_ssize_t), + &len + ); + if (res < 0) { + goto err; + } + + buf = (char *)PyMem_RawMalloc(len+1); + if (buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + + size_t offset = debug_offsets->bytes_object.ob_sval; + res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + if (res < 0) { + goto err; + } + buf[len] = '\0'; + + result = PyBytes_FromStringAndSize(buf, len); + if (result == NULL) { + goto err; + } + PyMem_RawFree(buf); + assert(result != NULL); + return result; + +err: + if (buf != NULL) { + PyMem_RawFree(buf); + } return NULL; } + + static long read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address) { @@ -333,6 +369,15 @@ parse_task_name( } static int +parse_frame_object( + proc_handle_t *handle, + PyObject** result, + struct _Py_DebugOffsets* offsets, + uintptr_t address, + uintptr_t* previous_frame +); + +static int parse_coro_chain( proc_handle_t *handle, struct _Py_DebugOffsets* offsets, @@ -345,28 +390,22 @@ parse_coro_chain( uintptr_t gen_type_addr; int err = read_ptr( handle, - coro_address + sizeof(void*), + coro_address + offsets->pyobject.ob_type, &gen_type_addr); if (err) { return -1; } - uintptr_t gen_name_addr; - err = read_py_ptr( - handle, - coro_address + offsets->gen_object.gi_name, - &gen_name_addr); - if (err) { - return -1; - } - - PyObject *name = read_py_str( - handle, - offsets, - gen_name_addr, - 255 - ); - if (name == NULL) { + PyObject* name = NULL; + uintptr_t prev_frame; + if 
(parse_frame_object( + handle, + &name, + offsets, + coro_address + offsets->gen_object.gi_iframe, + &prev_frame) + < 0) + { return -1; } @@ -376,11 +415,13 @@ parse_coro_chain( } Py_DECREF(name); - int gi_frame_state; - err = read_int( + int8_t gi_frame_state; + err = read_sized_int( handle, coro_address + offsets->gen_object.gi_frame_state, - &gi_frame_state); + &gi_frame_state, + sizeof(int8_t) + ); if (err) { return -1; } @@ -427,7 +468,7 @@ parse_coro_chain( uintptr_t gi_await_addr_type_addr; int err = read_ptr( handle, - gi_await_addr + sizeof(void*), + gi_await_addr + offsets->pyobject.ob_type, &gi_await_addr_type_addr); if (err) { return -1; @@ -470,7 +511,8 @@ parse_task_awaited_by( struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address, - PyObject *awaited_by + PyObject *awaited_by, + int recurse_task ); @@ -480,7 +522,8 @@ parse_task( struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address, - PyObject *render_to + PyObject *render_to, + int recurse_task ) { char is_task; int err = read_char( @@ -508,8 +551,13 @@ parse_task( Py_DECREF(call_stack); if (is_task) { - PyObject *tn = parse_task_name( - handle, offsets, async_offsets, task_address); + PyObject *tn = NULL; + if (recurse_task) { + tn = parse_task_name( + handle, offsets, async_offsets, task_address); + } else { + tn = PyLong_FromUnsignedLongLong(task_address); + } if (tn == NULL) { goto err; } @@ -550,21 +598,23 @@ parse_task( goto err; } - PyObject *awaited_by = PyList_New(0); - if (awaited_by == NULL) { - goto err; - } - if (PyList_Append(result, awaited_by)) { + if (recurse_task) { + PyObject *awaited_by = PyList_New(0); + if (awaited_by == NULL) { + goto err; + } + if (PyList_Append(result, awaited_by)) { + Py_DECREF(awaited_by); + goto err; + } + /* we can operate on a borrowed one to simplify cleanup */ Py_DECREF(awaited_by); - goto err; - } - /* we can operate on a borrowed one to 
simplify cleanup */ - Py_DECREF(awaited_by); - if (parse_task_awaited_by(handle, offsets, async_offsets, - task_address, awaited_by) - ) { - goto err; + if (parse_task_awaited_by(handle, offsets, async_offsets, + task_address, awaited_by, 1) + ) { + goto err; + } } Py_DECREF(result); @@ -581,7 +631,8 @@ parse_tasks_in_set( struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t set_addr, - PyObject *awaited_by + PyObject *awaited_by, + int recurse_task ) { uintptr_t set_obj; if (read_py_ptr( @@ -642,7 +693,9 @@ parse_tasks_in_set( offsets, async_offsets, key_addr, - awaited_by) + awaited_by, + recurse_task + ) ) { return -1; } @@ -666,7 +719,8 @@ parse_task_awaited_by( struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address, - PyObject *awaited_by + PyObject *awaited_by, + int recurse_task ) { uintptr_t task_ab_addr; int err = read_py_ptr( @@ -696,7 +750,9 @@ parse_task_awaited_by( offsets, async_offsets, task_address + async_offsets->asyncio_task_object.task_awaited_by, - awaited_by) + awaited_by, + recurse_task + ) ) { return -1; } @@ -715,7 +771,9 @@ parse_task_awaited_by( offsets, async_offsets, sub_task, - awaited_by) + awaited_by, + recurse_task + ) ) { return -1; } @@ -724,49 +782,204 @@ parse_task_awaited_by( return 0; } +typedef struct +{ + int lineno; + int end_lineno; + int column; + int end_column; +} LocationInfo; + static int -parse_code_object( - proc_handle_t *handle, - PyObject* result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame -) { - uintptr_t address_of_function_name; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( +scan_varint(const uint8_t **ptr) +{ + unsigned int read = **ptr; + *ptr = *ptr + 1; + unsigned int val = read & 63; + unsigned int shift = 0; + while (read & 64) { + read = **ptr; + *ptr = *ptr + 1; + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int 
+scan_signed_varint(const uint8_t **ptr) +{ + unsigned int uval = scan_varint(ptr); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + + +static bool +parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) +{ + const uint8_t* ptr = (const uint8_t*)(linetable); + uint64_t addr = 0; + info->lineno = firstlineno; + + while (*ptr != '\0') { + // See InternalDocs/code_objects.md for where these magic numbers are from + // and for the decoding algorithm. + uint8_t first_byte = *(ptr++); + uint8_t code = (first_byte >> 3) & 15; + size_t length = (first_byte & 7) + 1; + uintptr_t end_addr = addr + length; + switch (code) { + case PY_CODE_LOCATION_INFO_NONE: { + break; + } + case PY_CODE_LOCATION_INFO_LONG: { + int line_delta = scan_signed_varint(&ptr); + info->lineno += line_delta; + info->end_lineno = info->lineno + scan_varint(&ptr); + info->column = scan_varint(&ptr) - 1; + info->end_column = scan_varint(&ptr) - 1; + break; + } + case PY_CODE_LOCATION_INFO_NO_COLUMNS: { + int line_delta = scan_signed_varint(&ptr); + info->lineno += line_delta; + info->column = info->end_column = -1; + break; + } + case PY_CODE_LOCATION_INFO_ONE_LINE0: + case PY_CODE_LOCATION_INFO_ONE_LINE1: + case PY_CODE_LOCATION_INFO_ONE_LINE2: { + int line_delta = code - 10; + info->lineno += line_delta; + info->end_lineno = info->lineno; + info->column = *(ptr++); + info->end_column = *(ptr++); + break; + } + default: { + uint8_t second_byte = *(ptr++); + assert((second_byte & 128) == 0); + info->column = code << 3 | (second_byte >> 4); + info->end_column = info->column + (second_byte & 15); + break; + } + } + if (addr <= addrq && end_addr > addrq) { + return true; + } + addr = end_addr; + } + return false; +} + +static int +read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message) +{ + int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), 
out_ptr); + if (bytes_read < 0) { + return -1; + } + + if ((void *)(*out_ptr) == NULL) { + PyErr_SetString(PyExc_RuntimeError, error_message); + return -1; + } + + return 0; +} + +static int +read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets, + uintptr_t current_frame, uintptr_t *instruction_ptr) +{ + return read_remote_pointer( handle, - address + offsets->code_object.name, - sizeof(void*), - &address_of_function_name + current_frame + offsets->interpreter_frame.instr_ptr, + instruction_ptr, + "No instruction ptr found" ); - if (bytes_read < 0) { +} + +static int +parse_code_object(proc_handle_t *handle, + PyObject **result, + struct _Py_DebugOffsets *offsets, + uintptr_t address, + uintptr_t current_frame, + uintptr_t *previous_frame) +{ + uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr; + + if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 || + read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 || + read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 || + read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) { + return -1; + } + + int firstlineno; + if (_Py_RemoteDebug_ReadRemoteMemory(handle, + address + offsets->code_object.firstlineno, + sizeof(int), + &firstlineno) < 0) { return -1; } - if ((void*)address_of_function_name == NULL) { - PyErr_SetString(PyExc_RuntimeError, "No function name found"); + PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable); + if (!py_linetable) { return -1; } - PyObject* py_function_name = read_py_str( - handle, offsets, address_of_function_name, 256); - if (py_function_name == NULL) { + uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive; + ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive; + + 
LocationInfo info; + parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info); + Py_DECREF(py_linetable); // Done with linetable + + PyObject *py_line = PyLong_FromLong(info.lineno); + if (!py_line) { return -1; } - if (PyList_Append(result, py_function_name) == -1) { - Py_DECREF(py_function_name); + PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256); + if (!py_func_name) { + Py_DECREF(py_line); return -1; } - Py_DECREF(py_function_name); + PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256); + if (!py_file_name) { + Py_DECREF(py_line); + Py_DECREF(py_func_name); + return -1; + } + + PyObject *result_tuple = PyTuple_New(3); + if (!result_tuple) { + Py_DECREF(py_line); + Py_DECREF(py_func_name); + Py_DECREF(py_file_name); + return -1; + } + + PyTuple_SET_ITEM(result_tuple, 0, py_func_name); // steals ref + PyTuple_SET_ITEM(result_tuple, 1, py_file_name); // steals ref + PyTuple_SET_ITEM(result_tuple, 2, py_line); // steals ref + + *result = result_tuple; return 0; } static int parse_frame_object( proc_handle_t *handle, - PyObject* result, + PyObject** result, struct _Py_DebugOffsets* offsets, uintptr_t address, uintptr_t* previous_frame @@ -807,13 +1020,13 @@ parse_frame_object( } return parse_code_object( - handle, result, offsets, address_of_code_object, previous_frame); + handle, result, offsets, address_of_code_object, address, previous_frame); } static int parse_async_frame_object( proc_handle_t *handle, - PyObject* result, + PyObject** result, struct _Py_DebugOffsets* offsets, uintptr_t address, uintptr_t* previous_frame, @@ -863,7 +1076,7 @@ parse_async_frame_object( } if (parse_code_object( - handle, result, offsets, *code_object, previous_frame)) { + handle, result, offsets, *code_object, address, previous_frame)) { return -1; } @@ -1060,15 +1273,24 @@ append_awaited_by_for_thread( return -1; } - PyObject *result_item = PyTuple_New(2); + PyObject* task_id = 
PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + Py_DECREF(tn); + Py_DECREF(current_awaited_by); + return -1; + } + + PyObject *result_item = PyTuple_New(3); if (result_item == NULL) { Py_DECREF(tn); Py_DECREF(current_awaited_by); + Py_DECREF(task_id); return -1; } - PyTuple_SET_ITEM(result_item, 0, tn); // steals ref - PyTuple_SET_ITEM(result_item, 1, current_awaited_by); // steals ref + PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref + PyTuple_SET_ITEM(result_item, 1, tn); // steals ref + PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref if (PyList_Append(result, result_item)) { Py_DECREF(result_item); return -1; @@ -1076,7 +1298,7 @@ append_awaited_by_for_thread( Py_DECREF(result_item); if (parse_task_awaited_by(handle, debug_offsets, async_offsets, - task_addr, current_awaited_by)) + task_addr, current_awaited_by, 0)) { return -1; } @@ -1325,9 +1547,10 @@ get_stack_trace(PyObject* self, PyObject* args) } while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; if (parse_frame_object( handle, - result, + &frame_info, &local_debug_offsets, address_of_current_frame, &address_of_current_frame) @@ -1336,6 +1559,19 @@ get_stack_trace(PyObject* self, PyObject* args) Py_DECREF(result); goto result_err; } + + if (!frame_info) { + continue; + } + + if (PyList_Append(result, frame_info) == -1) { + Py_DECREF(result); + goto result_err; + } + + Py_DECREF(frame_info); + frame_info = NULL; + } result_err: @@ -1457,9 +1693,10 @@ get_async_stack_trace(PyObject* self, PyObject* args) uintptr_t address_of_code_object; while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; int res = parse_async_frame_object( handle, - calls, + &frame_info, &local_debug_offsets, address_of_current_frame, &address_of_current_frame, @@ -1471,6 +1708,18 @@ get_async_stack_trace(PyObject* self, PyObject* args) goto result_err; } + if (!frame_info) { + continue; + } + + if (PyList_Append(calls, frame_info) == 
-1) { + Py_DECREF(calls); + goto result_err; + } + + Py_DECREF(frame_info); + frame_info = NULL; + if (address_of_code_object == address_of_running_task_code_obj) { break; } @@ -1499,7 +1748,7 @@ get_async_stack_trace(PyObject* self, PyObject* args) if (parse_task_awaited_by( handle, &local_debug_offsets, &local_async_debug, - running_task_addr, awaited_by) + running_task_addr, awaited_by, 1) ) { goto result_err; } @@ -1526,13 +1775,13 @@ static PyMethodDef methods[] = { static struct PyModuleDef module = { .m_base = PyModuleDef_HEAD_INIT, - .m_name = "_testexternalinspection", + .m_name = "_remotedebugging", .m_size = -1, .m_methods = methods, }; PyMODINIT_FUNC -PyInit__testexternalinspection(void) +PyInit__remotedebugging(void) { PyObject* mod = PyModule_Create(&module); if (mod == NULL) { diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c new file mode 100644 index 00000000000..18dc13b3fd1 --- /dev/null +++ b/Modules/_zstd/_zstdmodule.c @@ -0,0 +1,914 @@ +/* +Low level interface to Meta's zstd library for use in the compression.zstd +Python module. +*/ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "_zstdmodule.h" + +/*[clinic input] +module _zstd + +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b5f5587aac15c14]*/ +#include "clinic/_zstdmodule.c.h" + + +/* Format error message and set ZstdError. 
*/ +void +set_zstd_error(const _zstd_state* const state, + error_type type, size_t zstd_ret) +{ + char *msg; + assert(ZSTD_isError(zstd_ret)); + + switch (type) + { + case ERR_DECOMPRESS: + msg = "Unable to decompress zstd data: %s"; + break; + case ERR_COMPRESS: + msg = "Unable to compress zstd data: %s"; + break; + case ERR_SET_PLEDGED_INPUT_SIZE: + msg = "Unable to set pledged uncompressed content size: %s"; + break; + + case ERR_LOAD_D_DICT: + msg = "Unable to load zstd dictionary or prefix for decompression: %s"; + break; + case ERR_LOAD_C_DICT: + msg = "Unable to load zstd dictionary or prefix for compression: %s"; + break; + + case ERR_GET_C_BOUNDS: + msg = "Unable to get zstd compression parameter bounds: %s"; + break; + case ERR_GET_D_BOUNDS: + msg = "Unable to get zstd decompression parameter bounds: %s"; + break; + case ERR_SET_C_LEVEL: + msg = "Unable to set zstd compression level: %s"; + break; + + case ERR_TRAIN_DICT: + msg = "Unable to train zstd dictionary: %s"; + break; + case ERR_FINALIZE_DICT: + msg = "Unable to finalize zstd dictionary: %s"; + break; + + default: + Py_UNREACHABLE(); + } + PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret)); +} + +typedef struct { + int parameter; + char parameter_name[32]; +} ParameterInfo; + +static const ParameterInfo cp_list[] = +{ + {ZSTD_c_compressionLevel, "compressionLevel"}, + {ZSTD_c_windowLog, "windowLog"}, + {ZSTD_c_hashLog, "hashLog"}, + {ZSTD_c_chainLog, "chainLog"}, + {ZSTD_c_searchLog, "searchLog"}, + {ZSTD_c_minMatch, "minMatch"}, + {ZSTD_c_targetLength, "targetLength"}, + {ZSTD_c_strategy, "strategy"}, + + {ZSTD_c_enableLongDistanceMatching, "enableLongDistanceMatching"}, + {ZSTD_c_ldmHashLog, "ldmHashLog"}, + {ZSTD_c_ldmMinMatch, "ldmMinMatch"}, + {ZSTD_c_ldmBucketSizeLog, "ldmBucketSizeLog"}, + {ZSTD_c_ldmHashRateLog, "ldmHashRateLog"}, + + {ZSTD_c_contentSizeFlag, "contentSizeFlag"}, + {ZSTD_c_checksumFlag, "checksumFlag"}, + {ZSTD_c_dictIDFlag, "dictIDFlag"}, + + 
{ZSTD_c_nbWorkers, "nbWorkers"}, + {ZSTD_c_jobSize, "jobSize"}, + {ZSTD_c_overlapLog, "overlapLog"} +}; + +static const ParameterInfo dp_list[] = +{ + {ZSTD_d_windowLogMax, "windowLogMax"} +}; + +void +set_parameter_error(const _zstd_state* const state, int is_compress, + int key_v, int value_v) +{ + ParameterInfo const *list; + int list_size; + char const *name; + char *type; + ZSTD_bounds bounds; + int i; + char pos_msg[128]; + + if (is_compress) { + list = cp_list; + list_size = Py_ARRAY_LENGTH(cp_list); + type = "compression"; + } + else { + list = dp_list; + list_size = Py_ARRAY_LENGTH(dp_list); + type = "decompression"; + } + + /* Find parameter's name */ + name = NULL; + for (i = 0; i < list_size; i++) { + if (key_v == (list+i)->parameter) { + name = (list+i)->parameter_name; + break; + } + } + + /* Unknown parameter */ + if (name == NULL) { + PyOS_snprintf(pos_msg, sizeof(pos_msg), + "unknown parameter (key %d)", key_v); + name = pos_msg; + } + + /* Get parameter bounds */ + if (is_compress) { + bounds = ZSTD_cParam_getBounds(key_v); + } + else { + bounds = ZSTD_dParam_getBounds(key_v); + } + if (ZSTD_isError(bounds.error)) { + PyErr_Format(state->ZstdError, + "Zstd %s parameter \"%s\" is invalid. (zstd v%s)", + type, name, ZSTD_versionString()); + return; + } + + /* Error message */ + PyErr_Format(state->ZstdError, + "Error when setting zstd %s parameter \"%s\", it " + "should %d <= value <= %d, provided value is %d. " + "(zstd v%s, %d-bit build)", + type, name, + bounds.lowerBound, bounds.upperBound, value_v, + ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t)); +} + +static inline _zstd_state* +get_zstd_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (_zstd_state *)state; +} + + +/*[clinic input] +_zstd._train_dict + + samples_bytes: PyBytesObject + Concatenation of samples. + samples_size_list: object(subclass_of='&PyList_Type') + List of samples' sizes. 
+ dict_size: Py_ssize_t + The size of the dictionary. + / + +Internal function, train a zstd dictionary on sample data. +[clinic start generated code]*/ + +static PyObject * +_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_size_list, Py_ssize_t dict_size) +/*[clinic end generated code: output=ee53c34c8f77886b input=b21d092c695a3a81]*/ +{ + // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict + // are pretty similar. We should see if we can refactor them to share that code. + Py_ssize_t chunks_number; + size_t *chunk_sizes = NULL; + PyObject *dst_dict_bytes = NULL; + size_t zstd_ret; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + /* Check arguments */ + if (dict_size <= 0) { + PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); + return NULL; + } + + chunks_number = Py_SIZE(samples_size_list); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return NULL; + } + + /* Prepare chunk_sizes */ + chunk_sizes = PyMem_New(size_t, chunks_number); + if (chunk_sizes == NULL) { + PyErr_NoMemory(); + goto error; + } + + sizes_sum = 0; + for (i = 0; i < chunks_number; i++) { + PyObject *size = PyList_GetItemRef(samples_size_list, i); + chunk_sizes[i] = PyLong_AsSize_t(size); + Py_DECREF(size); + if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "Items in samples_size_list should be an int " + "object, with a value between 0 and %u.", SIZE_MAX); + goto error; + } + sizes_sum += chunk_sizes[i]; + } + + if (sizes_sum != Py_SIZE(samples_bytes)) { + PyErr_SetString(PyExc_ValueError, + "The samples size list doesn't match the concatenation's size."); + goto error; + } + + /* Allocate dict buffer */ + dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); + if (dst_dict_bytes == NULL) { + goto error; + } + + /* Train the dictionary */ + char 
*dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes); + char *samples_buffer = PyBytes_AS_STRING(samples_bytes); + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size, + samples_buffer, + chunk_sizes, (uint32_t)chunks_number); + Py_END_ALLOW_THREADS + + /* Check zstd dict error */ + if (ZDICT_isError(zstd_ret)) { + _zstd_state* const mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); + goto error; + } + + /* Resize dict_buffer */ + if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) { + goto error; + } + + goto success; + +error: + Py_CLEAR(dst_dict_bytes); + +success: + PyMem_Free(chunk_sizes); + return dst_dict_bytes; +} + +/*[clinic input] +_zstd._finalize_dict + + custom_dict_bytes: PyBytesObject + Custom dictionary content. + samples_bytes: PyBytesObject + Concatenation of samples. + samples_size_list: object(subclass_of='&PyList_Type') + List of samples' sizes. + dict_size: Py_ssize_t + The size of the dictionary. + compression_level: int + Optimize for a specific zstd compression level, 0 means default. + / + +Internal function, finalize a zstd dictionary. 
+[clinic start generated code]*/ + +static PyObject * +_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_size_list, Py_ssize_t dict_size, + int compression_level) +/*[clinic end generated code: output=9c2a7d8c845cee93 input=08531a803d87c56f]*/ +{ + Py_ssize_t chunks_number; + size_t *chunk_sizes = NULL; + PyObject *dst_dict_bytes = NULL; + size_t zstd_ret; + ZDICT_params_t params; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + /* Check arguments */ + if (dict_size <= 0) { + PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); + return NULL; + } + + chunks_number = Py_SIZE(samples_size_list); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return NULL; + } + + /* Prepare chunk_sizes */ + chunk_sizes = PyMem_New(size_t, chunks_number); + if (chunk_sizes == NULL) { + PyErr_NoMemory(); + goto error; + } + + sizes_sum = 0; + for (i = 0; i < chunks_number; i++) { + PyObject *size = PyList_GET_ITEM(samples_size_list, i); + chunk_sizes[i] = PyLong_AsSize_t(size); + if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "Items in samples_size_list should be an int " + "object, with a value between 0 and %u.", SIZE_MAX); + goto error; + } + sizes_sum += chunk_sizes[i]; + } + + if (sizes_sum != Py_SIZE(samples_bytes)) { + PyErr_SetString(PyExc_ValueError, + "The samples size list doesn't match the concatenation's size."); + goto error; + } + + /* Allocate dict buffer */ + dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size); + if (dst_dict_bytes == NULL) { + goto error; + } + + /* Parameters */ + + /* Optimize for a specific zstd compression level, 0 means default. */ + params.compressionLevel = compression_level; + /* Write log to stderr, 0 = none. 
*/ + params.notificationLevel = 0; + /* Force dictID value, 0 means auto mode (32-bits random value). */ + params.dictID = 0; + + /* Finalize the dictionary */ + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZDICT_finalizeDictionary( + PyBytes_AS_STRING(dst_dict_bytes), dict_size, + PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes), + PyBytes_AS_STRING(samples_bytes), chunk_sizes, + (uint32_t)chunks_number, params); + Py_END_ALLOW_THREADS + + /* Check zstd dict error */ + if (ZDICT_isError(zstd_ret)) { + _zstd_state* const mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); + goto error; + } + + /* Resize dict_buffer */ + if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) { + goto error; + } + + goto success; + +error: + Py_CLEAR(dst_dict_bytes); + +success: + PyMem_Free(chunk_sizes); + return dst_dict_bytes; +} + + +/*[clinic input] +_zstd._get_param_bounds + + is_compress: bool + True for CParameter, False for DParameter. + parameter: int + The parameter to get bounds. + +Internal function, get CParameter/DParameter bounds. 
+[clinic start generated code]*/ + +static PyObject * +_zstd__get_param_bounds_impl(PyObject *module, int is_compress, + int parameter) +/*[clinic end generated code: output=b751dc710f89ef55 input=fb21ff96aff65df1]*/ +{ + ZSTD_bounds bound; + if (is_compress) { + bound = ZSTD_cParam_getBounds(parameter); + if (ZSTD_isError(bound.error)) { + _zstd_state* const mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error); + return NULL; + } + } + else { + bound = ZSTD_dParam_getBounds(parameter); + if (ZSTD_isError(bound.error)) { + _zstd_state* const mod_state = get_zstd_state(module); + set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error); + return NULL; + } + } + + return Py_BuildValue("ii", bound.lowerBound, bound.upperBound); +} + +/*[clinic input] +_zstd.get_frame_size + + frame_buffer: Py_buffer + A bytes-like object, it should start from the beginning of a frame, + and contains at least one complete frame. + +Get the size of a zstd frame, including frame header and 4-byte checksum if it has one. + +It will iterate all blocks' headers within a frame, to accumulate the frame size. +[clinic start generated code]*/ + +static PyObject * +_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/ +{ + size_t frame_size; + + frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len); + if (ZSTD_isError(frame_size)) { + _zstd_state* const mod_state = get_zstd_state(module); + PyErr_Format(mod_state->ZstdError, + "Error when finding the compressed size of a zstd frame. " + "Make sure the frame_buffer argument starts from the " + "beginning of a frame, and its length not less than this " + "complete frame. 
Zstd error message: %s.", + ZSTD_getErrorName(frame_size)); + return NULL; + } + + return PyLong_FromSize_t(frame_size); +} + +/*[clinic input] +_zstd._get_frame_info + + frame_buffer: Py_buffer + A bytes-like object, containing the header of a zstd frame. + +Internal function, get zstd frame infomation from a frame header. +[clinic start generated code]*/ + +static PyObject * +_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/ +{ + uint64_t decompressed_size; + uint32_t dict_id; + + /* ZSTD_getFrameContentSize */ + decompressed_size = ZSTD_getFrameContentSize(frame_buffer->buf, + frame_buffer->len); + + /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) + #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ + if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { + _zstd_state* const mod_state = get_zstd_state(module); + PyErr_SetString(mod_state->ZstdError, + "Error when getting information from the header of " + "a zstd frame. Make sure the frame_buffer argument " + "starts from the beginning of a frame, and its length " + "not less than the frame header (6~18 bytes)."); + return NULL; + } + + /* ZSTD_getDictID_fromFrame */ + dict_id = ZSTD_getDictID_fromFrame(frame_buffer->buf, frame_buffer->len); + + /* Build tuple */ + if (decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { + return Py_BuildValue("OI", Py_None, dict_id); + } + return Py_BuildValue("KI", decompressed_size, dict_id); +} + +/*[clinic input] +_zstd._set_parameter_types + + c_parameter_type: object(subclass_of='&PyType_Type') + CParameter IntEnum type object + d_parameter_type: object(subclass_of='&PyType_Type') + DParameter IntEnum type object + +Internal function, set CParameter/DParameter types for validity check. 
+[clinic start generated code]*/ + +static PyObject * +_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type) +/*[clinic end generated code: output=a13d4890ccbd2873 input=3e7d0d37c3a1045a]*/ +{ + _zstd_state* const mod_state = get_zstd_state(module); + + if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { + PyErr_SetString(PyExc_ValueError, + "The two arguments should be CParameter and " + "DParameter types."); + return NULL; + } + + Py_XDECREF(mod_state->CParameter_type); + Py_INCREF(c_parameter_type); + mod_state->CParameter_type = (PyTypeObject*) c_parameter_type; + + Py_XDECREF(mod_state->DParameter_type); + Py_INCREF(d_parameter_type); + mod_state->DParameter_type = (PyTypeObject*)d_parameter_type; + + Py_RETURN_NONE; +} + +static PyMethodDef _zstd_methods[] = { + _ZSTD__TRAIN_DICT_METHODDEF + _ZSTD__FINALIZE_DICT_METHODDEF + _ZSTD__GET_PARAM_BOUNDS_METHODDEF + _ZSTD_GET_FRAME_SIZE_METHODDEF + _ZSTD__GET_FRAME_INFO_METHODDEF + _ZSTD__SET_PARAMETER_TYPES_METHODDEF + + {0} +}; + + +#define ADD_INT_PREFIX_MACRO(module, macro) \ + do { \ + if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \ + return -1; \ + } \ + } while(0) + +static int +add_parameters(PyObject *module) +{ + /* If add new parameters, please also add to cp_list/dp_list above. 
*/ + + /* Compression parameters */ + ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy); + + ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog); + + ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag); + + ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize); + ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog); + + /* Decompression parameters */ + ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax); + + /* ZSTD_strategy enum */ + ADD_INT_PREFIX_MACRO(module, ZSTD_fast); + ADD_INT_PREFIX_MACRO(module, ZSTD_dfast); + ADD_INT_PREFIX_MACRO(module, ZSTD_greedy); + ADD_INT_PREFIX_MACRO(module, ZSTD_lazy); + ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2); + ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2); + ADD_INT_PREFIX_MACRO(module, ZSTD_btopt); + ADD_INT_PREFIX_MACRO(module, ZSTD_btultra); + ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2); + + return 0; +} + +static inline PyObject * +get_zstd_version_info(void) +{ + uint32_t ver = ZSTD_versionNumber(); + uint32_t major, minor, release; + + major = ver / 10000; + minor = (ver / 100) % 100; + release = ver % 100; + + return Py_BuildValue("III", major, minor, release); +} + +static inline int +add_vars_to_module(PyObject *module) +{ + PyObject *obj; + + /* zstd_version, a str. 
*/ + if (PyModule_AddStringConstant(module, "zstd_version", + ZSTD_versionString()) < 0) { + return -1; + } + + /* zstd_version_info, a tuple. */ + obj = get_zstd_version_info(); + if (PyModule_AddObjectRef(module, "zstd_version_info", obj) < 0) { + Py_XDECREF(obj); + return -1; + } + Py_DECREF(obj); + + /* Add zstd parameters */ + if (add_parameters(module) < 0) { + return -1; + } + + /* _compressionLevel_values: (default, min, max) + ZSTD_defaultCLevel() was added in zstd v1.5.0 */ + obj = Py_BuildValue("iii", +#if ZSTD_VERSION_NUMBER < 10500 + ZSTD_CLEVEL_DEFAULT, +#else + ZSTD_defaultCLevel(), +#endif + ZSTD_minCLevel(), + ZSTD_maxCLevel()); + if (PyModule_AddObjectRef(module, + "_compressionLevel_values", + obj) < 0) { + Py_XDECREF(obj); + return -1; + } + Py_DECREF(obj); + + /* _ZSTD_CStreamSizes */ + obj = Py_BuildValue("II", + (uint32_t)ZSTD_CStreamInSize(), + (uint32_t)ZSTD_CStreamOutSize()); + if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", obj) < 0) { + Py_XDECREF(obj); + return -1; + } + Py_DECREF(obj); + + /* _ZSTD_DStreamSizes */ + obj = Py_BuildValue("II", + (uint32_t)ZSTD_DStreamInSize(), + (uint32_t)ZSTD_DStreamOutSize()); + if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", obj) < 0) { + Py_XDECREF(obj); + return -1; + } + Py_DECREF(obj); + + /* _ZSTD_CONFIG */ + obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c", + Py_False, + Py_True, +/* User mremap output buffer */ +#if defined(HAVE_MREMAP) + Py_True +#else + Py_False +#endif + ); + if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", obj) < 0) { + Py_XDECREF(obj); + return -1; + } + Py_DECREF(obj); + + return 0; +} + +#define ADD_STR_TO_STATE_MACRO(STR) \ + do { \ + mod_state->str_##STR = PyUnicode_FromString(#STR); \ + if (mod_state->str_##STR == NULL) { \ + return -1; \ + } \ + } while(0) + +static inline int +add_type_to_module(PyObject *module, const char *name, + PyType_Spec *type_spec, PyTypeObject **dest) +{ + PyObject *temp = PyType_FromModuleAndSpec(module, 
type_spec, NULL); + + if (PyModule_AddObjectRef(module, name, temp) < 0) { + Py_XDECREF(temp); + return -1; + } + + *dest = (PyTypeObject*) temp; + + return 0; +} + +static inline int +add_constant_to_type(PyTypeObject *type, const char *name, long value) +{ + PyObject *temp; + + temp = PyLong_FromLong(value); + if (temp == NULL) { + return -1; + } + + int rc = PyObject_SetAttrString((PyObject*) type, name, temp); + Py_DECREF(temp); + return rc; +} + +static int _zstd_exec(PyObject *module) { + _zstd_state* const mod_state = get_zstd_state(module); + + /* Reusable objects & variables */ + mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0); + if (mod_state->empty_bytes == NULL) { + return -1; + } + + mod_state->empty_readonly_memoryview = + PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ); + if (mod_state->empty_readonly_memoryview == NULL) { + return -1; + } + + /* Add str to module state */ + ADD_STR_TO_STATE_MACRO(read); + ADD_STR_TO_STATE_MACRO(readinto); + ADD_STR_TO_STATE_MACRO(write); + ADD_STR_TO_STATE_MACRO(flush); + + mod_state->CParameter_type = NULL; + mod_state->DParameter_type = NULL; + + /* Add variables to module */ + if (add_vars_to_module(module) < 0) { + return -1; + } + + /* ZstdError */ + mod_state->ZstdError = PyErr_NewExceptionWithDoc( + "_zstd.ZstdError", + "Call to the underlying zstd library failed.", + NULL, NULL); + if (mod_state->ZstdError == NULL) { + return -1; + } + + if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) { + Py_DECREF(mod_state->ZstdError); + return -1; + } + + /* ZstdDict */ + if (add_type_to_module(module, + "ZstdDict", + &zstddict_type_spec, + &mod_state->ZstdDict_type) < 0) { + return -1; + } + + // ZstdCompressor + if (add_type_to_module(module, + "ZstdCompressor", + &zstdcompressor_type_spec, + &mod_state->ZstdCompressor_type) < 0) { + return -1; + } + + // Add EndDirective enum to ZstdCompressor + if (add_constant_to_type(mod_state->ZstdCompressor_type, + "CONTINUE", + 
ZSTD_e_continue) < 0) { + return -1; + } + + if (add_constant_to_type(mod_state->ZstdCompressor_type, + "FLUSH_BLOCK", + ZSTD_e_flush) < 0) { + return -1; + } + + if (add_constant_to_type(mod_state->ZstdCompressor_type, + "FLUSH_FRAME", + ZSTD_e_end) < 0) { + return -1; + } + + // ZstdDecompressor + if (add_type_to_module(module, + "ZstdDecompressor", + &ZstdDecompressor_type_spec, + &mod_state->ZstdDecompressor_type) < 0) { + return -1; + } + + return 0; +} + +static int +_zstd_traverse(PyObject *module, visitproc visit, void *arg) +{ + _zstd_state* const mod_state = get_zstd_state(module); + + Py_VISIT(mod_state->empty_bytes); + Py_VISIT(mod_state->empty_readonly_memoryview); + Py_VISIT(mod_state->str_read); + Py_VISIT(mod_state->str_readinto); + Py_VISIT(mod_state->str_write); + Py_VISIT(mod_state->str_flush); + + Py_VISIT(mod_state->ZstdDict_type); + Py_VISIT(mod_state->ZstdCompressor_type); + + Py_VISIT(mod_state->ZstdDecompressor_type); + + Py_VISIT(mod_state->ZstdError); + + Py_VISIT(mod_state->CParameter_type); + Py_VISIT(mod_state->DParameter_type); + return 0; +} + +static int +_zstd_clear(PyObject *module) +{ + _zstd_state* const mod_state = get_zstd_state(module); + + Py_CLEAR(mod_state->empty_bytes); + Py_CLEAR(mod_state->empty_readonly_memoryview); + Py_CLEAR(mod_state->str_read); + Py_CLEAR(mod_state->str_readinto); + Py_CLEAR(mod_state->str_write); + Py_CLEAR(mod_state->str_flush); + + Py_CLEAR(mod_state->ZstdDict_type); + Py_CLEAR(mod_state->ZstdCompressor_type); + + Py_CLEAR(mod_state->ZstdDecompressor_type); + + Py_CLEAR(mod_state->ZstdError); + + Py_CLEAR(mod_state->CParameter_type); + Py_CLEAR(mod_state->DParameter_type); + return 0; +} + +static void +_zstd_free(void *module) +{ + (void)_zstd_clear((PyObject *)module); +} + +static struct PyModuleDef_Slot _zstd_slots[] = { + {Py_mod_exec, _zstd_exec}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + + {0} +}; + +struct PyModuleDef _zstdmodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_zstd", + .m_size = 
sizeof(_zstd_state), + .m_slots = _zstd_slots, + .m_methods = _zstd_methods, + .m_traverse = _zstd_traverse, + .m_clear = _zstd_clear, + .m_free = _zstd_free +}; + +PyMODINIT_FUNC +PyInit__zstd(void) +{ + return PyModuleDef_Init(&_zstdmodule); +} diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h new file mode 100644 index 00000000000..d50f1489e6f --- /dev/null +++ b/Modules/_zstd/_zstdmodule.h @@ -0,0 +1,204 @@ +#pragma once +/* +Low level interface to Meta's zstd library for use in the compression.zstd +Python module. +*/ + +/* Declarations shared between different parts of the _zstd module*/ + +#include "Python.h" + +#include "zstd.h" +#include "zdict.h" + + +// if you update the minimum version, you should update the compile +// check in configure.ac +#define PYTHON_MINIMUM_SUPPORTED_ZSTD_VERSION 10405 + +#if ZSTD_VERSION_NUMBER < PYTHON_MINIMUM_SUPPORTED_ZSTD_VERSION + #error "_zstd module requires zstd v1.4.5+" +#endif + +/* Forward declaration of module state */ +typedef struct _zstd_state _zstd_state; + +/* Forward reference of module def */ +extern PyModuleDef _zstdmodule; + +/* For clinic type calculations */ +static inline _zstd_state * +get_zstd_state_from_type(PyTypeObject *type) { + PyObject *module = PyType_GetModuleByDef(type, &_zstdmodule); + if (module == NULL) { + return NULL; + } + void *state = PyModule_GetState(module); + assert(state != NULL); + return (_zstd_state *)state; +} + +extern PyType_Spec zstddict_type_spec; +extern PyType_Spec zstdcompressor_type_spec; +extern PyType_Spec ZstdDecompressor_type_spec; + +struct _zstd_state { + PyObject *empty_bytes; + PyObject *empty_readonly_memoryview; + PyObject *str_read; + PyObject *str_readinto; + PyObject *str_write; + PyObject *str_flush; + + PyTypeObject *ZstdDict_type; + PyTypeObject *ZstdCompressor_type; + PyTypeObject *ZstdDecompressor_type; + PyObject *ZstdError; + + PyTypeObject *CParameter_type; + PyTypeObject *DParameter_type; +}; + +typedef struct { + 
PyObject_HEAD + + /* Reusable compress/decompress dictionary, they are created once and + can be shared by multiple threads concurrently, since its usage is + read-only. + c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ + ZSTD_DDict *d_dict; + PyObject *c_dicts; + + /* Content of the dictionary, bytes object. */ + PyObject *dict_content; + /* Dictionary id */ + uint32_t dict_id; + + /* __init__ has been called, 0 or 1. */ + int inited; +} ZstdDict; + +typedef struct { + PyObject_HEAD + + /* Compression context */ + ZSTD_CCtx *cctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Last mode, initialized to ZSTD_e_end */ + int last_mode; + + /* (nbWorker >= 1) ? 1 : 0 */ + int use_multithread; + + /* Compression level */ + int compression_level; + + /* __init__ has been called, 0 or 1. */ + int inited; +} ZstdCompressor; + +typedef struct { + PyObject_HEAD + + /* Decompression context */ + ZSTD_DCtx *dctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Unconsumed input data */ + char *input_buffer; + size_t input_buffer_size; + size_t in_begin, in_end; + + /* Unused data */ + PyObject *unused_data; + + /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ + char needs_input; + + /* For decompress(), 0 or 1. + 1 when both input and output streams are at a frame edge, means a + frame is completely decoded and fully flushed, or the decompressor + just be initialized. */ + char at_frame_edge; + + /* For ZstdDecompressor, 0 or 1. + 1 means the end of the first frame has been reached. */ + char eof; + + /* Used for fast reset above three variables */ + char _unused_char_for_align; + + /* __init__ has been called, 0 or 1. 
*/ + int inited; +} ZstdDecompressor; + +typedef enum { + TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class + TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function +} decompress_type; + +typedef enum { + ERR_DECOMPRESS, + ERR_COMPRESS, + ERR_SET_PLEDGED_INPUT_SIZE, + + ERR_LOAD_D_DICT, + ERR_LOAD_C_DICT, + + ERR_GET_C_BOUNDS, + ERR_GET_D_BOUNDS, + ERR_SET_C_LEVEL, + + ERR_TRAIN_DICT, + ERR_FINALIZE_DICT +} error_type; + +typedef enum { + DICT_TYPE_DIGESTED = 0, + DICT_TYPE_UNDIGESTED = 1, + DICT_TYPE_PREFIX = 2 +} dictionary_type; + +static inline int +mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { + return in->size == in->pos && out->size != out->pos; +} + +/* Format error message and set ZstdError. */ +extern void +set_zstd_error(const _zstd_state* const state, + const error_type type, size_t zstd_ret); + +extern void +set_parameter_error(const _zstd_state* const state, int is_compress, + int key_v, int value_v); + +static const char init_twice_msg[] = "__init__ method is called twice."; + +extern int +_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict); + +extern int +_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict); + +extern int +_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, + const char *arg_name, const char *arg_type); + +extern int +_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options); + +extern PyObject * +decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length, + Py_ssize_t initial_size, + decompress_type type); + +extern PyObject * +compress_impl(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive); diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h new file mode 100644 index 00000000000..319b1214833 --- /dev/null +++ b/Modules/_zstd/buffer.h @@ -0,0 +1,104 @@ +/* +Low level interface to Meta's zstd library for use in the compression.zstd +Python module. 
+*/ + +#include "_zstdmodule.h" +#include "pycore_blocks_output_buffer.h" + +/* Blocks output buffer wrapper code */ + +/* Initialize the buffer, and grow the buffer. + Return 0 on success + Return -1 on failure */ +static inline int +_OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, + Py_ssize_t max_length) +{ + /* Ensure .list was set to NULL */ + assert(buffer->list == NULL); + + Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, &ob->dst); + if (res < 0) { + return -1; + } + ob->size = (size_t) res; + ob->pos = 0; + return 0; +} + +/* Initialize the buffer, with an initial size. + init_size: the initial size. + Return 0 on success + Return -1 on failure */ +static inline int +_OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, + Py_ssize_t max_length, + Py_ssize_t init_size) +{ + Py_ssize_t block_size; + + /* Ensure .list was set to NULL */ + assert(buffer->list == NULL); + + /* Get block size */ + if (0 <= max_length && max_length < init_size) { + block_size = max_length; + } + else { + block_size = init_size; + } + + Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, &ob->dst); + if (res < 0) { + return -1; + } + // Set max_length, InitWithSize doesn't do this + buffer->max_length = max_length; + ob->size = (size_t) res; + ob->pos = 0; + return 0; +} + +/* Grow the buffer. + Return 0 on success + Return -1 on failure */ +static inline int +_OutputBuffer_Grow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) +{ + assert(ob->pos == ob->size); + Py_ssize_t res = _BlocksOutputBuffer_Grow(buffer, &ob->dst, 0); + if (res < 0) { + return -1; + } + ob->size = (size_t) res; + ob->pos = 0; + return 0; +} + +/* Finish the buffer. 
+ Return a bytes object on success + Return NULL on failure */ +static inline PyObject * +_OutputBuffer_Finish(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) +{ + return _BlocksOutputBuffer_Finish(buffer, ob->size - ob->pos); +} + +/* Clean up the buffer */ +static inline void +_OutputBuffer_OnError(_BlocksOutputBuffer *buffer) +{ + _BlocksOutputBuffer_OnError(buffer); +} + +/* Whether the output data has reached max_length. +The avail_out must be 0, please check it before calling. */ +static inline int +_OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) +{ + /* Ensure (data size == allocated size) */ + assert(ob->pos == ob->size); + + return buffer->allocated == buffer->max_length; +} diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h new file mode 100644 index 00000000000..4b78bded67b --- /dev/null +++ b/Modules/_zstd/clinic/_zstdmodule.c.h @@ -0,0 +1,432 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + +PyDoc_STRVAR(_zstd__train_dict__doc__, +"_train_dict($module, samples_bytes, samples_size_list, dict_size, /)\n" +"--\n" +"\n" +"Internal function, train a zstd dictionary on sample data.\n" +"\n" +" samples_bytes\n" +" Concatenation of samples.\n" +" samples_size_list\n" +" List of samples\' sizes.\n" +" dict_size\n" +" The size of the dictionary."); + +#define _ZSTD__TRAIN_DICT_METHODDEF \ + {"_train_dict", _PyCFunction_CAST(_zstd__train_dict), METH_FASTCALL, _zstd__train_dict__doc__}, + +static PyObject * +_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_size_list, Py_ssize_t dict_size); + +static PyObject * +_zstd__train_dict(PyObject *module, PyObject *const *args, 
Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyBytesObject *samples_bytes; + PyObject *samples_size_list; + Py_ssize_t dict_size; + + if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) { + goto exit; + } + if (!PyBytes_Check(args[0])) { + _PyArg_BadArgument("_train_dict", "argument 1", "bytes", args[0]); + goto exit; + } + samples_bytes = (PyBytesObject *)args[0]; + if (!PyList_Check(args[1])) { + _PyArg_BadArgument("_train_dict", "argument 2", "list", args[1]); + goto exit; + } + samples_size_list = args[1]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + dict_size = ival; + } + return_value = _zstd__train_dict_impl(module, samples_bytes, samples_size_list, dict_size); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd__finalize_dict__doc__, +"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n" +" samples_size_list, dict_size, compression_level, /)\n" +"--\n" +"\n" +"Internal function, finalize a zstd dictionary.\n" +"\n" +" custom_dict_bytes\n" +" Custom dictionary content.\n" +" samples_bytes\n" +" Concatenation of samples.\n" +" samples_size_list\n" +" List of samples\' sizes.\n" +" dict_size\n" +" The size of the dictionary.\n" +" compression_level\n" +" Optimize for a specific zstd compression level, 0 means default."); + +#define _ZSTD__FINALIZE_DICT_METHODDEF \ + {"_finalize_dict", _PyCFunction_CAST(_zstd__finalize_dict), METH_FASTCALL, _zstd__finalize_dict__doc__}, + +static PyObject * +_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_size_list, Py_ssize_t dict_size, + int compression_level); + +static PyObject * +_zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyBytesObject *custom_dict_bytes; + PyBytesObject 
*samples_bytes; + PyObject *samples_size_list; + Py_ssize_t dict_size; + int compression_level; + + if (!_PyArg_CheckPositional("_finalize_dict", nargs, 5, 5)) { + goto exit; + } + if (!PyBytes_Check(args[0])) { + _PyArg_BadArgument("_finalize_dict", "argument 1", "bytes", args[0]); + goto exit; + } + custom_dict_bytes = (PyBytesObject *)args[0]; + if (!PyBytes_Check(args[1])) { + _PyArg_BadArgument("_finalize_dict", "argument 2", "bytes", args[1]); + goto exit; + } + samples_bytes = (PyBytesObject *)args[1]; + if (!PyList_Check(args[2])) { + _PyArg_BadArgument("_finalize_dict", "argument 3", "list", args[2]); + goto exit; + } + samples_size_list = args[2]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[3]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + dict_size = ival; + } + compression_level = PyLong_AsInt(args[4]); + if (compression_level == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_size_list, dict_size, compression_level); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd__get_param_bounds__doc__, +"_get_param_bounds($module, /, is_compress, parameter)\n" +"--\n" +"\n" +"Internal function, get CParameter/DParameter bounds.\n" +"\n" +" is_compress\n" +" True for CParameter, False for DParameter.\n" +" parameter\n" +" The parameter to get bounds."); + +#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \ + {"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__}, + +static PyObject * +_zstd__get_param_bounds_impl(PyObject *module, int is_compress, + int parameter); + +static PyObject * +_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + 
#define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(is_compress), &_Py_ID(parameter), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"is_compress", "parameter", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_get_param_bounds", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + int is_compress; + int parameter; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + is_compress = PyObject_IsTrue(args[0]); + if (is_compress < 0) { + goto exit; + } + parameter = PyLong_AsInt(args[1]); + if (parameter == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = _zstd__get_param_bounds_impl(module, is_compress, parameter); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_get_frame_size__doc__, +"get_frame_size($module, /, frame_buffer)\n" +"--\n" +"\n" +"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n" +"\n" +" frame_buffer\n" +" A bytes-like object, it should start from the beginning of a frame,\n" +" and contains at least one complete frame.\n" +"\n" +"It will iterate all blocks\' headers within a frame, to accumulate the frame size."); + +#define _ZSTD_GET_FRAME_SIZE_METHODDEF \ + {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__}, + +static PyObject * +_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer); + +static PyObject * +_zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, 
PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(frame_buffer), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"frame_buffer", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_frame_size", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_buffer frame_buffer = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { + goto exit; + } + return_value = _zstd_get_frame_size_impl(module, &frame_buffer); + +exit: + /* Cleanup for frame_buffer */ + if (frame_buffer.obj) { + PyBuffer_Release(&frame_buffer); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd__get_frame_info__doc__, +"_get_frame_info($module, /, frame_buffer)\n" +"--\n" +"\n" +"Internal function, get zstd frame infomation from a frame header.\n" +"\n" +" frame_buffer\n" +" A bytes-like object, containing the header of a zstd frame."); + +#define _ZSTD__GET_FRAME_INFO_METHODDEF \ + {"_get_frame_info", _PyCFunction_CAST(_zstd__get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd__get_frame_info__doc__}, + +static PyObject * +_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); + +static PyObject * +_zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if 
defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(frame_buffer), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"frame_buffer", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_get_frame_info", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_buffer frame_buffer = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { + goto exit; + } + return_value = _zstd__get_frame_info_impl(module, &frame_buffer); + +exit: + /* Cleanup for frame_buffer */ + if (frame_buffer.obj) { + PyBuffer_Release(&frame_buffer); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd__set_parameter_types__doc__, +"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" +"--\n" +"\n" +"Internal function, set CParameter/DParameter types for validity check.\n" +"\n" +" c_parameter_type\n" +" CParameter IntEnum type object\n" +" d_parameter_type\n" +" DParameter IntEnum type object"); + +#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \ + {"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__}, + +static PyObject * +_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type); + +static PyObject * +_zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, 
PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(c_parameter_type), &_Py_ID(d_parameter_type), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_set_parameter_types", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *c_parameter_type; + PyObject *d_parameter_type; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyObject_TypeCheck(args[0], &PyType_Type)) { + _PyArg_BadArgument("_set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); + goto exit; + } + c_parameter_type = args[0]; + if (!PyObject_TypeCheck(args[1], &PyType_Type)) { + _PyArg_BadArgument("_set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); + goto exit; + } + d_parameter_type = args[1]; + return_value = _zstd__set_parameter_types_impl(module, c_parameter_type, d_parameter_type); + +exit: + return return_value; +} +/*[clinic end generated code: output=077c8ea2b11fb188 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h new file mode 100644 index 00000000000..d7909cdf89f --- /dev/null +++ b/Modules/_zstd/clinic/compressor.c.h @@ -0,0 +1,255 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if 
defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_zstd_ZstdCompressor___init____doc__, +"ZstdCompressor(level=None, options=None, zstd_dict=None)\n" +"--\n" +"\n" +"Create a compressor object for compressing data incrementally.\n" +"\n" +" level\n" +" The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.\n" +" options\n" +" A dict object that contains advanced compression parameters.\n" +" zstd_dict\n" +" A ZstdDict object, a pre-trained zstd dictionary.\n" +"\n" +"Thread-safe at method level. For one-shot compression, use the compress()\n" +"function instead."); + +static int +_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, + PyObject *options, PyObject *zstd_dict); + +static int +_zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(level), &_Py_ID(options), &_Py_ID(zstd_dict), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"level", "options", "zstd_dict", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZstdCompressor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *level = Py_None; + PyObject *options = Py_None; + PyObject *zstd_dict = Py_None; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + level = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + options = fastargs[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + zstd_dict = fastargs[2]; +skip_optional_pos: + return_value = _zstd_ZstdCompressor___init___impl((ZstdCompressor *)self, level, options, zstd_dict); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdCompressor_compress__doc__, +"compress($self, /, data, mode=ZstdCompressor.CONTINUE)\n" +"--\n" +"\n" +"Provide data to the compressor object.\n" +"\n" +" mode\n" +" Can be these 3 values ZstdCompressor.CONTINUE,\n" +" ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME\n" +"\n" +"Return a chunk of compressed data if possible, or b\'\' otherwise. 
When you have\n" +"finished providing data to the compressor, call the flush() method to finish\n" +"the compression process."); + +#define _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF \ + {"compress", _PyCFunction_CAST(_zstd_ZstdCompressor_compress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_compress__doc__}, + +static PyObject * +_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, + int mode); + +static PyObject * +_zstd_ZstdCompressor_compress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(data), &_Py_ID(mode), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"data", "mode", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "compress", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int mode = ZSTD_e_continue; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + mode = PyLong_AsInt(args[1]); + if (mode == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = _zstd_ZstdCompressor_compress_impl((ZstdCompressor *)self, &data, mode); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__, +"flush($self, /, mode=ZstdCompressor.FLUSH_FRAME)\n" +"--\n" +"\n" +"Finish the compression process.\n" +"\n" +" mode\n" +" Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n" +" ZstdCompressor.FLUSH_BLOCK\n" +"\n" +"Flush any remaining data left in internal buffers. 
Since zstd data consists\n" +"of one or more independent frames, the compressor object can still be used\n" +"after this method is called."); + +#define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \ + {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__}, + +static PyObject * +_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode); + +static PyObject * +_zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(mode), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"mode", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "flush", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; + int mode = ZSTD_e_end; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + mode = PyLong_AsInt(args[0]); + if (mode == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = _zstd_ZstdCompressor_flush_impl((ZstdCompressor *)self, mode); + +exit: + return return_value; +} +/*[clinic end generated code: output=ef69eab155be39f6 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h new file mode 100644 index 00000000000..9359c637203 --- /dev/null +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -0,0 +1,230 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_zstd_ZstdDecompressor___init____doc__, +"ZstdDecompressor(zstd_dict=None, options=None)\n" +"--\n" +"\n" +"Create a decompressor object for decompressing data incrementally.\n" +"\n" +" zstd_dict\n" +" A ZstdDict object, a pre-trained zstd dictionary.\n" +" options\n" +" A dict object that contains advanced decompression parameters.\n" +"\n" +"Thread-safe at method level. 
For one-shot decompression, use the decompress()\n" +"function instead."); + +static int +_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, + PyObject *zstd_dict, PyObject *options); + +static int +_zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(zstd_dict), &_Py_ID(options), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"zstd_dict", "options", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZstdDecompressor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *zstd_dict = Py_None; + PyObject *options = Py_None; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 0, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + zstd_dict = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + options = fastargs[1]; +skip_optional_pos: + return_value = _zstd_ZstdDecompressor___init___impl((ZstdDecompressor *)self, zstd_dict, options); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDecompressor_unused_data__doc__, +"A bytes object of un-consumed input data.\n" +"\n" +"When ZstdDecompressor object stops after a frame is\n" +"decompressed, unused input data after the frame. Otherwise this will be b\'\'."); +#if defined(_zstd_ZstdDecompressor_unused_data_DOCSTR) +# undef _zstd_ZstdDecompressor_unused_data_DOCSTR +#endif +#define _zstd_ZstdDecompressor_unused_data_DOCSTR _zstd_ZstdDecompressor_unused_data__doc__ + +#if !defined(_zstd_ZstdDecompressor_unused_data_DOCSTR) +# define _zstd_ZstdDecompressor_unused_data_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF) +# undef _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF +# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, (setter)_zstd_ZstdDecompressor_unused_data_set, _zstd_ZstdDecompressor_unused_data_DOCSTR}, +#else +# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, NULL, _zstd_ZstdDecompressor_unused_data_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self); + +static PyObject * +_zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + 
return_value = _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, +"decompress($self, /, data, max_length=-1)\n" +"--\n" +"\n" +"Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n" +"\n" +" data\n" +" A bytes-like object, zstd data to be decompressed.\n" +" max_length\n" +" Maximum size of returned data. When it is negative, the size of\n" +" output buffer is unlimited. When it is nonnegative, returns at\n" +" most max_length bytes of decompressed data.\n" +"\n" +"If *max_length* is nonnegative, returns at most *max_length* bytes of\n" +"decompressed data. If this limit is reached and further output can be\n" +"produced, *self.needs_input* will be set to ``False``. In this case, the next\n" +"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n" +"\n" +"If all of the input data was decompressed and returned (either because this\n" +"was less than *max_length* bytes, or because *max_length* was negative),\n" +"*self.needs_input* will be set to True.\n" +"\n" +"Attempting to decompress data after the end of a frame is reached raises an\n" +"EOFError. 
Any data found after the end of the frame is ignored and saved in\n" +"the self.unused_data attribute."); + +#define _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF \ + {"decompress", _PyCFunction_CAST(_zstd_ZstdDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdDecompressor_decompress__doc__}, + +static PyObject * +_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, + Py_buffer *data, + Py_ssize_t max_length); + +static PyObject * +_zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(data), &_Py_ID(max_length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"data", "max_length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "decompress", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + Py_ssize_t max_length = -1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_length = ival; + } +skip_optional_pos: + return_value = _zstd_ZstdDecompressor_decompress_impl((ZstdDecompressor *)self, &data, max_length); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} +/*[clinic end generated code: output=ae703f0465a2906d input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zdict.c.h b/Modules/_zstd/clinic/zdict.c.h new file mode 100644 index 00000000000..4e0f7b64172 --- /dev/null +++ b/Modules/_zstd/clinic/zdict.c.h @@ -0,0 +1,207 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_zstd_ZstdDict___init____doc__, +"ZstdDict(dict_content, is_raw=False)\n" +"--\n" +"\n" +"Represents a zstd dictionary, which can be used for compression/decompression.\n" +"\n" +" dict_content\n" +" A bytes-like object, dictionary\'s content.\n" +" is_raw\n" +" This parameter is for advanced user. True means dict_content\n" +" argument is a \"raw content\" dictionary, free of any format\n" +" restriction. 
False means dict_content argument is an ordinary\n" +" zstd dictionary, was created by zstd functions, follow a\n" +" specified format.\n" +"\n" +"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n" +"ZstdDecompressor objects."); + +static int +_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, + int is_raw); + +static int +_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"dict_content", "is_raw", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZstdDict", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 1; + PyObject *dict_content; + int is_raw = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + dict_content = fastargs[0]; + if (!noptargs) { + goto skip_optional_pos; + } + is_raw = PyObject_IsTrue(fastargs[1]); + if (is_raw < 0) { + goto exit; + } +skip_optional_pos: + return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw); + +exit: + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, +"Load as a digested dictionary to compressor.\n" +"\n" +"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n" +"1. Some advanced compression parameters of compressor may be overridden\n" +" by parameters of digested dictionary.\n" +"2. ZstdDict has a digested dictionaries cache for each compression level.\n" +" It\'s faster when loading again a digested dictionary with the same\n" +" compression level.\n" +"3. 
No need to use this for decompression."); +#if defined(_zstd_ZstdDict_as_digested_dict_DOCSTR) +# undef _zstd_ZstdDict_as_digested_dict_DOCSTR +#endif +#define _zstd_ZstdDict_as_digested_dict_DOCSTR _zstd_ZstdDict_as_digested_dict__doc__ + +#if !defined(_zstd_ZstdDict_as_digested_dict_DOCSTR) +# define _zstd_ZstdDict_as_digested_dict_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF +# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, (setter)_zstd_ZstdDict_as_digested_dict_set, _zstd_ZstdDict_as_digested_dict_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, NULL, _zstd_ZstdDict_as_digested_dict_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, +"Load as an undigested dictionary to compressor.\n" +"\n" +"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"1. The advanced compression parameters of compressor will not be overridden.\n" +"2. Loading an undigested dictionary is costly. If load an undigested dictionary\n" +" multiple times, consider reusing a compressor object.\n" +"3. 
No need to use this for decompression."); +#if defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR) +# undef _zstd_ZstdDict_as_undigested_dict_DOCSTR +#endif +#define _zstd_ZstdDict_as_undigested_dict_DOCSTR _zstd_ZstdDict_as_undigested_dict__doc__ + +#if !defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR) +# define _zstd_ZstdDict_as_undigested_dict_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF +# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, (setter)_zstd_ZstdDict_as_undigested_dict_set, _zstd_ZstdDict_as_undigested_dict_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, NULL, _zstd_ZstdDict_as_undigested_dict_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, +"Load as a prefix to compressor/decompressor.\n" +"\n" +"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n" +"1. Prefix is compatible with long distance matching, while dictionary is not.\n" +"2. It only works for the first frame, then the compressor/decompressor will\n" +" return to no prefix state.\n" +"3. 
When decompressing, must use the same prefix as when compressing.\""); +#if defined(_zstd_ZstdDict_as_prefix_DOCSTR) +# undef _zstd_ZstdDict_as_prefix_DOCSTR +#endif +#define _zstd_ZstdDict_as_prefix_DOCSTR _zstd_ZstdDict_as_prefix__doc__ + +#if !defined(_zstd_ZstdDict_as_prefix_DOCSTR) +# define _zstd_ZstdDict_as_prefix_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF) +# undef _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF +# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, (setter)_zstd_ZstdDict_as_prefix_set, _zstd_ZstdDict_as_prefix_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, NULL, _zstd_ZstdDict_as_prefix_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} +/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c new file mode 100644 index 00000000000..d0f677be821 --- /dev/null +++ b/Modules/_zstd/compressor.c @@ -0,0 +1,707 @@ +/* +Low level interface to Meta's zstd library for use in the compression.zstd +Python module. 
+*/ + +/* ZstdCompressor class definitions */ + +/*[clinic input] +module _zstd +class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/ + + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "_zstdmodule.h" + +#include "buffer.h" + +#include <stddef.h> // offsetof() + + +#define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) + +int +_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, + const char *arg_name, const char* arg_type) +{ + size_t zstd_ret; + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + /* Integer compression level */ + if (PyLong_Check(level_or_options)) { + int level = PyLong_AsInt(level_or_options); + if (level == -1 && PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "Compression level should be an int value between %d and %d.", + ZSTD_minCLevel(), ZSTD_maxCLevel()); + return -1; + } + + /* Save for generating ZSTD_CDICT */ + self->compression_level = level; + + /* Set compressionLevel to compression context */ + zstd_ret = ZSTD_CCtx_setParameter(self->cctx, + ZSTD_c_compressionLevel, + level); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + return -1; + } + return 0; + } + + /* Options dict */ + if (PyDict_Check(level_or_options)) { + PyObject *key, *value; + Py_ssize_t pos = 0; + + while (PyDict_Next(level_or_options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "Key of compression option dict should " + "NOT be DParameter."); + return -1; + } + + int key_v = PyLong_AsInt(key); + if (key_v == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Key of options dict should be a CParameter attribute."); + return -1; + 
} + + // TODO(emmatyping): check bounds when there is a value error here for better + // error message? + int value_v = PyLong_AsInt(value); + if (value_v == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Value of option dict should be an int."); + return -1; + } + + if (key_v == ZSTD_c_compressionLevel) { + /* Save for generating ZSTD_CDICT */ + self->compression_level = value_v; + } + else if (key_v == ZSTD_c_nbWorkers) { + /* From zstd library doc: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; + } + } + + /* Set parameter to compression context */ + zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(mod_state, 1, key_v, value_v); + return -1; + } + } + return 0; + } + PyErr_Format(PyExc_TypeError, "Invalid type for %s. 
Expected %s", arg_name, arg_type); + return -1; +} + +static void +capsule_free_cdict(PyObject *capsule) +{ + ZSTD_CDict *cdict = PyCapsule_GetPointer(capsule, NULL); + ZSTD_freeCDict(cdict); +} + +ZSTD_CDict * +_get_CDict(ZstdDict *self, int compressionLevel) +{ + PyObject *level = NULL; + PyObject *capsule; + ZSTD_CDict *cdict; + + // TODO(emmatyping): refactor critical section code into a lock_held function + Py_BEGIN_CRITICAL_SECTION(self); + + /* int level object */ + level = PyLong_FromLong(compressionLevel); + if (level == NULL) { + goto error; + } + + /* Get PyCapsule object from self->c_dicts */ + capsule = PyDict_GetItemWithError(self->c_dicts, level); + if (capsule == NULL) { + if (PyErr_Occurred()) { + goto error; + } + + /* Create ZSTD_CDict instance */ + char *dict_buffer = PyBytes_AS_STRING(self->dict_content); + Py_ssize_t dict_len = Py_SIZE(self->dict_content); + Py_BEGIN_ALLOW_THREADS + cdict = ZSTD_createCDict(dict_buffer, + dict_len, + compressionLevel); + Py_END_ALLOW_THREADS + + if (cdict == NULL) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Failed to create ZSTD_CDict instance from zstd " + "dictionary content. 
Maybe the content is corrupted."); + } + goto error; + } + + /* Put ZSTD_CDict instance into PyCapsule object */ + capsule = PyCapsule_New(cdict, NULL, capsule_free_cdict); + if (capsule == NULL) { + ZSTD_freeCDict(cdict); + goto error; + } + + /* Add PyCapsule object to self->c_dicts */ + if (PyDict_SetItem(self->c_dicts, level, capsule) < 0) { + Py_DECREF(capsule); + goto error; + } + Py_DECREF(capsule); + } + else { + /* ZSTD_CDict instance already exists */ + cdict = PyCapsule_GetPointer(capsule, NULL); + } + goto success; + +error: + cdict = NULL; +success: + Py_XDECREF(level); + Py_END_CRITICAL_SECTION(); + return cdict; +} + +int +_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { + + size_t zstd_ret; + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + ZstdDict *zd; + int type, ret; + + /* Check ZstdDict */ + ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); + if (ret < 0) { + return -1; + } + else if (ret > 0) { + /* When compressing, use undigested dictionary by default. */ + zd = (ZstdDict*)dict; + type = DICT_TYPE_UNDIGESTED; + goto load; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { + /* Check ZstdDict */ + ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), + (PyObject*)mod_state->ZstdDict_type); + if (ret < 0) { + return -1; + } + else if (ret > 0) { + /* type == -1 may indicate an error. 
*/ + type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == DICT_TYPE_DIGESTED || + type == DICT_TYPE_UNDIGESTED || + type == DICT_TYPE_PREFIX) + { + assert(type >= 0); + zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + goto load; + } + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be ZstdDict object."); + return -1; + +load: + if (type == DICT_TYPE_DIGESTED) { + /* Get ZSTD_CDict */ + ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); + if (c_dict == NULL) { + return -1; + } + /* Reference a prepared dictionary. + It overrides some compression context's parameters. */ + Py_BEGIN_CRITICAL_SECTION(self); + zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); + Py_END_CRITICAL_SECTION(); + } + else if (type == DICT_TYPE_UNDIGESTED) { + /* Load a dictionary. + It doesn't override compression context's parameters. */ + Py_BEGIN_CRITICAL_SECTION2(self, zd); + zstd_ret = ZSTD_CCtx_loadDictionary( + self->cctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + Py_END_CRITICAL_SECTION2(); + } + else if (type == DICT_TYPE_PREFIX) { + /* Load a prefix */ + Py_BEGIN_CRITICAL_SECTION2(self, zd); + zstd_ret = ZSTD_CCtx_refPrefix( + self->cctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + Py_END_CRITICAL_SECTION2(); + } + else { + Py_UNREACHABLE(); + } + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_LOAD_C_DICT, zstd_ret); + return -1; + } + return 0; +} + +#define clinic_state() (get_zstd_state_from_type(type)) +#include "clinic/compressor.c.h" +#undef clinic_state + +static PyObject * +_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +{ + ZstdCompressor *self; + self = PyObject_GC_New(ZstdCompressor, type); + if (self == NULL) { + goto error; + } + + self->inited = 0; + self->dict = NULL; + self->use_multithread = 0; + + + /* Compression context */ + self->cctx = ZSTD_createCCtx(); + if 
(self->cctx == NULL) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Unable to create ZSTD_CCtx instance."); + } + goto error; + } + + /* Last mode */ + self->last_mode = ZSTD_e_end; + + return (PyObject*)self; + +error: + if (self != NULL) { + PyObject_GC_Del(self); + } + return NULL; +} + +static void +ZstdCompressor_dealloc(PyObject *ob) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free compression context */ + ZSTD_freeCCtx(self->cctx); + + /* Py_XDECREF the dict after free the compression context */ + Py_CLEAR(self->dict); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_Del(ob); + Py_DECREF(tp); +} + +/*[clinic input] +_zstd.ZstdCompressor.__init__ + + level: object = None + The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT. + options: object = None + A dict object that contains advanced compression parameters. + zstd_dict: object = None + A ZstdDict object, a pre-trained zstd dictionary. + +Create a compressor object for compressing data incrementally. + +Thread-safe at method level. For one-shot compression, use the compress() +function instead. 
+[clinic start generated code]*/ + +static int +_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, + PyObject *options, PyObject *zstd_dict) +/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/ +{ + /* Only called once */ + if (self->inited) { + PyErr_SetString(PyExc_RuntimeError, init_twice_msg); + return -1; + } + self->inited = 1; + + if (level != Py_None && options != Py_None) { + PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); + return -1; + } + + /* Set compressLevel/options to compression context */ + if (level != Py_None) { + if (_PyZstd_set_c_parameters(self, level, "level", "int") < 0) { + return -1; + } + } + + if (options != Py_None) { + if (_PyZstd_set_c_parameters(self, options, "options", "dict") < 0) { + return -1; + } + } + + /* Load dictionary to compression context */ + if (zstd_dict != Py_None) { + if (_PyZstd_load_c_dict(self, zstd_dict) < 0) { + return -1; + } + + /* Py_INCREF the dict */ + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + // We can only start tracking self with the GC once self->dict is set. 
+ PyObject_GC_Track(self); + return 0; +} + +PyObject * +compress_impl(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive) +{ + ZSTD_inBuffer in; + ZSTD_outBuffer out; + _BlocksOutputBuffer buffer = {.list = NULL}; + size_t zstd_ret; + PyObject *ret; + + /* Prepare input & output buffers */ + if (data != NULL) { + in.src = data->buf; + in.size = data->len; + in.pos = 0; + } + else { + in.src = ∈ + in.size = 0; + in.pos = 0; + } + + /* Calculate output buffer's size */ + size_t output_buffer_size = ZSTD_compressBound(in.size); + if (output_buffer_size > (size_t) PY_SSIZE_T_MAX) { + PyErr_NoMemory(); + goto error; + } + + if (_OutputBuffer_InitWithSize(&buffer, &out, -1, + (Py_ssize_t) output_buffer_size) < 0) { + goto error; + } + + + /* zstd stream compress */ + while (1) { + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); + Py_END_ALLOW_THREADS + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); + } + goto error; + } + + /* Finished */ + if (zstd_ret == 0) { + break; + } + + /* Output buffer should be exhausted, grow the buffer. 
*/ + assert(out.pos == out.size); + if (out.pos == out.size) { + if (_OutputBuffer_Grow(&buffer, &out) < 0) { + goto error; + } + } + } + + /* Return a bytes object */ + ret = _OutputBuffer_Finish(&buffer, &out); + if (ret != NULL) { + return ret; + } + +error: + _OutputBuffer_OnError(&buffer); + return NULL; +} + +static PyObject * +compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) +{ + ZSTD_inBuffer in; + ZSTD_outBuffer out; + _BlocksOutputBuffer buffer = {.list = NULL}; + size_t zstd_ret; + PyObject *ret; + + /* Prepare input & output buffers */ + in.src = data->buf; + in.size = data->len; + in.pos = 0; + + if (_OutputBuffer_InitAndGrow(&buffer, &out, -1) < 0) { + goto error; + } + + /* zstd stream compress */ + while (1) { + Py_BEGIN_ALLOW_THREADS + do { + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); + } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); + Py_END_ALLOW_THREADS + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); + } + goto error; + } + + /* Like compress_impl(), output as much as possible. */ + if (out.pos == out.size) { + if (_OutputBuffer_Grow(&buffer, &out) < 0) { + goto error; + } + } + else if (in.pos == in.size) { + /* Finished */ + assert(mt_continue_should_break(&in, &out)); + break; + } + } + + /* Return a bytes object */ + ret = _OutputBuffer_Finish(&buffer, &out); + if (ret != NULL) { + return ret; + } + +error: + _OutputBuffer_OnError(&buffer); + return NULL; +} + +/*[clinic input] +_zstd.ZstdCompressor.compress + + data: Py_buffer + mode: int(c_default="ZSTD_e_continue") = ZstdCompressor.CONTINUE + Can be these 3 values ZstdCompressor.CONTINUE, + ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME + +Provide data to the compressor object. + +Return a chunk of compressed data if possible, or b'' otherwise. 
When you have +finished providing data to the compressor, call the flush() method to finish +the compression process. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, + int mode) +/*[clinic end generated code: output=ed7982d1cf7b4f98 input=ac2c21d180f579ea]*/ +{ + PyObject *ret; + + /* Check mode value */ + if (mode != ZSTD_e_continue && + mode != ZSTD_e_flush && + mode != ZSTD_e_end) + { + PyErr_SetString(PyExc_ValueError, + "mode argument wrong value, it should be one of " + "ZstdCompressor.CONTINUE, ZstdCompressor.FLUSH_BLOCK, " + "ZstdCompressor.FLUSH_FRAME."); + return NULL; + } + + /* Thread-safe code */ + Py_BEGIN_CRITICAL_SECTION(self); + + /* Compress */ + if (self->use_multithread && mode == ZSTD_e_continue) { + ret = compress_mt_continue_impl(self, data); + } + else { + ret = compress_impl(self, data, mode); + } + + if (ret) { + self->last_mode = mode; + } + else { + self->last_mode = ZSTD_e_end; + + /* Resetting cctx's session never fail */ + ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); + } + Py_END_CRITICAL_SECTION(); + + return ret; +} + +/*[clinic input] +_zstd.ZstdCompressor.flush + + mode: int(c_default="ZSTD_e_end") = ZstdCompressor.FLUSH_FRAME + Can be these 2 values ZstdCompressor.FLUSH_FRAME, + ZstdCompressor.FLUSH_BLOCK + +Finish the compression process. + +Flush any remaining data left in internal buffers. Since zstd data consists +of one or more independent frames, the compressor object can still be used +after this method is called. 
+[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) +/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=a766870301932b85]*/ +{ + PyObject *ret; + + /* Check mode value */ + if (mode != ZSTD_e_end && mode != ZSTD_e_flush) { + PyErr_SetString(PyExc_ValueError, + "mode argument wrong value, it should be " + "ZstdCompressor.FLUSH_FRAME or " + "ZstdCompressor.FLUSH_BLOCK."); + return NULL; + } + + /* Thread-safe code */ + Py_BEGIN_CRITICAL_SECTION(self); + ret = compress_impl(self, NULL, mode); + + if (ret) { + self->last_mode = mode; + } + else { + self->last_mode = ZSTD_e_end; + + /* Resetting cctx's session never fail */ + ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); + } + Py_END_CRITICAL_SECTION(); + + return ret; +} + +static PyMethodDef ZstdCompressor_methods[] = { + _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF + _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF + + {0} +}; + +PyDoc_STRVAR(ZstdCompressor_last_mode_doc, +"The last mode used to this compressor object, its value can be .CONTINUE,\n" +".FLUSH_BLOCK, .FLUSH_FRAME. 
Initialized to .FLUSH_FRAME.\n\n" +"It can be used to get the current state of a compressor, such as, data flushed,\n" +"a frame ended."); + +static PyMemberDef ZstdCompressor_members[] = { + {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), + Py_READONLY, ZstdCompressor_last_mode_doc}, + {0} +}; + +static int +ZstdCompressor_traverse(PyObject *ob, visitproc visit, void *arg) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + Py_VISIT(self->dict); + return 0; +} + +static int +ZstdCompressor_clear(PyObject *ob) +{ + ZstdCompressor *self = ZstdCompressor_CAST(ob); + Py_CLEAR(self->dict); + return 0; +} + +static PyType_Slot zstdcompressor_slots[] = { + {Py_tp_new, _zstd_ZstdCompressor_new}, + {Py_tp_dealloc, ZstdCompressor_dealloc}, + {Py_tp_init, _zstd_ZstdCompressor___init__}, + {Py_tp_methods, ZstdCompressor_methods}, + {Py_tp_members, ZstdCompressor_members}, + {Py_tp_doc, (char*)_zstd_ZstdCompressor___init____doc__}, + {Py_tp_traverse, ZstdCompressor_traverse}, + {Py_tp_clear, ZstdCompressor_clear}, + {0} +}; + +PyType_Spec zstdcompressor_type_spec = { + .name = "_zstd.ZstdCompressor", + .basicsize = sizeof(ZstdCompressor), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .slots = zstdcompressor_slots, +}; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c new file mode 100644 index 00000000000..4e3a28068be --- /dev/null +++ b/Modules/_zstd/decompressor.c @@ -0,0 +1,891 @@ +/* +Low level interface to Meta's zstd library for use in the compression.zstd +Python module. 
+*/ + +/* ZstdDecompressor class definition */ + +/*[clinic input] +module _zstd +class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "_zstdmodule.h" + +#include "buffer.h" + +#include <stddef.h> // offsetof() + +#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) + +static inline ZSTD_DDict * +_get_DDict(ZstdDict *self) +{ + ZSTD_DDict *ret; + + /* Already created */ + if (self->d_dict != NULL) { + return self->d_dict; + } + + Py_BEGIN_CRITICAL_SECTION(self); + if (self->d_dict == NULL) { + /* Create ZSTD_DDict instance from dictionary content */ + char *dict_buffer = PyBytes_AS_STRING(self->dict_content); + Py_ssize_t dict_len = Py_SIZE(self->dict_content); + Py_BEGIN_ALLOW_THREADS + self->d_dict = ZSTD_createDDict(dict_buffer, + dict_len); + Py_END_ALLOW_THREADS + + if (self->d_dict == NULL) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Failed to create ZSTD_DDict instance from zstd " + "dictionary content. 
Maybe the content is corrupted."); + } + } + } + + /* Don't lose any exception */ + ret = self->d_dict; + Py_END_CRITICAL_SECTION(); + + return ret; +} + +/* Set decompression parameters to decompression context */ +int +_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) +{ + size_t zstd_ret; + PyObject *key, *value; + Py_ssize_t pos; + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + if (!PyDict_Check(options)) { + PyErr_SetString(PyExc_TypeError, + "options argument should be dict object."); + return -1; + } + + pos = 0; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->CParameter_type) { + PyErr_SetString(PyExc_TypeError, + "Key of decompression options dict should " + "NOT be CParameter."); + return -1; + } + + /* Both key & value should be 32-bit signed int */ + int key_v = PyLong_AsInt(key); + if (key_v == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Key of options dict should be a DParameter attribute."); + return -1; + } + + // TODO(emmatyping): check bounds when there is a value error here for better + // error message? 
+ int value_v = PyLong_AsInt(value); + if (value_v == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Value of options dict should be an int."); + return -1; + } + + /* Set parameter to compression context */ + Py_BEGIN_CRITICAL_SECTION(self); + zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); + Py_END_CRITICAL_SECTION(); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(mod_state, 0, key_v, value_v); + return -1; + } + } + return 0; +} + +/* Load dictionary or prefix to decompression context */ +int +_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +{ + size_t zstd_ret; + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + ZstdDict *zd; + int type, ret; + + /* Check ZstdDict */ + ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); + if (ret < 0) { + return -1; + } + else if (ret > 0) { + /* When decompressing, use digested dictionary by default. */ + zd = (ZstdDict*)dict; + type = DICT_TYPE_DIGESTED; + goto load; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { + /* Check ZstdDict */ + ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), + (PyObject*)mod_state->ZstdDict_type); + if (ret < 0) { + return -1; + } + else if (ret > 0) { + /* type == -1 may indicate an error. 
*/ + type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == DICT_TYPE_DIGESTED || + type == DICT_TYPE_UNDIGESTED || + type == DICT_TYPE_PREFIX) + { + assert(type >= 0); + zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + goto load; + } + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be ZstdDict object."); + return -1; + +load: + if (type == DICT_TYPE_DIGESTED) { + /* Get ZSTD_DDict */ + ZSTD_DDict *d_dict = _get_DDict(zd); + if (d_dict == NULL) { + return -1; + } + /* Reference a prepared dictionary */ + Py_BEGIN_CRITICAL_SECTION(self); + zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); + Py_END_CRITICAL_SECTION(); + } + else if (type == DICT_TYPE_UNDIGESTED) { + /* Load a dictionary */ + Py_BEGIN_CRITICAL_SECTION2(self, zd); + zstd_ret = ZSTD_DCtx_loadDictionary( + self->dctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + Py_END_CRITICAL_SECTION2(); + } + else if (type == DICT_TYPE_PREFIX) { + /* Load a prefix */ + Py_BEGIN_CRITICAL_SECTION2(self, zd); + zstd_ret = ZSTD_DCtx_refPrefix( + self->dctx, + PyBytes_AS_STRING(zd->dict_content), + Py_SIZE(zd->dict_content)); + Py_END_CRITICAL_SECTION2(); + } + else { + /* Impossible code path */ + PyErr_SetString(PyExc_SystemError, + "load_d_dict() impossible code path"); + return -1; + } + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret); + return -1; + } + return 0; +} + + + +/* + Given the two types of decompressors (defined in _zstdmodule.h): + + typedef enum { + TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class + TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function + } decompress_type; + + Decompress implementation for <D>, <E>, pseudo code: + + initialize_output_buffer + while True: + decompress_data + set_object_flag # .eof for <D>, .at_frame_edge for <E>. 
+ + if output_buffer_exhausted: + if output_buffer_reached_max_length: + finish + grow_output_buffer + elif input_buffer_exhausted: + finish + + ZSTD_decompressStream()'s size_t return value: + - 0 when a frame is completely decoded and fully flushed, zstd's internal + buffer has no data. + - An error code, which can be tested using ZSTD_isError(). + - Or any other value > 0, which means there is still some decoding or + flushing to do to complete current frame. + + Note, decompressing "an empty input" in any case will make it > 0. + + <E> supports multiple frames, has an .at_frame_edge flag, it means both the + input and output streams are at a frame edge. The flag can be set by this + statement: + + .at_frame_edge = (zstd_ret == 0) ? 1 : 0 + + But if decompressing "an empty input" at "a frame edge", zstd_ret will be + non-zero, then .at_frame_edge will be wrongly set to false. To solve this + problem, two AFE checks are needed to ensure that: when at "a frame edge", + empty input will not be decompressed. + + // AFE check + if (self->at_frame_edge && in->pos == in->size) { + finish + } + + In <E>, if .at_frame_edge is eventually set to true, but input stream has + unconsumed data (in->pos < in->size), then the outer function + stream_decompress() will set .at_frame_edge to false. In this case, + although the output stream is at a frame edge, for the caller, the input + stream is not at a frame edge, see below diagram. This behavior does not + affect the next AFE check, since (in->pos < in->size). 
+ + input stream: --------------|--- + ^ + output stream: ====================| + ^ +*/ +PyObject * +decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length, + Py_ssize_t initial_size, + decompress_type type) +{ + size_t zstd_ret; + ZSTD_outBuffer out; + _BlocksOutputBuffer buffer = {.list = NULL}; + PyObject *ret; + + /* The first AFE check for setting .at_frame_edge flag */ + if (type == TYPE_ENDLESS_DECOMPRESSOR) { + if (self->at_frame_edge && in->pos == in->size) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return NULL; + } + ret = mod_state->empty_bytes; + Py_INCREF(ret); + return ret; + } + } + + /* Initialize the output buffer */ + if (initial_size >= 0) { + if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { + goto error; + } + } + else { + if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { + goto error; + } + } + assert(out.pos == 0); + + while (1) { + /* Decompress */ + Py_BEGIN_ALLOW_THREADS + zstd_ret = ZSTD_decompressStream(self->dctx, &out, in); + Py_END_ALLOW_THREADS + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); + } + goto error; + } + + /* Set .eof/.af_frame_edge flag */ + if (type == TYPE_DECOMPRESSOR) { + /* ZstdDecompressor class stops when a frame is decompressed */ + if (zstd_ret == 0) { + self->eof = 1; + break; + } + } + else if (type == TYPE_ENDLESS_DECOMPRESSOR) { + /* decompress() function supports multiple frames */ + self->at_frame_edge = (zstd_ret == 0) ? 1 : 0; + + /* The second AFE check for setting .at_frame_edge flag */ + if (self->at_frame_edge && in->pos == in->size) { + break; + } + } + + /* Need to check out before in. Maybe zstd's internal buffer still has + a few bytes can be output, grow the buffer and continue. 
*/ + if (out.pos == out.size) { + /* Output buffer exhausted */ + + /* Output buffer reached max_length */ + if (_OutputBuffer_ReachedMaxLength(&buffer, &out)) { + break; + } + + /* Grow output buffer */ + if (_OutputBuffer_Grow(&buffer, &out) < 0) { + goto error; + } + assert(out.pos == 0); + + } + else if (in->pos == in->size) { + /* Finished */ + break; + } + } + + /* Return a bytes object */ + ret = _OutputBuffer_Finish(&buffer, &out); + if (ret != NULL) { + return ret; + } + +error: + _OutputBuffer_OnError(&buffer); + return NULL; +} + +void +decompressor_reset_session(ZstdDecompressor *self, + decompress_type type) +{ + // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here + // and ensure lock is always held + + /* Reset variables */ + self->in_begin = 0; + self->in_end = 0; + + if (type == TYPE_DECOMPRESSOR) { + Py_CLEAR(self->unused_data); + } + + /* Reset variables in one operation */ + self->needs_input = 1; + self->at_frame_edge = 1; + self->eof = 0; + self->_unused_char_for_align = 0; + + /* Resetting session never fail */ + ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); +} + +PyObject * +stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length, + decompress_type type) +{ + Py_ssize_t initial_buffer_size = -1; + ZSTD_inBuffer in; + PyObject *ret = NULL; + int use_input_buffer; + + if (type == TYPE_DECOMPRESSOR) { + /* Check .eof flag */ + if (self->eof) { + PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); + assert(ret == NULL); + goto success; + } + } + else if (type == TYPE_ENDLESS_DECOMPRESSOR) { + /* Fast path for the first frame */ + if (self->at_frame_edge && self->in_begin == self->in_end) { + /* Read decompressed size */ + uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len); + + /* These two zstd constants always > PY_SSIZE_T_MAX: + ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) + ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) + + Use ZSTD_findFrameCompressedSize() to 
check complete frame, + prevent allocating too much memory for small input chunk. */ + + if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX && + !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) ) + { + initial_buffer_size = (Py_ssize_t) decompressed_size; + } + } + } + + /* Prepare input buffer w/wo unconsumed data */ + if (self->in_begin == self->in_end) { + /* No unconsumed data */ + use_input_buffer = 0; + + in.src = data->buf; + in.size = data->len; + in.pos = 0; + } + else if (data->len == 0) { + /* Has unconsumed data, fast path for b'' */ + assert(self->in_begin < self->in_end); + + use_input_buffer = 1; + + in.src = self->input_buffer + self->in_begin; + in.size = self->in_end - self->in_begin; + in.pos = 0; + } + else { + /* Has unconsumed data */ + use_input_buffer = 1; + + /* Unconsumed data size in input_buffer */ + size_t used_now = self->in_end - self->in_begin; + assert(self->in_end > self->in_begin); + + /* Number of bytes we can append to input buffer */ + size_t avail_now = self->input_buffer_size - self->in_end; + assert(self->input_buffer_size >= self->in_end); + + /* Number of bytes we can append if we move existing contents to + beginning of buffer */ + size_t avail_total = self->input_buffer_size - used_now; + assert(self->input_buffer_size >= used_now); + + if (avail_total < (size_t) data->len) { + char *tmp; + size_t new_size = used_now + data->len; + + /* Allocate with new size */ + tmp = PyMem_Malloc(new_size); + if (tmp == NULL) { + PyErr_NoMemory(); + goto error; + } + + /* Copy unconsumed data to the beginning of new buffer */ + memcpy(tmp, + self->input_buffer + self->in_begin, + used_now); + + /* Switch to new buffer */ + PyMem_Free(self->input_buffer); + self->input_buffer = tmp; + self->input_buffer_size = new_size; + + /* Set begin & end position */ + self->in_begin = 0; + self->in_end = used_now; + } + else if (avail_now < (size_t) data->len) { + /* Move unconsumed data to the beginning. 
+ Overlap is possible, so use memmove(). */ + memmove(self->input_buffer, + self->input_buffer + self->in_begin, + used_now); + + /* Set begin & end position */ + self->in_begin = 0; + self->in_end = used_now; + } + + /* Copy data to input buffer */ + memcpy(self->input_buffer + self->in_end, data->buf, data->len); + self->in_end += data->len; + + in.src = self->input_buffer + self->in_begin; + in.size = used_now + data->len; + in.pos = 0; + } + assert(in.pos == 0); + + /* Decompress */ + ret = decompress_impl(self, &in, + max_length, initial_buffer_size, + type); + if (ret == NULL) { + goto error; + } + + /* Unconsumed input data */ + if (in.pos == in.size) { + if (type == TYPE_DECOMPRESSOR) { + if (Py_SIZE(ret) == max_length || self->eof) { + self->needs_input = 0; + } + else { + self->needs_input = 1; + } + } + else if (type == TYPE_ENDLESS_DECOMPRESSOR) { + if (Py_SIZE(ret) == max_length && !self->at_frame_edge) { + self->needs_input = 0; + } + else { + self->needs_input = 1; + } + } + + if (use_input_buffer) { + /* Clear input_buffer */ + self->in_begin = 0; + self->in_end = 0; + } + } + else { + size_t data_size = in.size - in.pos; + + self->needs_input = 0; + + if (type == TYPE_ENDLESS_DECOMPRESSOR) { + self->at_frame_edge = 0; + } + + if (!use_input_buffer) { + /* Discard buffer if it's too small + (resizing it may needlessly copy the current contents) */ + if (self->input_buffer != NULL && + self->input_buffer_size < data_size) + { + PyMem_Free(self->input_buffer); + self->input_buffer = NULL; + self->input_buffer_size = 0; + } + + /* Allocate if necessary */ + if (self->input_buffer == NULL) { + self->input_buffer = PyMem_Malloc(data_size); + if (self->input_buffer == NULL) { + PyErr_NoMemory(); + goto error; + } + self->input_buffer_size = data_size; + } + + /* Copy unconsumed data */ + memcpy(self->input_buffer, (char*)in.src + in.pos, data_size); + self->in_begin = 0; + self->in_end = data_size; + } + else { + /* Use input buffer */ + self->in_begin += 
in.pos; + } + } + + goto success; + +error: + /* Reset decompressor's states/session */ + decompressor_reset_session(self, type); + + Py_CLEAR(ret); +success: + + return ret; +} + + +static PyObject * +_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + ZstdDecompressor *self; + self = PyObject_GC_New(ZstdDecompressor, type); + if (self == NULL) { + goto error; + } + + self->inited = 0; + self->dict = NULL; + self->input_buffer = NULL; + self->input_buffer_size = 0; + self->in_begin = -1; + self->in_end = -1; + self->unused_data = NULL; + self->eof = 0; + + /* needs_input flag */ + self->needs_input = 1; + + /* at_frame_edge flag */ + self->at_frame_edge = 1; + + /* Decompression context */ + self->dctx = ZSTD_createDCtx(); + if (self->dctx == NULL) { + _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state != NULL) { + PyErr_SetString(mod_state->ZstdError, + "Unable to create ZSTD_DCtx instance."); + } + goto error; + } + + return (PyObject*)self; + +error: + if (self != NULL) { + PyObject_GC_Del(self); + } + return NULL; +} + +static void +ZstdDecompressor_dealloc(PyObject *ob) +{ + ZstdDecompressor *self = ZstdDecompressor_CAST(ob); + + PyObject_GC_UnTrack(self); + + /* Free decompression context */ + ZSTD_freeDCtx(self->dctx); + + /* Py_CLEAR the dict after free decompression context */ + Py_CLEAR(self->dict); + + /* Free unconsumed input data buffer */ + PyMem_Free(self->input_buffer); + + /* Free unused data */ + Py_CLEAR(self->unused_data); + + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_Del(ob); + Py_DECREF(tp); +} + +/*[clinic input] +_zstd.ZstdDecompressor.__init__ + + zstd_dict: object = None + A ZstdDict object, a pre-trained zstd dictionary. + options: object = None + A dict object that contains advanced decompression parameters. + +Create a decompressor object for decompressing data incrementally. + +Thread-safe at method level. 
For one-shot decompression, use the decompress()
+function instead.
+[clinic start generated code]*/
+
+static int
+_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
+                                     PyObject *zstd_dict, PyObject *options)
+/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
+{
+    /* One-time initialiser: optionally attaches a dictionary and applies
+       advanced parameters, then hands the object over to the GC.
+       Returns 0 on success and -1 on failure. */
+
+    /* A second __init__() call is rejected */
+    if (self->inited) {
+        PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
+        return -1;
+    }
+    self->inited = 1;
+
+    /* Attach the dictionary to the decompression context and keep it
+       alive for as long as this object exists */
+    if (zstd_dict != Py_None) {
+        if (_PyZstd_load_d_dict(self, zstd_dict) < 0) {
+            return -1;
+        }
+        self->dict = Py_NewRef(zstd_dict);
+    }
+
+    /* Apply the advanced decompression parameters, if any were given */
+    if (options != Py_None && _PyZstd_set_d_parameters(self, options) < 0) {
+        return -1;
+    }
+
+    // Tracking must wait until self->dict has its final value.
+    PyObject_GC_Track(self);
+    return 0;
+}
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDecompressor.unused_data
+
+A bytes object of un-consumed input data.
+
+When ZstdDecompressor object stops after a frame is
+decompressed, unused input data after the frame. Otherwise this will be b''.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
+/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/
+{
+    /* Getter for unused_data. While eof is not set it returns the module
+       state's shared empty bytes object; afterwards it returns the bytes
+       between in_begin and in_end of the input buffer, built on first
+       access and cached in self->unused_data. The result is a new
+       reference, or NULL on failure. */
+    PyObject *ret = NULL;
+
+    /* Thread-safe code */
+    Py_BEGIN_CRITICAL_SECTION(self);
+
+    if (!self->eof) {
+        _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+        /* Never return from inside the critical section: that would skip
+           Py_END_CRITICAL_SECTION() and leave the object locked. On
+           failure ret stays NULL and control falls through to the single
+           exit below. */
+        if (mod_state != NULL) {
+            ret = mod_state->empty_bytes;
+            Py_INCREF(ret);
+        }
+    }
+    else {
+        if (self->unused_data == NULL) {
+            self->unused_data = PyBytes_FromStringAndSize(
+                                    self->input_buffer + self->in_begin,
+                                    self->in_end - self->in_begin);
+        }
+        /* Still NULL here if the allocation above failed */
+        ret = self->unused_data;
+        Py_XINCREF(ret);
+    }
+
+    Py_END_CRITICAL_SECTION();
+
+    return ret;
+}
+
+/*[clinic input]
+_zstd.ZstdDecompressor.decompress
+
+    data: Py_buffer
+        A bytes-like object, zstd data to be decompressed.
+    max_length: Py_ssize_t = -1
+        Maximum size of returned data. When it is negative, the size of
+        output buffer is unlimited. When it is nonnegative, returns at
+        most max_length bytes of decompressed data.
+
+Decompress *data*, returning uncompressed bytes if possible, or b'' otherwise.
+
+If *max_length* is nonnegative, returns at most *max_length* bytes of
+decompressed data. If this limit is reached and further output can be
+produced, *self.needs_input* will be set to ``False``. In this case, the next
+call to *decompress()* may provide *data* as b'' to obtain more of the output.
+
+If all of the input data was decompressed and returned (either because this
+was less than *max_length* bytes, or because *max_length* was negative),
+*self.needs_input* will be set to True.
+
+Attempting to decompress data after the end of a frame is reached raises an
+EOFError. Any data found after the end of the frame is ignored and saved in
+the self.unused_data attribute.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
+                                       Py_buffer *data,
+                                       Py_ssize_t max_length)
+/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/
+{
+    /* Thin wrapper: runs stream_decompress() in TYPE_DECOMPRESSOR mode
+       while holding the per-object critical section, and returns its
+       result unchanged. */
+    PyObject *ret;
+    /* Thread-safe code */
+    Py_BEGIN_CRITICAL_SECTION(self);
+
+    ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
+    Py_END_CRITICAL_SECTION();
+    return ret;
+}
+
+/* The generated clinic wrappers look up the module state through
+   clinic_state(); the macro is only defined while they are included. */
+#define clinic_state() (get_zstd_state_from_type(type))
+#include "clinic/decompressor.c.h"
+#undef clinic_state
+
+static PyMethodDef ZstdDecompressor_methods[] = {
+    _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF
+
+    {0}
+};
+
+PyDoc_STRVAR(ZstdDecompressor_eof_doc,
+"True means the end of the first frame has been reached. If decompress data\n"
+"after that, an EOFError exception will be raised.");
+
+PyDoc_STRVAR(ZstdDecompressor_needs_input_doc,
+"If the max_length output limit in .decompress() method has been reached, and\n"
+"the decompressor has (or may has) unconsumed input data, it will be set to\n"
+"False. In this case, pass b'' to .decompress() method may output further data.");
+
+/* Read-only attributes backed directly by struct fields */
+static PyMemberDef ZstdDecompressor_members[] = {
+    {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof),
+     Py_READONLY, ZstdDecompressor_eof_doc},
+
+    {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input),
+     Py_READONLY, ZstdDecompressor_needs_input_doc},
+
+    {0}
+};
+
+static PyGetSetDef ZstdDecompressor_getset[] = {
+    _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
+
+    {0}
+};
+
+/* tp_traverse: report every object reference held by the instance */
+static int
+ZstdDecompressor_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+    ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
+    /* An instance of a heap type owns a reference to its type (dropped in
+       ZstdDecompressor_dealloc), so the type must be visited as well;
+       otherwise cycles running through the type cannot be collected. */
+    Py_VISIT(Py_TYPE(self));
+    Py_VISIT(self->dict);
+    return 0;
+}
+
+/* tp_clear: drop the owned object references */
+static int
+ZstdDecompressor_clear(PyObject *ob)
+{
+    ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
+    Py_CLEAR(self->dict);
+    Py_CLEAR(self->unused_data);
+    return 0;
+}
+
+static PyType_Slot ZstdDecompressor_slots[] = {
+    {Py_tp_new, _zstd_ZstdDecompressor_new},
+    {Py_tp_dealloc, ZstdDecompressor_dealloc},
+    {Py_tp_init, _zstd_ZstdDecompressor___init__},
+    {Py_tp_methods, ZstdDecompressor_methods},
+    {Py_tp_members, ZstdDecompressor_members},
+    {Py_tp_getset, ZstdDecompressor_getset},
+    {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__},
+    {Py_tp_traverse, ZstdDecompressor_traverse},
+    {Py_tp_clear, ZstdDecompressor_clear},
+    {0}
+};
+
+PyType_Spec ZstdDecompressor_type_spec = {
+    .name = "_zstd.ZstdDecompressor",
+    .basicsize = sizeof(ZstdDecompressor),
+    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+    .slots = ZstdDecompressor_slots,
+};
diff --git a/Modules/_zstd/zdict.c b/Modules/_zstd/zdict.c
new file mode 100644
index 00000000000..28ab964a6ca
--- /dev/null
+++ b/Modules/_zstd/zdict.c
@@ -0,0 +1,286 @@
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+/* ZstdDict class definitions */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdDict "ZstdDict *" "clinic_state()->ZstdDict_type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a5d1254c497e52ba]*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "_zstdmodule.h"
+
+#include <stddef.h>               // offsetof()
+
+/* Argument parenthesised so that any expression can be passed safely */
+#define ZstdDict_CAST(op) ((ZstdDict *)(op))
+
+/* tp_new: allocate a ZstdDict with every field in a state that
+   ZstdDict_dealloc can handle, plus an empty c_dicts dictionary.
+   Returns a new reference, or NULL on failure. */
+static PyObject *
+_zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs))
+{
+    ZstdDict *self;
+    self = PyObject_GC_New(ZstdDict, type);
+    if (self == NULL) {
+        goto error;
+    }
+
+    self->dict_content = NULL;
+    self->inited = 0;
+    self->d_dict = NULL;
+    /* Give dict_id a defined value in case __init__() never assigns it */
+    self->dict_id = 0;
+
+    /* ZSTD_CDict dict */
+    self->c_dicts = PyDict_New();
+    if (self->c_dicts == NULL) {
+        goto error;
+    }
+
+    return (PyObject*)self;
+
+error:
+    /* Release through the normal deallocation path rather than calling
+       PyObject_GC_Del() directly: ZstdDict_dealloc also drops the type
+       reference that allocating an instance of a heap type took, which
+       the direct call leaked. All fields are NULL-safe at this point. */
+    Py_XDECREF(self);
+    return NULL;
+}
+
+/* tp_dealloc: release the native dictionary, the owned objects, the
+   object memory and finally the reference to the heap type */
+static void
+ZstdDict_dealloc(PyObject *ob)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+
+    PyObject_GC_UnTrack(self);
+
+    /* Free ZSTD_DDict instance */
+    ZSTD_freeDDict(self->d_dict);
+
+    /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances.
+       c_dicts (presumably the owner of the cached ZSTD_CDict instances)
+       therefore goes first, so the code matches the stated order. */
+    Py_CLEAR(self->c_dicts);
+    Py_CLEAR(self->dict_content);
+
+    PyTypeObject *tp = Py_TYPE(self);
+    PyObject_GC_Del(ob);
+    Py_DECREF(tp);
+}
+
+/*[clinic input]
+_zstd.ZstdDict.__init__
+
+    dict_content: object
+        A bytes-like object, dictionary's content.
+    is_raw: bool = False
+        This parameter is for advanced user. True means dict_content
+        argument is a "raw content" dictionary, free of any format
+        restriction. False means dict_content argument is an ordinary
+        zstd dictionary, was created by zstd functions, follow a
+        specified format.
+
+Represents a zstd dictionary, which can be used for compression/decompression.
+
+It's thread-safe, and can be shared by multiple ZstdCompressor /
+ZstdDecompressor objects.
+[clinic start generated code]*/
+
+static int
+_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content,
+                             int is_raw)
+/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/
+{
+    /* One-time initialiser: copies dict_content into a bytes object,
+       validates it and records its dictionary ID.
+       Returns 0 on success, -1 with an exception set on failure. */
+
+    /* Only called once */
+    if (self->inited) {
+        PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
+        return -1;
+    }
+    self->inited = 1;
+
+    /* Check dict_content's type */
+    self->dict_content = PyBytes_FromObject(dict_content);
+    if (self->dict_content == NULL) {
+        PyErr_SetString(PyExc_TypeError,
+                        "dict_content argument should be bytes-like object.");
+        return -1;
+    }
+
+    /* Both ordinary dictionary and "raw content" dictionary should
+       at least 8 bytes */
+    if (Py_SIZE(self->dict_content) < 8) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Zstd dictionary content should at least 8 bytes.");
+        return -1;
+    }
+
+    /* Get dict_id, 0 means "raw content" dictionary. */
+    self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content),
+                                            Py_SIZE(self->dict_content));
+
+    /* Check validity for ordinary dictionary */
+    if (!is_raw && self->dict_id == 0) {
+        /* The message is a string literal and is never modified */
+        const char *msg = "The dict_content argument is not a valid zstd "
+                          "dictionary. The first 4 bytes of a valid zstd dictionary "
+                          "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n"
+                          "If you are an advanced user, and can be sure that "
+                          "dict_content argument is a \"raw content\" zstd "
+                          "dictionary, set is_raw parameter to True.";
+        PyErr_SetString(PyExc_ValueError, msg);
+        return -1;
+    }
+
+    // Can only track self once self->dict_content is included
+    PyObject_GC_Track(self);
+    return 0;
+}
+
+/* The generated clinic wrappers look up the module state through
+   clinic_state(); the macro is only defined while they are included. */
+#define clinic_state() (get_zstd_state(type))
+#include "clinic/zdict.c.h"
+#undef clinic_state
+
+PyDoc_STRVAR(ZstdDict_dictid_doc,
+"ID of zstd dictionary, a 32-bit unsigned int value.\n\n"
+"Non-zero means ordinary dictionary, was created by zstd functions, follow\n"
+"a specified format.\n\n"
+"0 means a \"raw content\" dictionary, free of any format restriction, used\n"
+"for advanced user.");
+
+PyDoc_STRVAR(ZstdDict_dictcontent_doc,
+"The content of zstd dictionary, a bytes object, it's the same as dict_content\n"
+"argument in ZstdDict.__init__() method. It can be used with other programs.");
+
+/* tp_str: "<ZstdDict dict_id=... dict_size=...>" */
+static PyObject *
+ZstdDict_str(PyObject *ob)
+{
+    ZstdDict *dict = ZstdDict_CAST(ob);
+    /* dict_content is only assigned in __init__(); an object created with
+       __new__() alone still has it NULL, so report a size of 0 instead of
+       dereferencing a NULL pointer. */
+    Py_ssize_t dict_size = 0;
+    if (dict->dict_content != NULL) {
+        dict_size = Py_SIZE(dict->dict_content);
+    }
+    return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>",
+                                dict->dict_id, dict_size);
+}
+
+/* Read-only attributes backed directly by struct fields */
+static PyMemberDef ZstdDict_members[] = {
+    {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc},
+    {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc},
+    {0}
+};
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDict.as_digested_dict
+
+Load as a digested dictionary to compressor.
+
+Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)
+1. Some advanced compression parameters of compressor may be overridden
+   by parameters of digested dictionary.
+2. ZstdDict has a digested dictionaries cache for each compression level.
+   It's faster when loading again a digested dictionary with the same
+   compression level.
+3.
No need to use this for decompression.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/
+{
+    /* Pair this dictionary with the "digested" load mode: (self, type) */
+    PyObject *pair = Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED);
+    return pair;
+}
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDict.as_undigested_dict
+
+Load as an undigested dictionary to compressor.
+
+Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)
+1. The advanced compression parameters of compressor will not be overridden.
+2. Loading an undigested dictionary is costly. If load an undigested dictionary
+   multiple times, consider reusing a compressor object.
+3. No need to use this for decompression.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/
+{
+    /* Pair this dictionary with the "undigested" load mode: (self, type) */
+    PyObject *pair = Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED);
+    return pair;
+}
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDict.as_prefix
+
+Load as a prefix to compressor/decompressor.
+
+Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)
+1. Prefix is compatible with long distance matching, while dictionary is not.
+2. It only works for the first frame, then the compressor/decompressor will
+   return to no prefix state.
+3. When decompressing, must use the same prefix as when compressing."
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/
+{
+    /* Pair this dictionary with the "prefix" load mode: (self, type) */
+    return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX);
+}
+
+static PyGetSetDef ZstdDict_getset[] = {
+    _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
+
+    _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
+
+    _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
+
+    {0}
+};
+
+/* sq_length: size in bytes of the dictionary content */
+static Py_ssize_t
+ZstdDict_length(PyObject *ob)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+    /* dict_content is only assigned in __init__(); an object created with
+       __new__() alone still has it NULL. Report length 0 in that case
+       instead of dereferencing a NULL pointer. */
+    if (self->dict_content == NULL) {
+        return 0;
+    }
+    assert(PyBytes_Check(self->dict_content));
+    return Py_SIZE(self->dict_content);
+}
+
+/* tp_traverse: report every object reference held by the instance */
+static int
+ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+    /* An instance of a heap type owns a reference to its type, so the
+       type must be visited as well; otherwise cycles running through the
+       type cannot be collected. */
+    Py_VISIT(Py_TYPE(self));
+    Py_VISIT(self->c_dicts);
+    Py_VISIT(self->dict_content);
+    return 0;
+}
+
+/* tp_clear: drop the reference to the dictionary content */
+static int
+ZstdDict_clear(PyObject *ob)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+    Py_CLEAR(self->dict_content);
+    return 0;
+}
+
+static PyType_Slot zstddict_slots[] = {
+    {Py_tp_members, ZstdDict_members},
+    {Py_tp_getset, ZstdDict_getset},
+    {Py_tp_new, _zstd_ZstdDict_new},
+    {Py_tp_dealloc, ZstdDict_dealloc},
+    {Py_tp_init, _zstd_ZstdDict___init__},
+    {Py_tp_str, ZstdDict_str},
+    {Py_tp_doc, (char*)_zstd_ZstdDict___init____doc__},
+    {Py_sq_length, ZstdDict_length},
+    {Py_tp_traverse, ZstdDict_traverse},
+    {Py_tp_clear, ZstdDict_clear},
+    {0}
+};
+
+PyType_Spec zstddict_type_spec = {
+    .name = "_zstd.ZstdDict",
+    .basicsize = sizeof(ZstdDict),
+    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+    .slots = zstddict_slots,
+};
diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h
index 3a1c1698b1b..552360eb80a 100644
--- a/Modules/clinic/_cursesmodule.c.h
+++ b/Modules/clinic/_cursesmodule.c.h
@@ -4263,10 +4263,7 @@ PyDoc_STRVAR(_curses_use_default_colors__doc__,
 "use_default_colors($module, /)\n"
 "--\n"
 "\n"
-"Allow use of default values for colors on terminals supporting this feature.\n"
-"\n"
-"Use this to support 
transparency in your application. The default color\n" -"is assigned to the color number -1."); +"Equivalent to assume_default_colors(-1, -1)."); #define _CURSES_USE_DEFAULT_COLORS_METHODDEF \ {"use_default_colors", (PyCFunction)_curses_use_default_colors, METH_NOARGS, _curses_use_default_colors__doc__}, @@ -4282,6 +4279,51 @@ _curses_use_default_colors(PyObject *module, PyObject *Py_UNUSED(ignored)) #endif /* !defined(STRICT_SYSV_CURSES) */ +#if !defined(STRICT_SYSV_CURSES) + +PyDoc_STRVAR(_curses_assume_default_colors__doc__, +"assume_default_colors($module, fg, bg, /)\n" +"--\n" +"\n" +"Allow use of default values for colors on terminals supporting this feature.\n" +"\n" +"Assign terminal default foreground/background colors to color number -1.\n" +"Change the definition of the color-pair 0 to (fg, bg).\n" +"\n" +"Use this to support transparency in your application."); + +#define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF \ + {"assume_default_colors", _PyCFunction_CAST(_curses_assume_default_colors), METH_FASTCALL, _curses_assume_default_colors__doc__}, + +static PyObject * +_curses_assume_default_colors_impl(PyObject *module, int fg, int bg); + +static PyObject * +_curses_assume_default_colors(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + int fg; + int bg; + + if (!_PyArg_CheckPositional("assume_default_colors", nargs, 2, 2)) { + goto exit; + } + fg = PyLong_AsInt(args[0]); + if (fg == -1 && PyErr_Occurred()) { + goto exit; + } + bg = PyLong_AsInt(args[1]); + if (bg == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = _curses_assume_default_colors_impl(module, fg, bg); + +exit: + return return_value; +} + +#endif /* !defined(STRICT_SYSV_CURSES) */ + PyDoc_STRVAR(_curses_has_extended_color_support__doc__, "has_extended_color_support($module, /)\n" "--\n" @@ -4394,4 +4436,8 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_USE_DEFAULT_COLORS_METHODDEF 
#define _CURSES_USE_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_USE_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=dbbbe86a4171799a input=a9049054013a1b77]*/ + +#ifndef _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF + #define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF +#endif /* !defined(_CURSES_ASSUME_DEFAULT_COLORS_METHODDEF) */ +/*[clinic end generated code: output=42b2923d88c8d0f6 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 0125e247ee4..6b8cc3d07ab 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1471,7 +1471,7 @@ os_getcwdb(PyObject *module, PyObject *Py_UNUSED(ignored)) PyDoc_STRVAR(os_link__doc__, "link($module, /, src, dst, *, src_dir_fd=None, dst_dir_fd=None,\n" -" follow_symlinks=True)\n" +" follow_symlinks=(os.name != \'nt\'))\n" "--\n" "\n" "Create a hard link to a file.\n" @@ -1530,7 +1530,7 @@ os_link(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn path_t dst = PATH_T_INITIALIZE_P("link", "dst", 0, 0, 0, 0); int src_dir_fd = DEFAULT_DIR_FD; int dst_dir_fd = DEFAULT_DIR_FD; - int follow_symlinks = 1; + int follow_symlinks = -1; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -13398,4 +13398,4 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ -/*[clinic end generated code: output=a5ca2541f2af5462 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f7b5635e0b948be4 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 04c3b9e987a..964e9888431 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -573,7 +573,11 @@ extern char *ctermid_r(char *); # define HAVE_FACCESSAT_RUNTIME 1 # define 
HAVE_FCHMODAT_RUNTIME 1 # define HAVE_FCHOWNAT_RUNTIME 1 +#ifdef __wasi__ +# define HAVE_LINKAT_RUNTIME 0 +# else # define HAVE_LINKAT_RUNTIME 1 +# endif # define HAVE_FDOPENDIR_RUNTIME 1 # define HAVE_MKDIRAT_RUNTIME 1 # define HAVE_RENAMEAT_RUNTIME 1 @@ -4346,7 +4350,7 @@ os.link * src_dir_fd : dir_fd = None dst_dir_fd : dir_fd = None - follow_symlinks: bool = True + follow_symlinks: bool(c_default="-1", py_default="(os.name != 'nt')") = PLACEHOLDER Create a hard link to a file. @@ -4364,31 +4368,46 @@ src_dir_fd, dst_dir_fd, and follow_symlinks may not be implemented on your static PyObject * os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, int dst_dir_fd, int follow_symlinks) -/*[clinic end generated code: output=7f00f6007fd5269a input=b0095ebbcbaa7e04]*/ +/*[clinic end generated code: output=7f00f6007fd5269a input=1d5e602d115fed7b]*/ { #ifdef MS_WINDOWS BOOL result = FALSE; #else int result; #endif -#if defined(HAVE_LINKAT) - int linkat_unavailable = 0; -#endif -#ifndef HAVE_LINKAT - if ((src_dir_fd != DEFAULT_DIR_FD) || (dst_dir_fd != DEFAULT_DIR_FD)) { - argument_unavailable_error("link", "src_dir_fd and dst_dir_fd"); - return NULL; +#ifdef HAVE_LINKAT + if (HAVE_LINKAT_RUNTIME) { + if (follow_symlinks < 0) { + follow_symlinks = 1; + } } + else #endif - -#ifndef MS_WINDOWS - if ((src->narrow && dst->wide) || (src->wide && dst->narrow)) { - PyErr_SetString(PyExc_NotImplementedError, - "link: src and dst must be the same type"); - return NULL; - } + { + if ((src_dir_fd != DEFAULT_DIR_FD) || (dst_dir_fd != DEFAULT_DIR_FD)) { + argument_unavailable_error("link", "src_dir_fd and dst_dir_fd"); + return NULL; + } +/* See issue 85527: link() on Linux works like linkat without AT_SYMLINK_FOLLOW, + but on Mac it works like linkat *with* AT_SYMLINK_FOLLOW. 
*/ +#if defined(MS_WINDOWS) || defined(__linux__) + if (follow_symlinks == 1) { + argument_unavailable_error("link", "follow_symlinks=True"); + return NULL; + } +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || (defined(__sun) && defined(__SVR4)) + if (follow_symlinks == 0) { + argument_unavailable_error("link", "follow_symlinks=False"); + return NULL; + } +#else + if (follow_symlinks >= 0) { + argument_unavailable_error("link", "follow_symlinks"); + return NULL; + } #endif + } if (PySys_Audit("os.link", "OOii", src->object, dst->object, src_dir_fd == DEFAULT_DIR_FD ? -1 : src_dir_fd, @@ -4406,44 +4425,18 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, #else Py_BEGIN_ALLOW_THREADS #ifdef HAVE_LINKAT - if ((src_dir_fd != DEFAULT_DIR_FD) || - (dst_dir_fd != DEFAULT_DIR_FD) || - (!follow_symlinks)) { - - if (HAVE_LINKAT_RUNTIME) { - - result = linkat(src_dir_fd, src->narrow, - dst_dir_fd, dst->narrow, - follow_symlinks ? AT_SYMLINK_FOLLOW : 0); - - } -#ifdef __APPLE__ - else { - if (src_dir_fd == DEFAULT_DIR_FD && dst_dir_fd == DEFAULT_DIR_FD) { - /* See issue 41355: This matches the behaviour of !HAVE_LINKAT */ - result = link(src->narrow, dst->narrow); - } else { - linkat_unavailable = 1; - } - } -#endif + if (HAVE_LINKAT_RUNTIME) { + result = linkat(src_dir_fd, src->narrow, + dst_dir_fd, dst->narrow, + follow_symlinks ? 
AT_SYMLINK_FOLLOW : 0); } else -#endif /* HAVE_LINKAT */ +#endif + { + /* linkat not available */ result = link(src->narrow, dst->narrow); - Py_END_ALLOW_THREADS - -#ifdef HAVE_LINKAT - if (linkat_unavailable) { - /* Either or both dir_fd arguments were specified */ - if (src_dir_fd != DEFAULT_DIR_FD) { - argument_unavailable_error("link", "src_dir_fd"); - } else { - argument_unavailable_error("link", "dst_dir_fd"); - } - return NULL; } -#endif + Py_END_ALLOW_THREADS if (result) return path_error2(src, dst); @@ -5935,12 +5928,6 @@ internal_rename(path_t *src, path_t *dst, int src_dir_fd, int dst_dir_fd, int is return path_error2(src, dst); #else - if ((src->narrow && dst->wide) || (src->wide && dst->narrow)) { - PyErr_Format(PyExc_ValueError, - "%s: src and dst must be the same type", function_name); - return NULL; - } - Py_BEGIN_ALLOW_THREADS #ifdef HAVE_RENAMEAT if (dir_fd_specified) { @@ -10613,12 +10600,6 @@ os_symlink_impl(PyObject *module, path_t *src, path_t *dst, #else - if ((src->narrow && dst->wide) || (src->wide && dst->narrow)) { - PyErr_SetString(PyExc_ValueError, - "symlink: src and dst must be the same type"); - return NULL; - } - Py_BEGIN_ALLOW_THREADS #ifdef HAVE_SYMLINKAT if (dir_fd != DEFAULT_DIR_FD) { |