From 1f5682f3a27516833f7c317707dd359280dba6e7 Mon Sep 17 00:00:00 2001
From: mpage
Date: Wed, 9 Apr 2025 10:34:12 -0700
Subject: gh-129987: Disable GCC SLP autovectorization for the interpreter loop on x86-64 (#132295)

The SLP autovectorizer can cause poor code generation for opcode dispatch, negating any benefit we get from vectorization elsewhere in the interpreter loop.
---
 Python/ceval.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'Python/ceval.c')

diff --git a/Python/ceval.c b/Python/ceval.c
index a59b2b7a168..47d068edac2 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -948,7 +948,18 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
 #include "generated_cases.c.h"
 #endif
 
-PyObject* _Py_HOT_FUNCTION
+#if (defined(__GNUC__) && !defined(__clang__)) && defined(__x86_64__)
+/*
+ * gh-129987: The SLP autovectorizer can cause poor code generation for opcode
+ * dispatch, negating any benefit we get from vectorization elsewhere in the
+ * interpreter loop.
+ */
+#define DONT_SLP_VECTORIZE __attribute__((optimize ("no-tree-slp-vectorize")))
+#else
+#define DONT_SLP_VECTORIZE
+#endif
+
+PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE
 _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag)
 {
     _Py_EnsureTstateNotNULL(tstate);
-- 
cgit v1.2.3