Diffstat (limited to 'Tools/cases_generator/analyzer.py')
-rw-r--r--  Tools/cases_generator/analyzer.py | 371
1 file changed, 253 insertions, 118 deletions
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index a4ce207703e..9c2981a68ac 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,13 +1,13 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+import itertools
 import lexer
 import parser
 import re
 from typing import Optional
-

 @dataclass
 class Properties:
-    escapes: bool
+    escaping_calls: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]
     error_with_pop: bool
     error_without_pop: bool
     deopts: bool
@@ -29,14 +29,21 @@ class Properties:
     needs_prev: bool = False

     def dump(self, indent: str) -> None:
-        print(indent, end="")
-        text = ", ".join([f"{key}: {value}" for (key, value) in self.__dict__.items()])
+        simple_properties = self.__dict__.copy()
+        del simple_properties["escaping_calls"]
+        text = "escaping_calls:\n"
+        for tkns in self.escaping_calls.values():
+            text += f"{indent}    {tkns}\n"
+        text += ", ".join([f"{key}: {value}" for (key, value) in simple_properties.items()])
         print(indent, text, sep="")

     @staticmethod
     def from_list(properties: list["Properties"]) -> "Properties":
+        escaping_calls: dict[lexer.Token, tuple[lexer.Token, lexer.Token]] = {}
+        for p in properties:
+            escaping_calls.update(p.escaping_calls)
         return Properties(
-            escapes=any(p.escapes for p in properties),
+            escaping_calls=escaping_calls,
             error_with_pop=any(p.error_with_pop for p in properties),
             error_without_pop=any(p.error_without_pop for p in properties),
             deopts=any(p.deopts for p in properties),
@@ -59,9 +66,12 @@ class Properties:
     def infallible(self) -> bool:
         return not self.error_with_pop and not self.error_without_pop

+    @property
+    def escapes(self) -> bool:
+        return bool(self.escaping_calls)

 SKIP_PROPERTIES = Properties(
-    escapes=False,
+    escaping_calls={},
     error_with_pop=False,
     error_without_pop=False,
     deopts=False,
@@ -156,6 +166,7 @@ class Uop:
     stack: StackEffect
     caches: list[CacheEntry]
     deferred_refs: dict[lexer.Token, str | None]
+    output_stores: list[lexer.Token]
     body: list[lexer.Token]
     properties: Properties
     _size: int = -1
@@ -322,11 +333,24 @@ def analyze_stack(
     ]
     # Mark variables with matching names at the base of the stack as "peek"
     modified = False
-    for input, output in zip(inputs, outputs):
-        if input.name == output.name and not modified:
-            input.peek = output.peek = True
+    input_names: dict[str, lexer.Token] = { i.name : i.first_token for i in op.inputs if i.name != "unused" }
+    for input, output in itertools.zip_longest(inputs, outputs):
+        if output is None:
+            pass
+        elif input is None:
+            if output.name in input_names:
+                raise analysis_error(
+                    f"Reuse of variable '{output.name}' at different stack location",
+                    input_names[output.name])
+        elif input.name == output.name:
+            if not modified:
+                input.peek = output.peek = True
         else:
             modified = True
+            if output.name in input_names:
+                raise analysis_error(
+                    f"Reuse of variable '{output.name}' at different stack location",
+                    input_names[output.name])
     if isinstance(op, parser.InstDef):
         output_names = [out.name for out in outputs]
         for input in inputs:
@@ -354,21 +378,46 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
     return [CacheEntry(i.name, int(i.size)) for i in caches]


+def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]:
+    """Find the tokens that make up the left-hand side of an assignment"""
+    offset = 0
+    for tkn in reversed(node.block.tokens[: idx]):
+        if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+            return node.block.tokens[idx - offset : idx]
+        offset += 1
+    return []
+
+
+def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]:
+    res: list[lexer.Token] = []
+    outnames = { out.name for out in node.outputs }
+    innames = { out.name for out in node.inputs }
+    for idx, tkn in enumerate(node.block.tokens):
+        if tkn.kind == "AND":
+            name = node.block.tokens[idx+1]
+            if name.text in outnames:
+                res.append(name)
+        if tkn.kind != "EQUALS":
+            continue
+        lhs = find_assignment_target(node, idx)
+        assert lhs
+        while lhs and lhs[0].kind == "COMMENT":
+            lhs = lhs[1:]
+        if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
+            continue
+        name = lhs[0]
+        if name.text in innames:
+            raise analysis_error(f"Cannot assign to input variable '{name.text}'", name)
+        if name.text in outnames:
+            res.append(name)
+    return res
+
 def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]:
     """Look for PyStackRef_FromPyObjectNew() calls"""

-    def find_assignment_target(idx: int) -> list[lexer.Token]:
-        """Find the tokens that make up the left-hand side of an assignment"""
-        offset = 1
-        for tkn in reversed(node.block.tokens[: idx - 1]):
-            if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
-                return node.block.tokens[idx - offset : idx - 1]
-            offset += 1
-        return []
-
     def in_frame_push(idx: int) -> bool:
         for tkn in reversed(node.block.tokens[: idx - 1]):
-            if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
+            if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
                 return False
             if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked":
                 return True
@@ -386,7 +435,7 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
                 continue
             raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn)

-        lhs = find_assignment_target(idx)
+        lhs = find_assignment_target(node, idx - 1)
         if len(lhs) == 0:
             raise analysis_error(
                 "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
@@ -406,9 +455,13 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
             )

         name = lhs[0].text
-        if not any(var.name == name for var in node.outputs):
+        match = (
+            any(var.name == name for var in node.inputs)
+            or any(var.name == name for var in node.outputs)
+        )
+        if not match:
             raise analysis_error(
-                f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'",
+                f"PyStackRef_FromPyObjectNew() must be assigned to an input or output, not '{name}'",
                 tkn,
             )

@@ -461,125 +514,203 @@ def has_error_without_pop(op: parser.InstDef) -> bool:


 NON_ESCAPING_FUNCTIONS = (
-    "PyStackRef_FromPyObjectSteal",
+    "PyCFunction_GET_FLAGS",
+    "PyCFunction_GET_FUNCTION",
+    "PyCFunction_GET_SELF",
+    "PyCell_GetRef",
+    "PyCell_New",
+    "PyCell_SwapTakeRef",
+    "PyExceptionInstance_Class",
+    "PyException_GetCause",
+    "PyException_GetContext",
+    "PyException_GetTraceback",
+    "PyFloat_AS_DOUBLE",
+    "PyFloat_FromDouble",
+    "PyFunction_GET_CODE",
+    "PyFunction_GET_GLOBALS",
+    "PyList_GET_ITEM",
+    "PyList_GET_SIZE",
+    "PyList_SET_ITEM",
+    "PyLong_AsLong",
+    "PyLong_FromLong",
+    "PyLong_FromSsize_t",
+    "PySlice_New",
     "PyStackRef_AsPyObjectBorrow",
+    "PyStackRef_AsPyObjectNew",
     "PyStackRef_AsPyObjectSteal",
+    "PyStackRef_CLEAR",
     "PyStackRef_CLOSE",
     "PyStackRef_DUP",
-    "PyStackRef_CLEAR",
+    "PyStackRef_False",
+    "PyStackRef_FromPyObjectImmortal",
+    "PyStackRef_FromPyObjectNew",
+    "PyStackRef_FromPyObjectSteal",
+    "PyStackRef_Is",
     "PyStackRef_IsNull",
+    "PyStackRef_None",
     "PyStackRef_TYPE",
-    "PyStackRef_False",
     "PyStackRef_True",
-    "PyStackRef_None",
-    "PyStackRef_Is",
"PyStackRef_FromPyObjectNew", - "PyStackRef_AsPyObjectNew", - "PyStackRef_FromPyObjectImmortal", - "Py_INCREF", - "_PyManagedDictPointer_IsValues", - "_PyObject_GetManagedDict", - "_PyObject_ManagedDictPointer", - "_PyObject_InlineValues", - "_PyDictValues_AddToInsertionOrder", - "Py_DECREF", - "Py_XDECREF", - "_Py_DECREF_SPECIALIZED", - "DECREF_INPUTS_AND_REUSE_FLOAT", - "PyUnicode_Append", - "_PyLong_IsZero", - "Py_SIZE", - "Py_TYPE", - "PyList_GET_ITEM", - "PyList_SET_ITEM", "PyTuple_GET_ITEM", - "PyList_GET_SIZE", "PyTuple_GET_SIZE", + "PyType_HasFeature", + "PyUnicode_Append", + "PyUnicode_Concat", + "PyUnicode_GET_LENGTH", + "PyUnicode_READ_CHAR", "Py_ARRAY_LENGTH", + "Py_CLEAR", + "Py_DECREF", + "Py_FatalError", + "Py_INCREF", + "Py_IS_TYPE", + "Py_NewRef", + "Py_REFCNT", + "Py_SIZE", + "Py_TYPE", + "Py_UNREACHABLE", "Py_Unicode_GET_LENGTH", - "PyUnicode_READ_CHAR", - "_Py_SINGLETON", - "PyUnicode_GET_LENGTH", - "_PyLong_IsCompact", - "_PyLong_IsNonNegativeCompact", + "Py_XDECREF", + "_PyCode_CODE", + "_PyDictValues_AddToInsertionOrder", + "_PyErr_Occurred", + "_PyEval_FrameClearAndPop", + "_PyFrame_GetCode", + "_PyFrame_IsIncomplete", + "_PyFrame_PushUnchecked", + "_PyFrame_SetStackPointer", + "_PyFrame_StackPush", + "_PyFunction_SetVersion", + "_PyGen_GetGeneratorFromFrame", + "_PyInterpreterState_GET", + "_PyList_AppendTakeRef", + "_PyList_FromStackRefSteal", + "_PyList_ITEMS", + "_PyLong_Add", "_PyLong_CompactValue", "_PyLong_DigitCount", - "_Py_NewRef", - "_Py_IsImmortal", - "PyLong_FromLong", - "_Py_STR", - "_PyLong_Add", + "_PyLong_IsCompact", + "_PyLong_IsNonNegativeCompact", + "_PyLong_IsZero", "_PyLong_Multiply", "_PyLong_Subtract", - "Py_NewRef", - "_PyList_ITEMS", - "_PyTuple_ITEMS", - "_PyList_AppendTakeRef", - "_Py_atomic_load_uintptr_relaxed", - "_PyFrame_GetCode", + "_PyManagedDictPointer_IsValues", + "_PyObject_GC_IS_TRACKED", + "_PyObject_GC_MAY_BE_TRACKED", + "_PyObject_GC_TRACK", + "_PyObject_GetManagedDict", + "_PyObject_InlineValues", + "_PyObject_ManagedDictPointer", "_PyThreadState_HasStackSpace", - "_PyUnicode_Equal", - "_PyFrame_SetStackPointer", + "_PyTuple_FromArraySteal", + "_PyTuple_FromStackRefSteal", + "_PyTuple_ITEMS", "_PyType_HasFeature", - "PyUnicode_Concat", - "PySlice_New", + "_PyType_NewManagedObject", + "_PyUnicode_Equal", + "_PyUnicode_JoinArray", + "_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY", + "_Py_DECREF_NO_DEALLOC", + "_Py_DECREF_SPECIALIZED", + "_Py_EnterRecursiveCallTstateUnchecked", + "_Py_ID", + "_Py_IsImmortal", + "_Py_IsImmortalLoose", "_Py_LeaveRecursiveCallPy", - "CALL_STAT_INC", - "STAT_INC", + "_Py_LeaveRecursiveCallTstate", + "_Py_NewRef", + "_Py_SINGLETON", + "_Py_STR", + "_Py_atomic_load_uintptr_relaxed", + "_Py_set_eval_breaker_bit", + "advance_backoff_counter", + "assert", + "backoff_counter_triggers", + "initial_temperature_backoff_counter", "maybe_lltrace_resume_frame", - "_PyUnicode_JoinArray", - "_PyEval_FrameClearAndPop", - "_PyFrame_StackPush", - "PyCell_New", - "PyFloat_AS_DOUBLE", - "_PyFrame_PushUnchecked", - "Py_FatalError", - "STACKREFS_TO_PYOBJECTS", - "STACKREFS_TO_PYOBJECTS_CLEANUP", - "CONVERSION_FAILED", - "_PyList_FromStackRefSteal", - "_PyTuple_FromArraySteal", - "_PyTuple_FromStackRefSteal", - "_Py_set_eval_breaker_bit" + "restart_backoff_counter", ) -ESCAPING_FUNCTIONS = ( - "import_name", - "import_from", -) - - -def makes_escaping_api_call(instr: parser.InstDef) -> bool: - if "CALL_INTRINSIC" in instr.name: - return True - if instr.name == "_BINARY_OP": - return True - tkns = iter(instr.tokens) - for 
-    for tkn in tkns:
-        if tkn.kind != lexer.IDENTIFIER:
-            continue
+def find_stmt_start(node: parser.InstDef, idx: int) -> lexer.Token:
+    assert idx < len(node.block.tokens)
+    while True:
+        tkn = node.block.tokens[idx-1]
+        if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+            break
+        idx -= 1
+        assert idx > 0
+    while node.block.tokens[idx].kind == "COMMENT":
+        idx += 1
+    return node.block.tokens[idx]
+
+
+def find_stmt_end(node: parser.InstDef, idx: int) -> lexer.Token:
+    assert idx < len(node.block.tokens)
+    while True:
+        idx += 1
+        tkn = node.block.tokens[idx]
+        if tkn.kind == "SEMI":
+            return node.block.tokens[idx+1]
+
+def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]) -> None:
+    calls = {escapes[t][0] for t in escapes}
+    in_if = 0
+    tkn_iter = iter(instr.block.tokens)
+    for tkn in tkn_iter:
+        if tkn.kind == "IF":
+            next(tkn_iter)
+            in_if = 1
+        if tkn.kind == "IDENTIFIER" and tkn.text in ("DEOPT_IF", "ERROR_IF"):
+            next(tkn_iter)
+            in_if = 1
+        elif tkn.kind == "LPAREN" and in_if:
+            in_if += 1
+        elif tkn.kind == "RPAREN":
+            if in_if:
+                in_if -= 1
+        elif tkn in calls and in_if:
+            raise analysis_error(f"Escaping call '{tkn.text}' in condition", tkn)
+
+def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[lexer.Token, lexer.Token]]:
+    result: dict[lexer.Token, tuple[lexer.Token, lexer.Token]] = {}
+    tokens = instr.block.tokens
+    for idx, tkn in enumerate(tokens):
         try:
-            next_tkn = next(tkns)
-        except StopIteration:
-            return False
+            next_tkn = tokens[idx+1]
+        except IndexError:
+            break
+        if tkn.kind == "SWITCH":
+            raise analysis_error(f"switch statements are not supported due to their complex flow control. Sorry.", tkn)
         if next_tkn.kind != lexer.LPAREN:
             continue
-        if tkn.text in ESCAPING_FUNCTIONS:
-            return True
-        if tkn.text == "tp_vectorcall":
-            return True
-        if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"):
-            continue
-        if tkn.text.endswith("Check"):
-            continue
-        if tkn.text.startswith("Py_Is"):
-            continue
-        if tkn.text.endswith("CheckExact"):
-            continue
-        if tkn.text in NON_ESCAPING_FUNCTIONS:
+        if tkn.kind == lexer.IDENTIFIER:
+            if tkn.text.upper() == tkn.text:
+                # simple macro
+                continue
+            #if not tkn.text.startswith(("Py", "_Py", "monitor")):
+            #    continue
+            if tkn.text.startswith(("sym_", "optimize_")):
+                # Optimize functions
+                continue
+            if tkn.text.endswith("Check"):
+                continue
+            if tkn.text.startswith("Py_Is"):
+                continue
+            if tkn.text.endswith("CheckExact"):
+                continue
+            if tkn.text in NON_ESCAPING_FUNCTIONS:
+                continue
+        elif tkn.kind == "RPAREN":
+            prev = tokens[idx-1]
+            if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int":
+                #cast
+                continue
+        elif tkn.kind != "RBRACKET":
             continue
-        return True
-    return False
+        start = find_stmt_start(instr, idx)
+        end = find_stmt_end(instr, idx)
+        result[start] = tkn, end
+    check_escaping_calls(instr, result)
+    return result


 EXITS = {
@@ -651,6 +782,7 @@ def effect_depends_on_oparg_1(op: parser.InstDef) -> bool:


 def compute_properties(op: parser.InstDef) -> Properties:
+    escaping_calls = find_escaping_api_calls(op)
     has_free = (
         variable_used(op, "PyCell_New")
         or variable_used(op, "PyCell_GetRef")
@@ -671,7 +803,7 @@ def compute_properties(op: parser.InstDef) -> Properties:
     error_with_pop = has_error_with_pop(op)
     error_without_pop = has_error_without_pop(op)
     return Properties(
-        escapes=makes_escaping_api_call(op),
+        escaping_calls=escaping_calls,
         error_with_pop=error_with_pop,
         error_without_pop=error_without_pop,
         deopts=deopts_if,
@@ -706,6 
+838,7 @@ def make_uop(
         stack=analyze_stack(op),
         caches=analyze_caches(inputs),
         deferred_refs=analyze_deferred_refs(op),
+        output_stores=find_stores_outputs(op),
         body=op.block.tokens,
         properties=compute_properties(op),
     )
@@ -726,6 +859,7 @@ def make_uop(
             stack=analyze_stack(op, bit),
             caches=analyze_caches(inputs),
             deferred_refs=analyze_deferred_refs(op),
+            output_stores=find_stores_outputs(op),
             body=op.block.tokens,
             properties=properties,
         )
@@ -749,6 +883,7 @@ def make_uop(
         stack=analyze_stack(op),
         caches=analyze_caches(inputs),
         deferred_refs=analyze_deferred_refs(op),
+        output_stores=find_stores_outputs(op),
         body=op.block.tokens,
         properties=properties,
     )
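The analyze_stack() hunk above replaces zip() with itertools.zip_longest() so that an output reusing an input's name at a different stack depth is rejected instead of silently accepted. A toy model of that rule, using plain name strings instead of the analyzer's stack-item objects (check_reuse is a name invented here for illustration, and the real code also skips "unused" slots and raises analysis_error):

import itertools

def check_reuse(inputs: list[str], outputs: list[str]) -> str | None:
    # Return the first output name that reuses an input's name at a
    # different stack position, mirroring the zip_longest pairing.
    in_names = set(inputs)
    for i, o in itertools.zip_longest(inputs, outputs):
        if o is not None and o != i and o in in_names:
            return o
    return None

assert check_reuse(["a", "b"], ["a", "b"]) is None   # same positions: fine
assert check_reuse(["a", "b"], ["b"]) == "b"         # 'b' moved down: flagged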
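The other structural change is that Properties.escapes stops being a hand-maintained boolean and becomes derived data: find_escaping_api_calls() records each escaping call together with the statement containing it, keyed by the statement's first token and mapped to the call token and the token just past the terminating ';'. A minimal sketch of that boundary scan over plain token-kind strings (stand-ins for the generator's lexer.Token objects; stmt_bounds is a hypothetical name, not the analyzer's API):

def stmt_bounds(kinds: list[str], idx: int) -> tuple[int, int]:
    # Walk left from the call token to just past the previous statement
    # boundary (';', '{' or '}'), as find_stmt_start() does.
    start = idx
    while start > 0 and kinds[start - 1] not in {"SEMI", "LBRACE", "RBRACE"}:
        start -= 1
    # Walk right to the next ';'; the analyzer records the token *after*
    # it, as find_stmt_end() does, so the end bound is exclusive.
    end = idx
    while kinds[end] != "SEMI":
        end += 1
    return start, end + 1

# 'res = call(x);' -- the escaping call at index 2 is attributed to the
# whole statement, tokens 0..6.
kinds = ["IDENTIFIER", "EQUALS", "IDENTIFIER", "LPAREN",
         "IDENTIFIER", "RPAREN", "SEMI", "RBRACE"]
assert stmt_bounds(kinds, 2) == (0, 7)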
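check_escaping_calls() then enforces that no recorded call sits inside an if (...), DEOPT_IF(...) or ERROR_IF(...) condition, using a parenthesis-depth counter that only becomes active after one of those keywords. A self-contained sketch of the counting scheme, again with a stand-in Tok type rather than the real lexer, and returning the offending token where the real code raises analysis_error (PyObject_IsTrue is a plausible example of an escaping call, since it can run arbitrary __bool__ code):

from dataclasses import dataclass

@dataclass(frozen=True)
class Tok:
    kind: str
    text: str

def escaping_call_in_condition(tokens: list[Tok], escaping: set[str]) -> Tok | None:
    in_if = 0
    it = iter(tokens)
    for tkn in it:
        if tkn.kind == "IF" or (tkn.kind == "IDENTIFIER"
                                and tkn.text in ("DEOPT_IF", "ERROR_IF")):
            next(it)        # skip the '(' that opens the condition
            in_if = 1
        elif tkn.kind == "LPAREN" and in_if:
            in_if += 1
        elif tkn.kind == "RPAREN" and in_if:
            in_if -= 1      # back at depth 0 means the condition is over
        elif in_if and tkn.kind == "IDENTIFIER" and tkn.text in escaping:
            return tkn      # escaping call evaluated inside the condition
    return None

# DEOPT_IF(PyObject_IsTrue(cond)); -- rejected, because the call could
# run arbitrary code while the condition is being evaluated.
toks = [Tok("IDENTIFIER", "DEOPT_IF"), Tok("LPAREN", "("),
        Tok("IDENTIFIER", "PyObject_IsTrue"), Tok("LPAREN", "("),
        Tok("IDENTIFIER", "cond"), Tok("RPAREN", ")"), Tok("RPAREN", ")"),
        Tok("SEMI", ";")]
bad = escaping_call_in_condition(toks, {"PyObject_IsTrue"})
assert bad is not None and bad.text == "PyObject_IsTrue"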