Diffstat (limited to 'Tools/cases_generator/analyzer.py')
 -rw-r--r--  Tools/cases_generator/analyzer.py | 371
 1 file changed, 253 insertions(+), 118 deletions(-)
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index a4ce207703e..9c2981a68ac 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,13 +1,13 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+import itertools
import lexer
import parser
import re
from typing import Optional
-
@dataclass
class Properties:
- escapes: bool
+ escaping_calls: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]
error_with_pop: bool
error_without_pop: bool
deopts: bool
@@ -29,14 +29,21 @@ class Properties:
needs_prev: bool = False
def dump(self, indent: str) -> None:
- print(indent, end="")
- text = ", ".join([f"{key}: {value}" for (key, value) in self.__dict__.items()])
+ simple_properties = self.__dict__.copy()
+ del simple_properties["escaping_calls"]
+ text = "escaping_calls:\n"
+ for tkns in self.escaping_calls.values():
+ text += f"{indent} {tkns}\n"
+ text += ", ".join([f"{key}: {value}" for (key, value) in simple_properties.items()])
print(indent, text, sep="")
@staticmethod
def from_list(properties: list["Properties"]) -> "Properties":
+ escaping_calls: dict[lexer.Token, tuple[lexer.Token, lexer.Token]] = {}
+ for p in properties:
+ escaping_calls.update(p.escaping_calls)
return Properties(
- escapes=any(p.escapes for p in properties),
+ escaping_calls=escaping_calls,
error_with_pop=any(p.error_with_pop for p in properties),
error_without_pop=any(p.error_without_pop for p in properties),
deopts=any(p.deopts for p in properties),
@@ -59,9 +66,12 @@ class Properties:
def infallible(self) -> bool:
return not self.error_with_pop and not self.error_without_pop
+ @property
+ def escapes(self) -> bool:
+ return bool(self.escaping_calls)
SKIP_PROPERTIES = Properties(
- escapes=False,
+ escaping_calls={},
error_with_pop=False,
error_without_pop=False,
deopts=False,
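The boolean `escapes` field is replaced by an `escaping_calls` map, with `escapes` re-derived as a property, so merged uops (`from_list`) combine the maps rather than or-ing booleans. A minimal sketch of that shape, with plain strings standing in for lexer.Token (an assumption made for brevity):

    from dataclasses import dataclass, field

    @dataclass
    class PropertiesSketch:
        # first token of each escaping statement -> (call token, statement end)
        escaping_calls: dict[str, tuple[str, str]] = field(default_factory=dict)

        @property
        def escapes(self) -> bool:
            # derived on demand, so it cannot drift out of sync with the map
            return bool(self.escaping_calls)

    merged: dict[str, tuple[str, str]] = {}
    for p in (PropertiesSketch({"tmp =": ("PyObject_Str", ";")}), PropertiesSketch()):
        merged.update(p.escaping_calls)
    assert PropertiesSketch(merged).escapes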
@@ -156,6 +166,7 @@ class Uop:
stack: StackEffect
caches: list[CacheEntry]
deferred_refs: dict[lexer.Token, str | None]
+ output_stores: list[lexer.Token]
body: list[lexer.Token]
properties: Properties
_size: int = -1
@@ -322,11 +333,24 @@ def analyze_stack(
]
# Mark variables with matching names at the base of the stack as "peek"
modified = False
- for input, output in zip(inputs, outputs):
- if input.name == output.name and not modified:
- input.peek = output.peek = True
+ input_names: dict[str, lexer.Token] = { i.name : i.first_token for i in op.inputs if i.name != "unused" }
+ for input, output in itertools.zip_longest(inputs, outputs):
+ if output is None:
+ pass
+ elif input is None:
+ if output.name in input_names:
+ raise analysis_error(
+ f"Reuse of variable '{output.name}' at different stack location",
+ input_names[output.name])
+ elif input.name == output.name:
+ if not modified:
+ input.peek = output.peek = True
else:
modified = True
+ if output.name in input_names:
+ raise analysis_error(
+ f"Reuse of variable '{output.name}' at different stack location",
+ input_names[output.name])
if isinstance(op, parser.InstDef):
output_names = [out.name for out in outputs]
for input in inputs:
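The peek-marking loop above switches from `zip` to `itertools.zip_longest`: `zip` stops at the shorter sequence, so outputs with no matching input were never validated. With padding, an output name that reappears at a different stack offset is caught; for example, a hypothetical stack effect `(a, b -- b, a)` now raises "Reuse of variable 'b' at different stack location". The pairing behavior in isolation:

    import itertools

    inputs, outputs = ["a", "b"], ["b", "a", "c"]
    # zip() would silently drop "c"; zip_longest pads the shorter side
    # with None, so the loop still sees and validates every output
    assert list(itertools.zip_longest(inputs, outputs)) == [
        ("a", "b"), ("b", "a"), (None, "c"),
    ]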
@@ -354,21 +378,46 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
return [CacheEntry(i.name, int(i.size)) for i in caches]
+def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]:
+ """Find the tokens that make up the left-hand side of an assignment"""
+ offset = 0
+ for tkn in reversed(node.block.tokens[: idx]):
+ if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+ return node.block.tokens[idx - offset : idx]
+ offset += 1
+ return []
+
+
+def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]:
+ res: list[lexer.Token] = []
+ outnames = { out.name for out in node.outputs }
+ innames = { inp.name for inp in node.inputs }
+ for idx, tkn in enumerate(node.block.tokens):
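+ # "AND" is the '&' token: an output passed by address counts as a store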
+ if tkn.kind == "AND":
+ name = node.block.tokens[idx+1]
+ if name.text in outnames:
+ res.append(name)
+ if tkn.kind != "EQUALS":
+ continue
+ lhs = find_assignment_target(node, idx)
+ assert lhs
+ while lhs and lhs[0].kind == "COMMENT":
+ lhs = lhs[1:]
+ if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
+ continue
+ name = lhs[0]
+ if name.text in innames:
+ raise analysis_error(f"Cannot assign to input variable '{name.text}'", name)
+ if name.text in outnames:
+ res.append(name)
+ return res
+
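`find_stores_outputs` collects every token where an output variable is written: plain assignments (`name = ...`) and outputs passed by address (`&name`), while rejecting assignments to inputs. The backward scan for an assignment's left-hand side can be reproduced in isolation; in this sketch, Tok is a stand-in for lexer.Token:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Tok:
        kind: str
        text: str

    BOUNDARY = {"SEMI", "LBRACE", "RBRACE"}

    def assignment_target(tokens: list[Tok], idx: int) -> list[Tok]:
        # walk left from the EQUALS at tokens[idx] to the previous
        # statement boundary; the tokens in between form the LHS
        offset = 0
        for tkn in reversed(tokens[:idx]):
            if tkn.kind in BOUNDARY:
                return tokens[idx - offset : idx]
            offset += 1
        return []

    toks = [Tok("LBRACE", "{"), Tok("IDENTIFIER", "res"), Tok("EQUALS", "="),
            Tok("IDENTIFIER", "PyLong_Add"), Tok("LPAREN", "("),
            Tok("RPAREN", ")"), Tok("SEMI", ";")]
    assert [t.text for t in assignment_target(toks, 2)] == ["res"]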
def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]:
"""Look for PyStackRef_FromPyObjectNew() calls"""
- def find_assignment_target(idx: int) -> list[lexer.Token]:
- """Find the tokens that make up the left-hand side of an assignment"""
- offset = 1
- for tkn in reversed(node.block.tokens[: idx - 1]):
- if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
- return node.block.tokens[idx - offset : idx - 1]
- offset += 1
- return []
-
def in_frame_push(idx: int) -> bool:
for tkn in reversed(node.block.tokens[: idx - 1]):
- if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
+ if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
return False
if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked":
return True
@@ -386,7 +435,7 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
continue
raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn)
- lhs = find_assignment_target(idx)
+ lhs = find_assignment_target(node, idx - 1)
if len(lhs) == 0:
raise analysis_error(
"PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
@@ -406,9 +455,13 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
)
name = lhs[0].text
- if not any(var.name == name for var in node.outputs):
+ match = (
+ any(var.name == name for var in node.inputs)
+ or any(var.name == name for var in node.outputs)
+ )
+ if not match:
raise analysis_error(
- f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'",
+ f"PyStackRef_FromPyObjectNew() must be assigned to an input or output, not '{name}'",
tkn,
)
@@ -461,125 +514,203 @@ def has_error_without_pop(op: parser.InstDef) -> bool:
NON_ESCAPING_FUNCTIONS = (
- "PyStackRef_FromPyObjectSteal",
+ "PyCFunction_GET_FLAGS",
+ "PyCFunction_GET_FUNCTION",
+ "PyCFunction_GET_SELF",
+ "PyCell_GetRef",
+ "PyCell_New",
+ "PyCell_SwapTakeRef",
+ "PyExceptionInstance_Class",
+ "PyException_GetCause",
+ "PyException_GetContext",
+ "PyException_GetTraceback",
+ "PyFloat_AS_DOUBLE",
+ "PyFloat_FromDouble",
+ "PyFunction_GET_CODE",
+ "PyFunction_GET_GLOBALS",
+ "PyList_GET_ITEM",
+ "PyList_GET_SIZE",
+ "PyList_SET_ITEM",
+ "PyLong_AsLong",
+ "PyLong_FromLong",
+ "PyLong_FromSsize_t",
+ "PySlice_New",
"PyStackRef_AsPyObjectBorrow",
+ "PyStackRef_AsPyObjectNew",
"PyStackRef_AsPyObjectSteal",
+ "PyStackRef_CLEAR",
"PyStackRef_CLOSE",
"PyStackRef_DUP",
- "PyStackRef_CLEAR",
+ "PyStackRef_False",
+ "PyStackRef_FromPyObjectImmortal",
+ "PyStackRef_FromPyObjectNew",
+ "PyStackRef_FromPyObjectSteal",
+ "PyStackRef_Is",
"PyStackRef_IsNull",
+ "PyStackRef_None",
"PyStackRef_TYPE",
- "PyStackRef_False",
"PyStackRef_True",
- "PyStackRef_None",
- "PyStackRef_Is",
- "PyStackRef_FromPyObjectNew",
- "PyStackRef_AsPyObjectNew",
- "PyStackRef_FromPyObjectImmortal",
- "Py_INCREF",
- "_PyManagedDictPointer_IsValues",
- "_PyObject_GetManagedDict",
- "_PyObject_ManagedDictPointer",
- "_PyObject_InlineValues",
- "_PyDictValues_AddToInsertionOrder",
- "Py_DECREF",
- "Py_XDECREF",
- "_Py_DECREF_SPECIALIZED",
- "DECREF_INPUTS_AND_REUSE_FLOAT",
- "PyUnicode_Append",
- "_PyLong_IsZero",
- "Py_SIZE",
- "Py_TYPE",
- "PyList_GET_ITEM",
- "PyList_SET_ITEM",
"PyTuple_GET_ITEM",
- "PyList_GET_SIZE",
"PyTuple_GET_SIZE",
+ "PyType_HasFeature",
+ "PyUnicode_Append",
+ "PyUnicode_Concat",
+ "PyUnicode_GET_LENGTH",
+ "PyUnicode_READ_CHAR",
"Py_ARRAY_LENGTH",
+ "Py_CLEAR",
+ "Py_DECREF",
+ "Py_FatalError",
+ "Py_INCREF",
+ "Py_IS_TYPE",
+ "Py_NewRef",
+ "Py_REFCNT",
+ "Py_SIZE",
+ "Py_TYPE",
+ "Py_UNREACHABLE",
"Py_Unicode_GET_LENGTH",
- "PyUnicode_READ_CHAR",
- "_Py_SINGLETON",
- "PyUnicode_GET_LENGTH",
- "_PyLong_IsCompact",
- "_PyLong_IsNonNegativeCompact",
+ "Py_XDECREF",
+ "_PyCode_CODE",
+ "_PyDictValues_AddToInsertionOrder",
+ "_PyErr_Occurred",
+ "_PyEval_FrameClearAndPop",
+ "_PyFrame_GetCode",
+ "_PyFrame_IsIncomplete",
+ "_PyFrame_PushUnchecked",
+ "_PyFrame_SetStackPointer",
+ "_PyFrame_StackPush",
+ "_PyFunction_SetVersion",
+ "_PyGen_GetGeneratorFromFrame",
+ "_PyInterpreterState_GET",
+ "_PyList_AppendTakeRef",
+ "_PyList_FromStackRefSteal",
+ "_PyList_ITEMS",
+ "_PyLong_Add",
"_PyLong_CompactValue",
"_PyLong_DigitCount",
- "_Py_NewRef",
- "_Py_IsImmortal",
- "PyLong_FromLong",
- "_Py_STR",
- "_PyLong_Add",
+ "_PyLong_IsCompact",
+ "_PyLong_IsNonNegativeCompact",
+ "_PyLong_IsZero",
"_PyLong_Multiply",
"_PyLong_Subtract",
- "Py_NewRef",
- "_PyList_ITEMS",
- "_PyTuple_ITEMS",
- "_PyList_AppendTakeRef",
- "_Py_atomic_load_uintptr_relaxed",
- "_PyFrame_GetCode",
+ "_PyManagedDictPointer_IsValues",
+ "_PyObject_GC_IS_TRACKED",
+ "_PyObject_GC_MAY_BE_TRACKED",
+ "_PyObject_GC_TRACK",
+ "_PyObject_GetManagedDict",
+ "_PyObject_InlineValues",
+ "_PyObject_ManagedDictPointer",
"_PyThreadState_HasStackSpace",
- "_PyUnicode_Equal",
- "_PyFrame_SetStackPointer",
+ "_PyTuple_FromArraySteal",
+ "_PyTuple_FromStackRefSteal",
+ "_PyTuple_ITEMS",
"_PyType_HasFeature",
- "PyUnicode_Concat",
- "PySlice_New",
+ "_PyType_NewManagedObject",
+ "_PyUnicode_Equal",
+ "_PyUnicode_JoinArray",
+ "_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY",
+ "_Py_DECREF_NO_DEALLOC",
+ "_Py_DECREF_SPECIALIZED",
+ "_Py_EnterRecursiveCallTstateUnchecked",
+ "_Py_ID",
+ "_Py_IsImmortal",
+ "_Py_IsImmortalLoose",
"_Py_LeaveRecursiveCallPy",
- "CALL_STAT_INC",
- "STAT_INC",
+ "_Py_LeaveRecursiveCallTstate",
+ "_Py_NewRef",
+ "_Py_SINGLETON",
+ "_Py_STR",
+ "_Py_atomic_load_uintptr_relaxed",
+ "_Py_set_eval_breaker_bit",
+ "advance_backoff_counter",
+ "assert",
+ "backoff_counter_triggers",
+ "initial_temperature_backoff_counter",
"maybe_lltrace_resume_frame",
- "_PyUnicode_JoinArray",
- "_PyEval_FrameClearAndPop",
- "_PyFrame_StackPush",
- "PyCell_New",
- "PyFloat_AS_DOUBLE",
- "_PyFrame_PushUnchecked",
- "Py_FatalError",
- "STACKREFS_TO_PYOBJECTS",
- "STACKREFS_TO_PYOBJECTS_CLEANUP",
- "CONVERSION_FAILED",
- "_PyList_FromStackRefSteal",
- "_PyTuple_FromArraySteal",
- "_PyTuple_FromStackRefSteal",
- "_Py_set_eval_breaker_bit"
+ "restart_backoff_counter",
)
-ESCAPING_FUNCTIONS = (
- "import_name",
- "import_from",
-)
-
-
-def makes_escaping_api_call(instr: parser.InstDef) -> bool:
- if "CALL_INTRINSIC" in instr.name:
- return True
- if instr.name == "_BINARY_OP":
- return True
- tkns = iter(instr.tokens)
- for tkn in tkns:
- if tkn.kind != lexer.IDENTIFIER:
- continue
+def find_stmt_start(node: parser.InstDef, idx: int) -> lexer.Token:
+ assert idx < len(node.block.tokens)
+ while True:
+ tkn = node.block.tokens[idx-1]
+ if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+ break
+ idx -= 1
+ assert idx > 0
+ while node.block.tokens[idx].kind == "COMMENT":
+ idx += 1
+ return node.block.tokens[idx]
+
+
+def find_stmt_end(node: parser.InstDef, idx: int) -> lexer.Token:
+ assert idx < len(node.block.tokens)
+ while True:
+ idx += 1
+ tkn = node.block.tokens[idx]
+ if tkn.kind == "SEMI":
+ return node.block.tokens[idx+1]
+
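`find_stmt_start` and `find_stmt_end` bracket the statement containing a given token: back up to the token after the previous `;`, `{` or `}` (skipping leading comments), and scan forward to the token just past the terminating `;`. The same logic in a self-contained sketch (Tok again standing in for lexer.Token):

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Tok:
        kind: str
        text: str

    BOUNDARY = {"SEMI", "LBRACE", "RBRACE"}

    def stmt_start(tokens: list[Tok], idx: int) -> Tok:
        while tokens[idx - 1].kind not in BOUNDARY:
            idx -= 1              # back up to the statement's first token
        while tokens[idx].kind == "COMMENT":
            idx += 1              # a leading comment is not the statement
        return tokens[idx]

    def stmt_end(tokens: list[Tok], idx: int) -> Tok:
        idx += 1
        while tokens[idx].kind != "SEMI":
            idx += 1              # scan forward to the terminating ';'
        return tokens[idx + 1]    # the token just past the statement

    toks = [Tok("LBRACE", "{"), Tok("IDENTIFIER", "res"), Tok("EQUALS", "="),
            Tok("IDENTIFIER", "PyObject_Str"), Tok("LPAREN", "("),
            Tok("IDENTIFIER", "v"), Tok("RPAREN", ")"), Tok("SEMI", ";"),
            Tok("RBRACE", "}")]
    assert stmt_start(toks, 3).text == "res"
    assert stmt_end(toks, 3).text == "}"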
+def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]) -> None:
+ calls = {escapes[t][0] for t in escapes}
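+ # in_if is a parenthesis-depth counter: non-zero while scanning an
+ # if/DEOPT_IF/ERROR_IF condition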
+ in_if = 0
+ tkn_iter = iter(instr.block.tokens)
+ for tkn in tkn_iter:
+ if tkn.kind == "IF":
+ next(tkn_iter)
+ in_if = 1
+ if tkn.kind == "IDENTIFIER" and tkn.text in ("DEOPT_IF", "ERROR_IF"):
+ next(tkn_iter)
+ in_if = 1
+ elif tkn.kind == "LPAREN" and in_if:
+ in_if += 1
+ elif tkn.kind == "RPAREN":
+ if in_if:
+ in_if -= 1
+ elif tkn in calls and in_if:
+ raise analysis_error(f"Escaping call '{tkn.text}' in condition", tkn)
+
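`check_escaping_calls` rejects any recorded escaping call that appears inside an `if (...)`, `DEOPT_IF(...)` or `ERROR_IF(...)` condition, presumably because the generator needs to save and restore state around escaping calls and cannot do so mid-condition. Illustratively, `DEOPT_IF(PyObject_IsTrue(cond))` would be an error; calling the escaping function in its own statement first and testing the result is fine.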
+def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[lexer.Token, lexer.Token]]:
+ result: dict[lexer.Token, tuple[lexer.Token, lexer.Token]] = {}
+ tokens = instr.block.tokens
+ for idx, tkn in enumerate(tokens):
try:
- next_tkn = next(tkns)
- except StopIteration:
- return False
+ next_tkn = tokens[idx+1]
+ except IndexError:
+ break
+ if tkn.kind == "SWITCH":
+ raise analysis_error("switch statements are not supported due to their complex flow control. Sorry.", tkn)
if next_tkn.kind != lexer.LPAREN:
continue
- if tkn.text in ESCAPING_FUNCTIONS:
- return True
- if tkn.text == "tp_vectorcall":
- return True
- if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"):
- continue
- if tkn.text.endswith("Check"):
- continue
- if tkn.text.startswith("Py_Is"):
- continue
- if tkn.text.endswith("CheckExact"):
- continue
- if tkn.text in NON_ESCAPING_FUNCTIONS:
+ if tkn.kind == lexer.IDENTIFIER:
+ if tkn.text.upper() == tkn.text:
+ # simple macro
+ continue
+ #if not tkn.text.startswith(("Py", "_Py", "monitor")):
+ # continue
+ if tkn.text.startswith(("sym_", "optimize_")):
+ # Optimize functions
+ continue
+ if tkn.text.endswith("Check"):
+ continue
+ if tkn.text.startswith("Py_Is"):
+ continue
+ if tkn.text.endswith("CheckExact"):
+ continue
+ if tkn.text in NON_ESCAPING_FUNCTIONS:
+ continue
+ elif tkn.kind == "RPAREN":
+ prev = tokens[idx-1]
+ if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int":
+ # a ')' preceded by a type name closes a cast, not a call
+ continue
+ elif tkn.kind != "RBRACKET":
continue
- return True
- return False
+ start = find_stmt_start(instr, idx)
+ end = find_stmt_end(instr, idx)
+ result[start] = tkn, end
+ check_escaping_calls(instr, result)
+ return result
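The identifier filters in `find_escaping_api_calls` can be distilled into a standalone predicate; this sketch uses a two-entry stand-in for the full NON_ESCAPING_FUNCTIONS allow-list:

    NON_ESCAPING = {"Py_INCREF", "Py_DECREF"}  # stand-in for the full tuple

    def probably_escapes(name: str) -> bool:
        # distilled version of the per-identifier checks above
        if name.upper() == name:
            return False                  # ALL-CAPS: simple macro
        if name.startswith(("sym_", "optimize_")):
            return False                  # optimizer helpers
        if name.endswith("Check") or name.endswith("CheckExact"):
            return False                  # type checks do not escape
        if name.startswith("Py_Is"):
            return False                  # identity tests
        return name not in NON_ESCAPING

    assert probably_escapes("PyObject_GetAttr")
    assert not probably_escapes("Py_INCREF")
    assert not probably_escapes("PyLong_Check")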
EXITS = {
@@ -651,6 +782,7 @@ def effect_depends_on_oparg_1(op: parser.InstDef) -> bool:
def compute_properties(op: parser.InstDef) -> Properties:
+ escaping_calls = find_escaping_api_calls(op)
has_free = (
variable_used(op, "PyCell_New")
or variable_used(op, "PyCell_GetRef")
@@ -671,7 +803,7 @@ def compute_properties(op: parser.InstDef) -> Properties:
error_with_pop = has_error_with_pop(op)
error_without_pop = has_error_without_pop(op)
return Properties(
- escapes=makes_escaping_api_call(op),
+ escaping_calls=escaping_calls,
error_with_pop=error_with_pop,
error_without_pop=error_without_pop,
deopts=deopts_if,
@@ -706,6 +838,7 @@ def make_uop(
stack=analyze_stack(op),
caches=analyze_caches(inputs),
deferred_refs=analyze_deferred_refs(op),
+ output_stores=find_stores_outputs(op),
body=op.block.tokens,
properties=compute_properties(op),
)
@@ -726,6 +859,7 @@ def make_uop(
stack=analyze_stack(op, bit),
caches=analyze_caches(inputs),
deferred_refs=analyze_deferred_refs(op),
+ output_stores=find_stores_outputs(op),
body=op.block.tokens,
properties=properties,
)
@@ -749,6 +883,7 @@ def make_uop(
stack=analyze_stack(op),
caches=analyze_caches(inputs),
deferred_refs=analyze_deferred_refs(op),
+ output_stores=find_stores_outputs(op),
body=op.block.tokens,
properties=properties,
)