diff options
Diffstat (limited to 'Tools/clinic/libclinic/cpp.py')
-rw-r--r-- | Tools/clinic/libclinic/cpp.py | 194 |
1 files changed, 194 insertions, 0 deletions
import dataclasses as dc
import re
import sys
from typing import NoReturn

from .errors import ParseError


__all__ = ["Monitor"]


# A stack entry: (directive token that opened it, condition currently in force).
TokenAndCondition = tuple[str, str]
TokenStack = list[TokenAndCondition]

def negate(condition: str) -> str:
    """
    Returns a CPP conditional that is the opposite of the conditional passed in.

    A leading '!' is stripped if present; otherwise one is prepended.
    """
    if condition.startswith('!'):
        return condition[1:]
    return "!" + condition


# Matches conditions of the exact form "defined(NAME)"; anything else gets
# wrapped in parentheses by Monitor.writeline() so it can be negated safely.
is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match


@dc.dataclass(repr=False)
class Monitor:
    """
    A simple C preprocessor that scans C source and computes, line by line,
    what the current C preprocessor #if state is.

    Doesn't handle everything--for example, if you have /* inside a C string,
    without a matching */ (also inside a C string), or with a */ inside a C
    string but on another line and with preprocessor macros in between...
    the parser will get lost.

    Anyway this implementation seems to work well enough for the CPython sources.
    """
    # Name of the file being scanned; only used in error reports.
    filename: str
    _: dc.KW_ONLY
    # When true, writeline() prints status() after every conditional directive.
    verbose: bool = False

    def __post_init__(self) -> None:
        # Stack of (token, condition) pairs for the currently open #if blocks.
        self.stack: TokenStack = []
        # True while we are inside a /* ... */ comment spanning several lines.
        self.in_comment = False
        # Text of a backslash-continued line waiting for its next physical line.
        self.continuation: str | None = None
        # Number of lines fed to writeline() so far (1-based once started).
        self.line_number = 0

    def __repr__(self) -> str:
        parts = (
            str(id(self)),
            f"line={self.line_number}",
            f"condition={self.condition()!r}"
        )
        return f"<clinic.Monitor {' '.join(parts)}>"

    def status(self) -> str:
        """
        Returns the current line number (right-justified to 4 columns)
        followed by the current condition.
        """
        return str(self.line_number).rjust(4) + ": " + self.condition()

    def condition(self) -> str:
        """
        Returns the current preprocessor state, as a single #if condition.
        """
        return " && ".join(condition for token, condition in self.stack)

    def fail(self, msg: str) -> NoReturn:
        """
        Raises ParseError for the current file and line number.
        """
        raise ParseError(msg, filename=self.filename, lineno=self.line_number)

    def writeline(self, line: str) -> None:
        """
        Feeds one physical line of C source to the monitor.

        Updates the line counter, the comment / continuation state and,
        for #if / #ifdef / #ifndef / #elif / #else / #endif, the condition
        stack.  All other lines (including other directives) are ignored.

        Raises ParseError (via fail()) on a malformed conditional directive,
        an unbalanced #elif / #else / #endif, or a "/*" found while already
        inside a multi-line comment.
        """
        self.line_number += 1
        line = line.strip()

        def pop_stack() -> TokenAndCondition:
            # 'token' is bound further down, before any call to pop_stack().
            if not self.stack:
                self.fail(f"#{token} without matching #if / #ifdef / #ifndef!")
            return self.stack.pop()

        if self.continuation:
            line = self.continuation + line
            self.continuation = None

        if not line:
            return

        if line.endswith('\\'):
            self.continuation = line[:-1].rstrip() + " "
            return

        # we have to ignore preprocessor commands inside comments
        #
        # we also have to handle this:
        #     /* start
        #     ...
        #     */   /*    <-- tricky!
        #     ...
        #     */
        # and this:
        #     /* start
        #     ...
        #     */   /* also tricky! */
        if self.in_comment:
            if '*/' not in line:
                # Still inside the comment: nothing on this line is code,
                # so a '#...' here must NOT be treated as a directive.
                if '/*' in line:
                    self.fail("Nested block comment!")
                return
            # snip out the comment and continue
            #
            # GCC allows
            #    /* comment
            #    */   #include <stdio.h>
            # maybe other compilers too?
            _, _, line = line.partition('*/')
            self.in_comment = False

        while True:
            if '/*' in line:
                before, _, remainder = line.partition('/*')
                comment, comment_ends, after = remainder.partition('*/')
                if comment_ends:
                    # snip out the comment
                    line = before.rstrip() + ' ' + after.lstrip()
                    continue
                # comment continues to eol
                self.in_comment = True
                line = before.rstrip()
            break

        # we actually have some // comments
        # (but block comments take precedence)
        before, line_comment, comment = line.partition('//')
        if line_comment:
            line = before.rstrip()

        if not line.startswith('#'):
            return

        line = line[1:].lstrip()
        if not line:
            # A lone '#' is a valid C "null directive"; it changes nothing.
            return

        fields = line.split()
        token = fields[0].lower()
        condition = ' '.join(fields[1:]).strip()

        if token in {'if', 'ifdef', 'ifndef', 'elif'}:
            if not condition:
                self.fail(f"Invalid format for #{token} line: no argument!")
            if token in {'if', 'elif'}:
                if not is_a_simple_defined(condition):
                    condition = "(" + condition + ")"
                if token == 'elif':
                    # The branch we are leaving is now known to be false.
                    previous_token, previous_condition = pop_stack()
                    self.stack.append((previous_token, negate(previous_condition)))
            else:
                fields = condition.split()
                if len(fields) != 1:
                    self.fail(f"Invalid format for #{token} line: "
                              "should be exactly one argument!")
                symbol = fields[0]
                condition = 'defined(' + symbol + ')'
                if token == 'ifndef':
                    condition = '!' + condition
                # #ifdef / #ifndef are recorded as plain 'if' entries so that
                # #endif can find the start of the block.
                token = 'if'

            self.stack.append((token, condition))

        elif token == 'else':
            previous_token, previous_condition = pop_stack()
            self.stack.append((previous_token, negate(previous_condition)))

        elif token == 'endif':
            # Discard any 'elif' entries, then the 'if' that opened the block.
            while pop_stack()[0] != 'if':
                pass

        else:
            return

        if self.verbose:
            print(self.status())


def _main(filenames: list[str] | None = None) -> None:
    """
    Runs a verbose Monitor over each file, printing the condition after
    every conditional directive.  Defaults to the files named in sys.argv.
    """
    filenames = filenames or sys.argv[1:]
    for filename in filenames:
        # Be explicit about the encoding instead of relying on the locale.
        with open(filename, encoding="utf-8") as f:
            cpp = Monitor(filename, verbose=True)
            print()
            print(filename)
            for line in f:
                cpp.writeline(line)


if __name__ == '__main__':
    _main()