about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Tools/clinic/libclinic/cpp.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/clinic/libclinic/cpp.py')
-rw-r--r-- Tools/clinic/libclinic/cpp.py 194
1 files changed, 194 insertions, 0 deletions
diff --git a/Tools/clinic/libclinic/cpp.py b/Tools/clinic/libclinic/cpp.py
new file mode 100644
index 00000000000..e115d65a88e
--- /dev/null
+++ b/Tools/clinic/libclinic/cpp.py
@@ -0,0 +1,194 @@
+import dataclasses as dc
+import re
+import sys
+from typing import NoReturn
+
+from .errors import ParseError
+
+
# Names exported by "from <this module> import *".
__all__ = ["Monitor"]


# A (directive token, condition text) pair, as stored on Monitor.stack.
TokenAndCondition = tuple[str, str]
# The stack of currently open conditional blocks, outermost first.
TokenStack = list[TokenAndCondition]
+
def negate(condition: str) -> str:
    """
    Return the logical opposite of a CPP conditional.

    A leading '!' is removed if there is one; otherwise a '!' is prepended.
    The condition text is not parsed in any other way.
    """
    already_negated = condition[:1] == '!'
    return condition[1:] if already_negated else f"!{condition}"
+
+
# Matches a condition that is exactly one "defined(NAME)" test, with
# optional whitespace around the parentheses and the name.
_SIMPLE_DEFINED_PATTERN = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$')

# Callable: returns a match object for a simple "defined(NAME)", else None.
is_a_simple_defined = _SIMPLE_DEFINED_PATTERN.match
+
+
+@dc.dataclass(repr=False)
+class Monitor:
+ """
+ A simple C preprocessor that scans C source and computes, line by line,
+ what the current C preprocessor #if state is.
+
+ Doesn't handle everything--for example, if you have /* inside a C string,
+ without a matching */ (also inside a C string), or with a */ inside a C
+ string but on another line and with preprocessor macros in between...
+ the parser will get lost.
+
+ Anyway this implementation seems to work well enough for the CPython sources.
+ """
+ filename: str
+ _: dc.KW_ONLY
+ verbose: bool = False
+
+ def __post_init__(self) -> None:
+ self.stack: TokenStack = []
+ self.in_comment = False
+ self.continuation: str | None = None
+ self.line_number = 0
+
+ def __repr__(self) -> str:
+ parts = (
+ str(id(self)),
+ f"line={self.line_number}",
+ f"condition={self.condition()!r}"
+ )
+ return f"<clinic.Monitor {' '.join(parts)}>"
+
+ def status(self) -> str:
+ return str(self.line_number).rjust(4) + ": " + self.condition()
+
+ def condition(self) -> str:
+ """
+ Returns the current preprocessor state, as a single #if condition.
+ """
+ return " && ".join(condition for token, condition in self.stack)
+
+ def fail(self, msg: str) -> NoReturn:
+ raise ParseError(msg, filename=self.filename, lineno=self.line_number)
+
+ def writeline(self, line: str) -> None:
+ self.line_number += 1
+ line = line.strip()
+
+ def pop_stack() -> TokenAndCondition:
+ if not self.stack:
+ self.fail(f"#{token} without matching #if / #ifdef / #ifndef!")
+ return self.stack.pop()
+
+ if self.continuation:
+ line = self.continuation + line
+ self.continuation = None
+
+ if not line:
+ return
+
+ if line.endswith('\\'):
+ self.continuation = line[:-1].rstrip() + " "
+ return
+
+ # we have to ignore preprocessor commands inside comments
+ #
+ # we also have to handle this:
+ # /* start
+ # ...
+ # */ /* <-- tricky!
+ # ...
+ # */
+ # and this:
+ # /* start
+ # ...
+ # */ /* also tricky! */
+ if self.in_comment:
+ if '*/' in line:
+ # snip out the comment and continue
+ #
+ # GCC allows
+ # /* comment
+ # */ #include <stdio.h>
+ # maybe other compilers too?
+ _, _, line = line.partition('*/')
+ self.in_comment = False
+
+ while True:
+ if '/*' in line:
+ if self.in_comment:
+ self.fail("Nested block comment!")
+
+ before, _, remainder = line.partition('/*')
+ comment, comment_ends, after = remainder.partition('*/')
+ if comment_ends:
+ # snip out the comment
+ line = before.rstrip() + ' ' + after.lstrip()
+ continue
+ # comment continues to eol
+ self.in_comment = True
+ line = before.rstrip()
+ break
+
+ # we actually have some // comments
+ # (but block comments take precedence)
+ before, line_comment, comment = line.partition('//')
+ if line_comment:
+ line = before.rstrip()
+
+ if not line.startswith('#'):
+ return
+
+ line = line[1:].lstrip()
+ assert line
+
+ fields = line.split()
+ token = fields[0].lower()
+ condition = ' '.join(fields[1:]).strip()
+
+ if token in {'if', 'ifdef', 'ifndef', 'elif'}:
+ if not condition:
+ self.fail(f"Invalid format for #{token} line: no argument!")
+ if token in {'if', 'elif'}:
+ if not is_a_simple_defined(condition):
+ condition = "(" + condition + ")"
+ if token == 'elif':
+ previous_token, previous_condition = pop_stack()
+ self.stack.append((previous_token, negate(previous_condition)))
+ else:
+ fields = condition.split()
+ if len(fields) != 1:
+ self.fail(f"Invalid format for #{token} line: "
+ "should be exactly one argument!")
+ symbol = fields[0]
+ condition = 'defined(' + symbol + ')'
+ if token == 'ifndef':
+ condition = '!' + condition
+ token = 'if'
+
+ self.stack.append((token, condition))
+
+ elif token == 'else':
+ previous_token, previous_condition = pop_stack()
+ self.stack.append((previous_token, negate(previous_condition)))
+
+ elif token == 'endif':
+ while pop_stack()[0] != 'if':
+ pass
+
+ else:
+ return
+
+ if self.verbose:
+ print(self.status())
+
+
def _main(filenames: list[str] | None = None) -> None:
    """
    Print the preprocessor state of each file after every conditional line.

    filenames defaults to the command-line arguments (sys.argv[1:]) when
    it is None or empty.  Each file is read line by line through a verbose
    Monitor, which prints its status as conditional directives are seen.
    """
    filenames = filenames or sys.argv[1:]
    for filename in filenames:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding.
        with open(filename, encoding="utf-8") as f:
            cpp = Monitor(filename, verbose=True)
            print()
            print(filename)
            for line in f:
                cpp.writeline(line)
+
+
# When run as a script, trace the files named on the command line.
if __name__ == '__main__':
    _main()