diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2022-09-12 11:09:31 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-12 11:09:31 -0600 |
commit | 1756ffd66a38755cd45de51316d66266ae30e132 (patch) | |
tree | ce0d37b6c3c793e20c05b5d4030c7869968579e0 /Tools/c-analyzer/c_parser/preprocessor/gcc.py | |
parent | 662782e95f97d26bd57b3edc6aaf674e30899f44 (diff) | |
download | cpython-1756ffd66a38755cd45de51316d66266ae30e132.tar.gz cpython-1756ffd66a38755cd45de51316d66266ae30e132.zip |
gh-90110: Fix the c-analyzer Tool (gh-96731)
This includes:
* update the whitelists
* fixes so we can stop ignoring some of the files
* ensure Include/cpython/*.h get analyzed
Diffstat (limited to 'Tools/c-analyzer/c_parser/preprocessor/gcc.py')
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor/gcc.py | 157 |
1 files changed, 134 insertions, 23 deletions
diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index bb404a487b7..77080225379 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -7,7 +7,12 @@ from . import common as _common TOOL = 'gcc' # https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html -LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$') +# flags: +# 1 start of a new file +# 2 returning to a file (after including another) +# 3 following text comes from a system header file +# 4 following text should be treated as wrapped in an implicit extern "C" block +LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"((?: [1234])*)$') PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*') COMPILER_DIRECTIVE_RE = re.compile(r''' ^ @@ -40,32 +45,112 @@ POST_ARGS = ( ) -def preprocess(filename, incldirs=None, macros=None, samefiles=None): +def preprocess(filename, + incldirs=None, + includes=None, + macros=None, + samefiles=None, + cwd=None, + ): + if not cwd or not os.path.isabs(cwd): + cwd = os.path.abspath(cwd or '.') + filename = _normpath(filename, cwd) text = _common.preprocess( TOOL, filename, incldirs=incldirs, + includes=includes, macros=macros, #preargs=PRE_ARGS, postargs=POST_ARGS, executable=['gcc'], compiler='unix', + cwd=cwd, ) - return _iter_lines(text, filename, samefiles) + return _iter_lines(text, filename, samefiles, cwd) -def _iter_lines(text, filename, samefiles, *, raw=False): +def _iter_lines(text, reqfile, samefiles, cwd, raw=False): lines = iter(text.splitlines()) - # Build the lines and filter out directives. + # The first line is special. + # The next two lines are consistent. + for expected in [ + f'# 1 "{reqfile}"', + '# 1 "<built-in>"', + '# 1 "<command-line>"', + ]: + line = next(lines) + if line != expected: + raise NotImplementedError((line, expected)) + + # Do all the CLI-provided includes. 
+ filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles)) + make_info = (lambda lno: _common.FileInfo(reqfile, lno)) + last = None + for line in lines: + assert last != reqfile, (last,) + lno, included, flags = _parse_marker_line(line, reqfile) + if not included: + raise NotImplementedError((line,)) + if included == reqfile: + # This will be the last one. + assert not flags, (line, flags) + else: + assert 1 in flags, (line, flags) + yield from _iter_top_include_lines( + lines, + _normpath(included, cwd), + cwd, + filter_reqfile, + make_info, + raw, + ) + last = included + # The last one is always the requested file. + assert included == reqfile, (line,) + + +def _iter_top_include_lines(lines, topfile, cwd, + filter_reqfile, make_info, + raw): partial = 0 # depth - origfile = None + files = [topfile] + # We start at 1 in case there are source lines (including blank ones) + # before the first marker line. Also, we already verified in + # _parse_marker_line() that the preprocessor reported lno as 1. + lno = 1 for line in lines: - m = LINE_MARKER_RE.match(line) - if m: - lno, origfile = m.groups() - lno = int(lno) - elif _filter_orig_file(origfile, filename, samefiles): + if line == '# 1 "<command-line>" 2': + # We're done with this top-level include. + return + + _lno, included, flags = _parse_marker_line(line) + if included: + lno = _lno + included = _normpath(included, cwd) + # We hit a marker line. + if 1 in flags: + # We're entering a file. + # XXX Cycles are unexpected? + #assert included not in files, (line, files) + files.append(included) + elif 2 in flags: + # We're returning to a file. + assert files and included in files, (line, files) + assert included != files[-1], (line, files) + while files[-1] != included: + files.pop() + # XXX How can a file return to line 1? + #assert lno > 1, (line, lno) + else: + # It's the next line from the file. 
+ assert included == files[-1], (line, files) + assert lno > 1, (line, lno) + elif not files: + raise NotImplementedError((line,)) + elif filter_reqfile(files[-1]): + assert lno is not None, (line, files[-1]) if (m := PREPROC_DIRECTIVE_RE.match(line)): name, = m.groups() if name != 'pragma': @@ -74,7 +159,7 @@ def _iter_lines(text, filename, samefiles, *, raw=False): if not raw: line, partial = _strip_directives(line, partial=partial) yield _common.SourceLine( - _common.FileInfo(filename, lno), + make_info(lno), 'source', line or '', None, @@ -82,6 +167,34 @@ def _iter_lines(text, filename, samefiles, *, raw=False): lno += 1 +def _parse_marker_line(line, reqfile=None): + m = LINE_MARKER_RE.match(line) + if not m: + return None, None, None + lno, origfile, flags = m.groups() + lno = int(lno) + assert lno > 0, (line, lno) + assert origfile not in ('<built-in>', '<command-line>'), (line,) + flags = set(int(f) for f in flags.split()) if flags else () + + if 1 in flags: + # We're entering a file. + assert lno == 1, (line, lno) + assert 2 not in flags, (line,) + elif 2 in flags: + # We're returning to a file. + #assert lno > 1, (line, lno) + pass + elif reqfile and origfile == reqfile: + # We're starting the requested file. + assert lno == 1, (line, lno) + assert not flags, (line, flags) + else: + # It's the next line from the file. + assert lno > 1, (line, lno) + return lno, origfile, flags + + def _strip_directives(line, partial=0): # We assume there are no string literals with parens in directive bodies. 
while partial > 0: @@ -106,18 +219,16 @@ def _strip_directives(line, partial=0): return line, partial -def _filter_orig_file(origfile, current, samefiles): - if origfile == current: +def _filter_reqfile(current, reqfile, samefiles): + if current == reqfile: + return True + if current == '<stdin>': return True - if origfile == '<stdin>': + if current in samefiles: return True - if os.path.isabs(origfile): - return False + return False - for filename in samefiles or (): - if filename.endswith(os.path.sep): - filename += os.path.basename(current) - if origfile == filename: - return True - return False +def _normpath(filename, cwd): + assert cwd + return os.path.normpath(os.path.join(cwd, filename)) |