diff options
Diffstat (limited to 'Tools/c-analyzer/c_parser/preprocessor')
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor/__init__.py | 85 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor/common.py | 9 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor/gcc.py | 157 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor/pure.py | 2 |
4 files changed, 218 insertions, 35 deletions
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__init__.py b/Tools/c-analyzer/c_parser/preprocessor/__init__.py index c154137bf42..cdc1a4e1269 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/__init__.py +++ b/Tools/c-analyzer/c_parser/preprocessor/__init__.py @@ -35,9 +35,11 @@ logger = logging.getLogger(__name__) def preprocess(source, *, incldirs=None, + includes=None, macros=None, samefiles=None, filename=None, + cwd=None, tool=True, ): """... @@ -45,17 +47,27 @@ def preprocess(source, *, CWD should be the project root and "source" should be relative. """ if tool: - logger.debug(f'CWD: {os.getcwd()!r}') - logger.debug(f'incldirs: {incldirs!r}') - logger.debug(f'macros: {macros!r}') + if not cwd: + cwd = os.getcwd() + logger.debug(f'CWD: {cwd!r}') + logger.debug(f'incldirs: {incldirs!r}') + logger.debug(f'includes: {includes!r}') + logger.debug(f'macros: {macros!r}') logger.debug(f'samefiles: {samefiles!r}') _preprocess = _get_preprocessor(tool) with _good_file(source, filename) as source: - return _preprocess(source, incldirs, macros, samefiles) or () + return _preprocess( + source, + incldirs, + includes, + macros, + samefiles, + cwd, + ) or () else: source, filename = _resolve_source(source, filename) # We ignore "includes", "macros", etc. - return _pure.preprocess(source, filename) + return _pure.preprocess(source, filename, cwd) # if _run() returns just the lines: # text = _run(source) @@ -72,6 +84,7 @@ def preprocess(source, *, def get_preprocessor(*, file_macros=None, + file_includes=None, file_incldirs=None, file_same=None, ignore_exc=False, @@ -80,10 +93,12 @@ def get_preprocessor(*, _preprocess = preprocess if file_macros: file_macros = tuple(_parse_macros(file_macros)) + if file_includes: + file_includes = tuple(_parse_includes(file_includes)) if file_incldirs: file_incldirs = tuple(_parse_incldirs(file_incldirs)) if file_same: - file_same = tuple(file_same) + file_same = dict(file_same or ()) if not callable(ignore_exc): ignore_exc = (lambda exc, _ig=ignore_exc: _ig) @@ -91,16 +106,26 @@ def get_preprocessor(*, filename = filename.strip() if file_macros: macros = list(_resolve_file_values(filename, file_macros)) + if file_includes: + # There's a small chance we could need to filter out any + # includes that import "filename". It isn't clear that it's + # a problem any longer. If we do end up filtering then + # it may make sense to use c_common.fsutil.match_path_tail(). + includes = [i for i, in _resolve_file_values(filename, file_includes)] if file_incldirs: incldirs = [v for v, in _resolve_file_values(filename, file_incldirs)] + if file_same: + samefiles = _resolve_samefiles(filename, file_same) def preprocess(**kwargs): if file_macros and 'macros' not in kwargs: kwargs['macros'] = macros + if file_includes and 'includes' not in kwargs: + kwargs['includes'] = includes if file_incldirs and 'incldirs' not in kwargs: - kwargs['incldirs'] = [v for v, in _resolve_file_values(filename, file_incldirs)] - if file_same and 'file_same' not in kwargs: - kwargs['samefiles'] = file_same + kwargs['incldirs'] = incldirs + if file_same and 'samefiles' not in kwargs: + kwargs['samefiles'] = samefiles kwargs.setdefault('filename', filename) with handling_errors(ignore_exc, log_err=log_err): return _preprocess(filename, **kwargs) @@ -120,6 +145,11 @@ def _parse_macros(macros): yield row +def _parse_includes(includes): + for row, srcfile in _parse_table(includes, '\t', 'glob\tinclude', default=None): + yield row + + def _parse_incldirs(incldirs): for row, srcfile in _parse_table(incldirs, '\t', 'glob\tdirname', default=None): glob, dirname = row @@ -130,6 +160,43 @@ def _parse_incldirs(incldirs): yield row +def _resolve_samefiles(filename, file_same): + assert '*' not in filename, (filename,) + assert os.path.normpath(filename) == filename, (filename,) + _, suffix = os.path.splitext(filename) + samefiles = [] + for patterns, in _resolve_file_values(filename, file_same.items()): + for pattern in patterns: + same = _resolve_samefile(filename, pattern, suffix) + if not same: + continue + samefiles.append(same) + return samefiles + + +def _resolve_samefile(filename, pattern, suffix): + if pattern == filename: + return None + if pattern.endswith(os.path.sep): + pattern += f'*{suffix}' + assert os.path.normpath(pattern) == pattern, (pattern,) + if '*' in os.path.dirname(pattern): + raise NotImplementedError((filename, pattern)) + if '*' not in os.path.basename(pattern): + return pattern + + common = os.path.commonpath([filename, pattern]) + relpattern = pattern[len(common) + len(os.path.sep):] + relpatterndir = os.path.dirname(relpattern) + relfile = filename[len(common) + len(os.path.sep):] + if os.path.basename(pattern) == '*': + return os.path.join(common, relpatterndir, relfile) + elif os.path.basename(relpattern) == '*' + suffix: + return os.path.join(common, relpatterndir, relfile) + else: + raise NotImplementedError((filename, pattern)) + + @contextlib.contextmanager def handling_errors(ignore_exc=None, *, log_err=None): try: diff --git a/Tools/c-analyzer/c_parser/preprocessor/common.py b/Tools/c-analyzer/c_parser/preprocessor/common.py index 63681025c63..4291a066337 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/common.py +++ b/Tools/c-analyzer/c_parser/preprocessor/common.py @@ -44,7 +44,7 @@ def run_cmd(argv, *, return proc.stdout -def preprocess(tool, filename, **kwargs): +def preprocess(tool, filename, cwd=None, **kwargs): argv = _build_argv(tool, filename, **kwargs) logger.debug(' '.join(shlex.quote(v) for v in argv)) @@ -59,19 +59,24 @@ def preprocess(tool, filename, **kwargs): # distutil compiler object's preprocess() method, since that # one writes to stdout/stderr and it's simpler to do it directly # through subprocess. - return run_cmd(argv) + return run_cmd(argv, cwd=cwd) def _build_argv( tool, filename, incldirs=None, + includes=None, macros=None, preargs=None, postargs=None, executable=None, compiler=None, ): + if includes: + includes = tuple(f'-include{i}' for i in includes) + postargs = (includes + postargs) if postargs else includes + compiler = distutils.ccompiler.new_compiler( compiler=compiler or tool, ) diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index bb404a487b7..77080225379 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -7,7 +7,12 @@ from . import common as _common TOOL = 'gcc' # https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html -LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$') +# flags: +# 1 start of a new file +# 2 returning to a file (after including another) +# 3 following text comes from a system header file +# 4 following text treated wrapped in implicit extern "C" block +LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"((?: [1234])*)$') PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*') COMPILER_DIRECTIVE_RE = re.compile(r''' ^ @@ -40,32 +45,112 @@ POST_ARGS = ( ) -def preprocess(filename, incldirs=None, macros=None, samefiles=None): +def preprocess(filename, + incldirs=None, + includes=None, + macros=None, + samefiles=None, + cwd=None, + ): + if not cwd or not os.path.isabs(cwd): + cwd = os.path.abspath(cwd or '.') + filename = _normpath(filename, cwd) text = _common.preprocess( TOOL, filename, incldirs=incldirs, + includes=includes, macros=macros, #preargs=PRE_ARGS, postargs=POST_ARGS, executable=['gcc'], compiler='unix', + cwd=cwd, ) - return _iter_lines(text, filename, samefiles) + return _iter_lines(text, filename, samefiles, cwd) -def _iter_lines(text, filename, samefiles, *, raw=False): +def _iter_lines(text, reqfile, samefiles, cwd, raw=False): lines = iter(text.splitlines()) - # Build the lines and filter out directives. + # The first line is special. + # The next two lines are consistent. + for expected in [ + f'# 1 "{reqfile}"', + '# 1 "<built-in>"', + '# 1 "<command-line>"', + ]: + line = next(lines) + if line != expected: + raise NotImplementedError((line, expected)) + + # Do all the CLI-provided includes. + filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles)) + make_info = (lambda lno: _common.FileInfo(reqfile, lno)) + last = None + for line in lines: + assert last != reqfile, (last,) + lno, included, flags = _parse_marker_line(line, reqfile) + if not included: + raise NotImplementedError((line,)) + if included == reqfile: + # This will be the last one. + assert not flags, (line, flags) + else: + assert 1 in flags, (line, flags) + yield from _iter_top_include_lines( + lines, + _normpath(included, cwd), + cwd, + filter_reqfile, + make_info, + raw, + ) + last = included + # The last one is always the requested file. + assert included == reqfile, (line,) + + +def _iter_top_include_lines(lines, topfile, cwd, + filter_reqfile, make_info, + raw): partial = 0 # depth - origfile = None + files = [topfile] + # We start at 1 in case there are source lines (including blank onces) + # before the first marker line. Also, we already verified in + # _parse_marker_line() that the preprocessor reported lno as 1. + lno = 1 for line in lines: - m = LINE_MARKER_RE.match(line) - if m: - lno, origfile = m.groups() - lno = int(lno) - elif _filter_orig_file(origfile, filename, samefiles): + if line == '# 1 "<command-line>" 2': + # We're done with this top-level include. + return + + _lno, included, flags = _parse_marker_line(line) + if included: + lno = _lno + included = _normpath(included, cwd) + # We hit a marker line. + if 1 in flags: + # We're entering a file. + # XXX Cycles are unexpected? + #assert included not in files, (line, files) + files.append(included) + elif 2 in flags: + # We're returning to a file. + assert files and included in files, (line, files) + assert included != files[-1], (line, files) + while files[-1] != included: + files.pop() + # XXX How can a file return to line 1? + #assert lno > 1, (line, lno) + else: + # It's the next line from the file. + assert included == files[-1], (line, files) + assert lno > 1, (line, lno) + elif not files: + raise NotImplementedError((line,)) + elif filter_reqfile(files[-1]): + assert lno is not None, (line, files[-1]) if (m := PREPROC_DIRECTIVE_RE.match(line)): name, = m.groups() if name != 'pragma': @@ -74,7 +159,7 @@ def _iter_lines(text, filename, samefiles, *, raw=False): if not raw: line, partial = _strip_directives(line, partial=partial) yield _common.SourceLine( - _common.FileInfo(filename, lno), + make_info(lno), 'source', line or '', None, @@ -82,6 +167,34 @@ def _iter_lines(text, filename, samefiles, *, raw=False): lno += 1 +def _parse_marker_line(line, reqfile=None): + m = LINE_MARKER_RE.match(line) + if not m: + return None, None, None + lno, origfile, flags = m.groups() + lno = int(lno) + assert lno > 0, (line, lno) + assert origfile not in ('<built-in>', '<command-line>'), (line,) + flags = set(int(f) for f in flags.split()) if flags else () + + if 1 in flags: + # We're entering a file. + assert lno == 1, (line, lno) + assert 2 not in flags, (line,) + elif 2 in flags: + # We're returning to a file. + #assert lno > 1, (line, lno) + pass + elif reqfile and origfile == reqfile: + # We're starting the requested file. + assert lno == 1, (line, lno) + assert not flags, (line, flags) + else: + # It's the next line from the file. + assert lno > 1, (line, lno) + return lno, origfile, flags + + def _strip_directives(line, partial=0): # We assume there are no string literals with parens in directive bodies. while partial > 0: @@ -106,18 +219,16 @@ def _strip_directives(line, partial=0): return line, partial -def _filter_orig_file(origfile, current, samefiles): - if origfile == current: +def _filter_reqfile(current, reqfile, samefiles): + if current == reqfile: + return True + if current == '<stdin>': return True - if origfile == '<stdin>': + if current in samefiles: return True - if os.path.isabs(origfile): - return False + return False - for filename in samefiles or (): - if filename.endswith(os.path.sep): - filename += os.path.basename(current) - if origfile == filename: - return True - return False +def _normpath(filename, cwd): + assert cwd + return os.path.normpath(os.path.join(cwd, filename)) diff --git a/Tools/c-analyzer/c_parser/preprocessor/pure.py b/Tools/c-analyzer/c_parser/preprocessor/pure.py index e971389b188..f94447ad819 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/pure.py +++ b/Tools/c-analyzer/c_parser/preprocessor/pure.py @@ -4,7 +4,7 @@ from ..source import ( from . import common as _common -def preprocess(lines, filename=None): +def preprocess(lines, filename=None, cwd=None): if isinstance(lines, str): with _open_source(lines, filename) as (lines, filename): yield from preprocess(lines, filename) |