aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Tools/c-analyzer/c_parser/preprocessor
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/c-analyzer/c_parser/preprocessor')
-rw-r--r--Tools/c-analyzer/c_parser/preprocessor/__init__.py85
-rw-r--r--Tools/c-analyzer/c_parser/preprocessor/common.py9
-rw-r--r--Tools/c-analyzer/c_parser/preprocessor/gcc.py157
-rw-r--r--Tools/c-analyzer/c_parser/preprocessor/pure.py2
4 files changed, 218 insertions, 35 deletions
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__init__.py b/Tools/c-analyzer/c_parser/preprocessor/__init__.py
index c154137bf42..cdc1a4e1269 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/__init__.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/__init__.py
@@ -35,9 +35,11 @@ logger = logging.getLogger(__name__)
def preprocess(source, *,
incldirs=None,
+ includes=None,
macros=None,
samefiles=None,
filename=None,
+ cwd=None,
tool=True,
):
"""...
@@ -45,17 +47,27 @@ def preprocess(source, *,
CWD should be the project root and "source" should be relative.
"""
if tool:
- logger.debug(f'CWD: {os.getcwd()!r}')
- logger.debug(f'incldirs: {incldirs!r}')
- logger.debug(f'macros: {macros!r}')
+ if not cwd:
+ cwd = os.getcwd()
+ logger.debug(f'CWD: {cwd!r}')
+ logger.debug(f'incldirs: {incldirs!r}')
+ logger.debug(f'includes: {includes!r}')
+ logger.debug(f'macros: {macros!r}')
logger.debug(f'samefiles: {samefiles!r}')
_preprocess = _get_preprocessor(tool)
with _good_file(source, filename) as source:
- return _preprocess(source, incldirs, macros, samefiles) or ()
+ return _preprocess(
+ source,
+ incldirs,
+ includes,
+ macros,
+ samefiles,
+ cwd,
+ ) or ()
else:
source, filename = _resolve_source(source, filename)
# We ignore "includes", "macros", etc.
- return _pure.preprocess(source, filename)
+ return _pure.preprocess(source, filename, cwd)
# if _run() returns just the lines:
# text = _run(source)
@@ -72,6 +84,7 @@ def preprocess(source, *,
def get_preprocessor(*,
file_macros=None,
+ file_includes=None,
file_incldirs=None,
file_same=None,
ignore_exc=False,
@@ -80,10 +93,12 @@ def get_preprocessor(*,
_preprocess = preprocess
if file_macros:
file_macros = tuple(_parse_macros(file_macros))
+ if file_includes:
+ file_includes = tuple(_parse_includes(file_includes))
if file_incldirs:
file_incldirs = tuple(_parse_incldirs(file_incldirs))
if file_same:
- file_same = tuple(file_same)
+ file_same = dict(file_same or ())
if not callable(ignore_exc):
ignore_exc = (lambda exc, _ig=ignore_exc: _ig)
@@ -91,16 +106,26 @@ def get_preprocessor(*,
filename = filename.strip()
if file_macros:
macros = list(_resolve_file_values(filename, file_macros))
+ if file_includes:
+ # There's a small chance we could need to filter out any
+ # includes that import "filename". It isn't clear that it's
+ # a problem any longer. If we do end up filtering then
+ # it may make sense to use c_common.fsutil.match_path_tail().
+ includes = [i for i, in _resolve_file_values(filename, file_includes)]
if file_incldirs:
incldirs = [v for v, in _resolve_file_values(filename, file_incldirs)]
+ if file_same:
+ samefiles = _resolve_samefiles(filename, file_same)
def preprocess(**kwargs):
if file_macros and 'macros' not in kwargs:
kwargs['macros'] = macros
+ if file_includes and 'includes' not in kwargs:
+ kwargs['includes'] = includes
if file_incldirs and 'incldirs' not in kwargs:
- kwargs['incldirs'] = [v for v, in _resolve_file_values(filename, file_incldirs)]
- if file_same and 'file_same' not in kwargs:
- kwargs['samefiles'] = file_same
+ kwargs['incldirs'] = incldirs
+ if file_same and 'samefiles' not in kwargs:
+ kwargs['samefiles'] = samefiles
kwargs.setdefault('filename', filename)
with handling_errors(ignore_exc, log_err=log_err):
return _preprocess(filename, **kwargs)
@@ -120,6 +145,11 @@ def _parse_macros(macros):
yield row
+def _parse_includes(includes):
+ for row, srcfile in _parse_table(includes, '\t', 'glob\tinclude', default=None):
+ yield row
+
+
def _parse_incldirs(incldirs):
for row, srcfile in _parse_table(incldirs, '\t', 'glob\tdirname', default=None):
glob, dirname = row
@@ -130,6 +160,43 @@ def _parse_incldirs(incldirs):
yield row
+def _resolve_samefiles(filename, file_same):
+ assert '*' not in filename, (filename,)
+ assert os.path.normpath(filename) == filename, (filename,)
+ _, suffix = os.path.splitext(filename)
+ samefiles = []
+ for patterns, in _resolve_file_values(filename, file_same.items()):
+ for pattern in patterns:
+ same = _resolve_samefile(filename, pattern, suffix)
+ if not same:
+ continue
+ samefiles.append(same)
+ return samefiles
+
+
+def _resolve_samefile(filename, pattern, suffix):
+ if pattern == filename:
+ return None
+ if pattern.endswith(os.path.sep):
+ pattern += f'*{suffix}'
+ assert os.path.normpath(pattern) == pattern, (pattern,)
+ if '*' in os.path.dirname(pattern):
+ raise NotImplementedError((filename, pattern))
+ if '*' not in os.path.basename(pattern):
+ return pattern
+
+ common = os.path.commonpath([filename, pattern])
+ relpattern = pattern[len(common) + len(os.path.sep):]
+ relpatterndir = os.path.dirname(relpattern)
+ relfile = filename[len(common) + len(os.path.sep):]
+ if os.path.basename(pattern) == '*':
+ return os.path.join(common, relpatterndir, relfile)
+ elif os.path.basename(relpattern) == '*' + suffix:
+ return os.path.join(common, relpatterndir, relfile)
+ else:
+ raise NotImplementedError((filename, pattern))
+
+
@contextlib.contextmanager
def handling_errors(ignore_exc=None, *, log_err=None):
try:
diff --git a/Tools/c-analyzer/c_parser/preprocessor/common.py b/Tools/c-analyzer/c_parser/preprocessor/common.py
index 63681025c63..4291a066337 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/common.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/common.py
@@ -44,7 +44,7 @@ def run_cmd(argv, *,
return proc.stdout
-def preprocess(tool, filename, **kwargs):
+def preprocess(tool, filename, cwd=None, **kwargs):
argv = _build_argv(tool, filename, **kwargs)
logger.debug(' '.join(shlex.quote(v) for v in argv))
@@ -59,19 +59,24 @@ def preprocess(tool, filename, **kwargs):
# distutil compiler object's preprocess() method, since that
# one writes to stdout/stderr and it's simpler to do it directly
# through subprocess.
- return run_cmd(argv)
+ return run_cmd(argv, cwd=cwd)
def _build_argv(
tool,
filename,
incldirs=None,
+ includes=None,
macros=None,
preargs=None,
postargs=None,
executable=None,
compiler=None,
):
+ if includes:
+ includes = tuple(f'-include{i}' for i in includes)
+ postargs = (includes + postargs) if postargs else includes
+
compiler = distutils.ccompiler.new_compiler(
compiler=compiler or tool,
)
diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
index bb404a487b7..77080225379 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
@@ -7,7 +7,12 @@ from . import common as _common
TOOL = 'gcc'
# https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
-LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$')
+# flags:
+# 1 start of a new file
+# 2 returning to a file (after including another)
+# 3 following text comes from a system header file
+# 4 following text should be treated as wrapped in an implicit extern "C" block
+LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"((?: [1234])*)$')
PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*')
COMPILER_DIRECTIVE_RE = re.compile(r'''
^
@@ -40,32 +45,112 @@ POST_ARGS = (
)
-def preprocess(filename, incldirs=None, macros=None, samefiles=None):
+def preprocess(filename,
+ incldirs=None,
+ includes=None,
+ macros=None,
+ samefiles=None,
+ cwd=None,
+ ):
+ if not cwd or not os.path.isabs(cwd):
+ cwd = os.path.abspath(cwd or '.')
+ filename = _normpath(filename, cwd)
text = _common.preprocess(
TOOL,
filename,
incldirs=incldirs,
+ includes=includes,
macros=macros,
#preargs=PRE_ARGS,
postargs=POST_ARGS,
executable=['gcc'],
compiler='unix',
+ cwd=cwd,
)
- return _iter_lines(text, filename, samefiles)
+ return _iter_lines(text, filename, samefiles, cwd)
-def _iter_lines(text, filename, samefiles, *, raw=False):
+def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
lines = iter(text.splitlines())
- # Build the lines and filter out directives.
+ # The first line is special.
+ # The next two lines are consistent.
+ for expected in [
+ f'# 1 "{reqfile}"',
+ '# 1 "<built-in>"',
+ '# 1 "<command-line>"',
+ ]:
+ line = next(lines)
+ if line != expected:
+ raise NotImplementedError((line, expected))
+
+ # Do all the CLI-provided includes.
+ filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles))
+ make_info = (lambda lno: _common.FileInfo(reqfile, lno))
+ last = None
+ for line in lines:
+ assert last != reqfile, (last,)
+ lno, included, flags = _parse_marker_line(line, reqfile)
+ if not included:
+ raise NotImplementedError((line,))
+ if included == reqfile:
+ # This will be the last one.
+ assert not flags, (line, flags)
+ else:
+ assert 1 in flags, (line, flags)
+ yield from _iter_top_include_lines(
+ lines,
+ _normpath(included, cwd),
+ cwd,
+ filter_reqfile,
+ make_info,
+ raw,
+ )
+ last = included
+ # The last one is always the requested file.
+ assert included == reqfile, (line,)
+
+
+def _iter_top_include_lines(lines, topfile, cwd,
+ filter_reqfile, make_info,
+ raw):
partial = 0 # depth
- origfile = None
+ files = [topfile]
+ # We start at 1 in case there are source lines (including blank ones)
+ # before the first marker line. Also, we already verified in
+ # _parse_marker_line() that the preprocessor reported lno as 1.
+ lno = 1
for line in lines:
- m = LINE_MARKER_RE.match(line)
- if m:
- lno, origfile = m.groups()
- lno = int(lno)
- elif _filter_orig_file(origfile, filename, samefiles):
+ if line == '# 1 "<command-line>" 2':
+ # We're done with this top-level include.
+ return
+
+ _lno, included, flags = _parse_marker_line(line)
+ if included:
+ lno = _lno
+ included = _normpath(included, cwd)
+ # We hit a marker line.
+ if 1 in flags:
+ # We're entering a file.
+ # XXX Cycles are unexpected?
+ #assert included not in files, (line, files)
+ files.append(included)
+ elif 2 in flags:
+ # We're returning to a file.
+ assert files and included in files, (line, files)
+ assert included != files[-1], (line, files)
+ while files[-1] != included:
+ files.pop()
+ # XXX How can a file return to line 1?
+ #assert lno > 1, (line, lno)
+ else:
+ # It's the next line from the file.
+ assert included == files[-1], (line, files)
+ assert lno > 1, (line, lno)
+ elif not files:
+ raise NotImplementedError((line,))
+ elif filter_reqfile(files[-1]):
+ assert lno is not None, (line, files[-1])
if (m := PREPROC_DIRECTIVE_RE.match(line)):
name, = m.groups()
if name != 'pragma':
@@ -74,7 +159,7 @@ def _iter_lines(text, filename, samefiles, *, raw=False):
if not raw:
line, partial = _strip_directives(line, partial=partial)
yield _common.SourceLine(
- _common.FileInfo(filename, lno),
+ make_info(lno),
'source',
line or '',
None,
@@ -82,6 +167,34 @@ def _iter_lines(text, filename, samefiles, *, raw=False):
lno += 1
+def _parse_marker_line(line, reqfile=None):
+ m = LINE_MARKER_RE.match(line)
+ if not m:
+ return None, None, None
+ lno, origfile, flags = m.groups()
+ lno = int(lno)
+ assert lno > 0, (line, lno)
+ assert origfile not in ('<built-in>', '<command-line>'), (line,)
+ flags = set(int(f) for f in flags.split()) if flags else ()
+
+ if 1 in flags:
+ # We're entering a file.
+ assert lno == 1, (line, lno)
+ assert 2 not in flags, (line,)
+ elif 2 in flags:
+ # We're returning to a file.
+ #assert lno > 1, (line, lno)
+ pass
+ elif reqfile and origfile == reqfile:
+ # We're starting the requested file.
+ assert lno == 1, (line, lno)
+ assert not flags, (line, flags)
+ else:
+ # It's the next line from the file.
+ assert lno > 1, (line, lno)
+ return lno, origfile, flags
+
+
def _strip_directives(line, partial=0):
# We assume there are no string literals with parens in directive bodies.
while partial > 0:
@@ -106,18 +219,16 @@ def _strip_directives(line, partial=0):
return line, partial
-def _filter_orig_file(origfile, current, samefiles):
- if origfile == current:
+def _filter_reqfile(current, reqfile, samefiles):
+ if current == reqfile:
+ return True
+ if current == '<stdin>':
return True
- if origfile == '<stdin>':
+ if current in samefiles:
return True
- if os.path.isabs(origfile):
- return False
+ return False
- for filename in samefiles or ():
- if filename.endswith(os.path.sep):
- filename += os.path.basename(current)
- if origfile == filename:
- return True
- return False
+def _normpath(filename, cwd):
+ assert cwd
+ return os.path.normpath(os.path.join(cwd, filename))
diff --git a/Tools/c-analyzer/c_parser/preprocessor/pure.py b/Tools/c-analyzer/c_parser/preprocessor/pure.py
index e971389b188..f94447ad819 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/pure.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/pure.py
@@ -4,7 +4,7 @@ from ..source import (
from . import common as _common
-def preprocess(lines, filename=None):
+def preprocess(lines, filename=None, cwd=None):
if isinstance(lines, str):
with _open_source(lines, filename) as (lines, filename):
yield from preprocess(lines, filename)