about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Tools/c-analyzer/c_parser
diff options
context:
space:
mode:
author: Eric Snow <ericsnowcurrently@gmail.com> 2020-10-30 15:46:52 -0600
committer: GitHub <noreply@github.com> 2020-10-30 15:46:52 -0600
commit: 4fe72090deb7fb7bc09bfa56c92f6b3b0967d395 (patch)
tree: 264fb8dec036a697595ff539ce9d3e4433224d47 /Tools/c-analyzer/c_parser
parent: b9ee4af4c643a323779fd7076e80b29d611f2709 (diff)
download: cpython-4fe72090deb7fb7bc09bfa56c92f6b3b0967d395.tar.gz
cpython-4fe72090deb7fb7bc09bfa56c92f6b3b0967d395.zip
bpo-36876: Small adjustments to the C-analyzer tool. (GH-23045)
This is a little bit of clean-up, small fixes, and additional helpers prior to building an updated & accurate list of globals to eliminate.
Diffstat (limited to 'Tools/c-analyzer/c_parser')
-rw-r--r-- Tools/c-analyzer/c_parser/datafiles.py | 2
-rw-r--r-- Tools/c-analyzer/c_parser/info.py | 166
-rw-r--r-- Tools/c-analyzer/c_parser/match.py | 177
-rw-r--r-- Tools/c-analyzer/c_parser/parser/__init__.py | 6
-rw-r--r-- Tools/c-analyzer/c_parser/parser/_info.py | 15
-rw-r--r-- Tools/c-analyzer/c_parser/parser/_regexes.py | 3
6 files changed, 236 insertions, 133 deletions
diff --git a/Tools/c-analyzer/c_parser/datafiles.py b/Tools/c-analyzer/c_parser/datafiles.py
index 5bdb946b177..cdd69b1f9b2 100644
--- a/Tools/c-analyzer/c_parser/datafiles.py
+++ b/Tools/c-analyzer/c_parser/datafiles.py
@@ -92,7 +92,7 @@ def write_decls_tsv(decls, outfile, extracolumns=None, *,
**kwargs
):
# XXX Move the row rendering here.
- _write_decls_tsv(rows, outfile, extracolumns, relroot, kwargs)
+ _write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs)
def _iter_decls_tsv(infile, extracolumns=None, relroot=None):
diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py
index a07ce2e0ccb..798a45d2e08 100644
--- a/Tools/c-analyzer/c_parser/info.py
+++ b/Tools/c-analyzer/c_parser/info.py
@@ -7,85 +7,12 @@ from c_common.clsutil import classonly
import c_common.misc as _misc
import c_common.strutil as _strutil
import c_common.tables as _tables
-from .parser._regexes import SIMPLE_TYPE
+from .parser._regexes import SIMPLE_TYPE, _STORAGE
FIXED_TYPE = _misc.Labeled('FIXED_TYPE')
-POTS_REGEX = re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE)
-
-
-def is_pots(typespec):
- if not typespec:
- return None
- if type(typespec) is not str:
- _, _, _, typespec, _ = get_parsed_vartype(typespec)
- return POTS_REGEX.match(typespec) is not None
-
-
-def is_funcptr(vartype):
- if not vartype:
- return None
- _, _, _, _, abstract = get_parsed_vartype(vartype)
- return _is_funcptr(abstract)
-
-
-def _is_funcptr(declstr):
- if not declstr:
- return None
- # XXX Support "(<name>*)(".
- return '(*)(' in declstr.replace(' ', '')
-
-
-def is_exported_symbol(decl):
- _, storage, _, _, _ = get_parsed_vartype(decl)
- raise NotImplementedError
-
-
-def is_process_global(vardecl):
- kind, storage, _, _, _ = get_parsed_vartype(vardecl)
- if kind is not KIND.VARIABLE:
- raise NotImplementedError(vardecl)
- if 'static' in (storage or ''):
- return True
-
- if hasattr(vardecl, 'parent'):
- parent = vardecl.parent
- else:
- parent = vardecl.get('parent')
- return not parent
-
-
-def is_fixed_type(vardecl):
- if not vardecl:
- return None
- _, _, _, typespec, abstract = get_parsed_vartype(vardecl)
- if 'typeof' in typespec:
- raise NotImplementedError(vardecl)
- elif not abstract:
- return True
-
- if '*' not in abstract:
- # XXX What about []?
- return True
- elif _is_funcptr(abstract):
- return True
- else:
- for after in abstract.split('*')[1:]:
- if not after.lstrip().startswith('const'):
- return False
- else:
- return True
-
-
-def is_immutable(vardecl):
- if not vardecl:
- return None
- if not is_fixed_type(vardecl):
- return False
- _, _, typequal, _, _ = get_parsed_vartype(vardecl)
- # If there, it can only be "const" or "volatile".
- return typequal == 'const'
+STORAGE = frozenset(_STORAGE)
#############################
@@ -214,58 +141,8 @@ KIND._GROUPS = {
KIND._GROUPS.update((k.value, {k}) for k in KIND)
-# The module-level kind-related helpers (below) deal with <item>.kind:
-
-def is_type_decl(kind):
- # Handle ParsedItem, Declaration, etc..
- kind = getattr(kind, 'kind', kind)
- return KIND.is_type_decl(kind)
-
-
-def is_decl(kind):
- # Handle ParsedItem, Declaration, etc..
- kind = getattr(kind, 'kind', kind)
- return KIND.is_decl(kind)
-
-
-def filter_by_kind(items, kind):
- if kind == 'type':
- kinds = KIND._TYPE_DECLS
- elif kind == 'decl':
- kinds = KIND._TYPE_DECLS
- try:
- okay = kind in KIND
- except TypeError:
- kinds = set(kind)
- else:
- kinds = {kind} if okay else set(kind)
- for item in items:
- if item.kind in kinds:
- yield item
-
-
-def collate_by_kind(items):
- collated = {kind: [] for kind in KIND}
- for item in items:
- try:
- collated[item.kind].append(item)
- except KeyError:
- raise ValueError(f'unsupported kind in {item!r}')
- return collated
-
-
-def get_kind_group(kind):
- # Handle ParsedItem, Declaration, etc..
- kind = getattr(kind, 'kind', kind)
- return KIND.get_group(kind)
-
-
-def collate_by_kind_group(items):
- collated = {KIND.get_group(k): [] for k in KIND}
- for item in items:
- group = KIND.get_group(item.kind)
- collated[group].append(item)
- return collated
+def get_kind_group(item):
+    """Return the kind group for *item*, as reported by KIND.get_group().
+
+    *item* must expose a "kind" attribute.
+    """
+    kind = item.kind
+    return KIND.get_group(kind)
#############################
@@ -484,6 +361,27 @@ def get_parsed_vartype(decl):
return kind, storage, typequal, typespec, abstract
+def get_default_storage(decl):
+    """Return the storage class implied when *decl* does not spell one out.
+
+    Only variables and functions have a default; None is returned for
+    every other kind.  A declaration without a parent gets "extern" and
+    one with a parent gets "auto".
+    """
+    if decl.kind in (KIND.VARIABLE, KIND.FUNCTION):
+        if decl.parent is None:
+            return 'extern'
+        return 'auto'
+    return None
+
+
+def get_effective_storage(decl, *, default=None):
+    """Return the declared storage of *decl*, falling back to a default.
+
+    If *default* is not given then get_default_storage() supplies it.
+    When there is no default at all the result is None, without even
+    looking at the declaration.  Objects lacking a "storage" attribute
+    have their storage extracted from "decl.data" instead.
+    """
+    # Note that "static" limits access to just that C module
+    # and "extern" (the default for module-level) allows access
+    # outside the C module.
+    fallback = default
+    if fallback is None:
+        fallback = get_default_storage(decl)
+    if fallback is None:
+        return None
+    try:
+        declared = decl.storage
+    except AttributeError:
+        declared, _ = _get_vartype(decl.data)
+    if declared:
+        return declared
+    return fallback
+
+
#############################
# high-level
@@ -997,7 +895,7 @@ class Variable(Declaration):
def __init__(self, file, name, data, parent=None, storage=None):
super().__init__(file, name, data, parent,
- _extra={'storage': storage},
+ _extra={'storage': storage or None},
_shortkey=f'({parent.name}).{name}' if parent else name,
_key=(str(file),
# Tilde comes after all other ascii characters.
@@ -1005,6 +903,11 @@ class Variable(Declaration):
name,
),
)
+ if storage:
+ if storage not in STORAGE:
+ # The parser must need an update.
+ raise NotImplementedError(storage)
+ # Otherwise we trust the compiler to have validated it.
@property
def vartype(self):
@@ -1413,6 +1316,13 @@ def resolve_parsed(parsed):
return cls.from_parsed(parsed)
+def set_flag(item, name, value):
+    """Set attribute *name* on *item* to *value*.
+
+    A plain setattr() is tried first.  If that raises AttributeError
+    then the value is stored through object.__setattr__() instead,
+    which skips any __setattr__ defined by the item's own class.
+    """
+    try:
+        setattr(item, name, value)
+    except AttributeError:
+        base_setattr = object.__setattr__
+        base_setattr(item, name, value)
+
+
#############################
# composite
diff --git a/Tools/c-analyzer/c_parser/match.py b/Tools/c-analyzer/c_parser/match.py
new file mode 100644
index 00000000000..3b5068fd11b
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/match.py
@@ -0,0 +1,177 @@
+import re
+
+from . import info as _info
+from .parser._regexes import SIMPLE_TYPE
+
+
+_KIND = _info.KIND
+
+
+def match_storage(decl, expected):
+    """Return True if the effective storage of *decl* is one of *expected*.
+
+    *expected* may be:
+      * None -- only the declaration's default storage matches
+      * a string -- that storage (an empty string means the default)
+      * an empty collection -- anything in _info.STORAGE matches
+      * a non-empty collection -- any entry matches, with falsy entries
+        standing in for the default
+    """
+    default = _info.get_default_storage(decl)
+    if expected is None:
+        wanted = {default}
+    elif isinstance(expected, str):
+        wanted = {expected if expected else default}
+    elif expected:
+        wanted = {(value if value else default) for value in expected}
+    else:
+        wanted = _info.STORAGE
+    actual = _info.get_effective_storage(decl, default=default)
+    return actual in wanted
+
+
+##################################
+# decl matchers
+
+def is_type_decl(item):
+    """Return whether the kind of *item* satisfies KIND.is_type_decl()."""
+    kind = item.kind
+    return _KIND.is_type_decl(kind)
+
+
+def is_decl(item):
+    """Return whether the kind of *item* satisfies KIND.is_decl()."""
+    kind = item.kind
+    return _KIND.is_decl(kind)
+
+
+def is_pots(typespec, *,
+            _regex=re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE),
+            ):
+    """Return True if the type spec is, in full, a SIMPLE_TYPE.
+
+    *typespec* is either the type spec string itself or something that
+    _info.get_parsed_vartype() accepts, in which case the type spec is
+    taken from the parsed result.  A falsy argument yields None.
+    The "_regex" parameter only caches the compiled pattern.
+    """
+    if not typespec:
+        return None
+    if type(typespec) is str:
+        text = typespec
+    else:
+        _, _, _, text, _ = _info.get_parsed_vartype(typespec)
+    matched = _regex.match(text)
+    return matched is not None
+
+
+def is_funcptr(vartype):
+    """Return True if the abstract declarator of *vartype* is a function pointer.
+
+    A falsy *vartype* yields None.
+    """
+    if vartype:
+        _, _, _, _, abstract = _info.get_parsed_vartype(vartype)
+        return _is_funcptr(abstract)
+    return None
+
+
+def _is_funcptr(declstr):
+    """Return True if *declstr* contains "(*)(" once spaces are ignored.
+
+    A falsy *declstr* yields None.
+    """
+    if declstr:
+        # XXX Support "(<name>*)(".
+        compact = declstr.replace(' ', '')
+        return '(*)(' in compact
+    return None
+
+
+def is_forward_decl(decl):
+    """Return True if *decl* is only a forward declaration.
+
+    Typedefs and variables are never treated as forward.  Other type
+    declarations are forward when they carry no data, and functions when
+    their signature says so.  Any other kind raises NotImplementedError.
+    """
+    kind = decl.kind
+    if kind is _KIND.TYPEDEF:
+        return False
+    if is_type_decl(decl):
+        return not decl.data
+    if kind is _KIND.FUNCTION:
+        # XXX This doesn't work with ParsedItem.
+        return decl.signature.isforward
+    if kind is _KIND.VARIABLE:
+        # No var decls are considered forward (or all are...).
+        return False
+    raise NotImplementedError(decl)
+
+
+def can_have_symbol(decl):
+    """Return True if *decl* is a variable or a function."""
+    symbol_kinds = (_KIND.VARIABLE, _KIND.FUNCTION)
+    return decl.kind in symbol_kinds
+
+
+def has_external_symbol(decl):
+    """Return True if *decl* is a definition with effective "extern" storage.
+
+    Functions qualify only when their signature is not a forward
+    declaration.  Variables qualify only when "extern" is implied rather
+    than written out.
+    """
+    if (not can_have_symbol(decl)
+            or _info.get_effective_storage(decl) != 'extern'):
+        return False
+    if decl.kind is not _KIND.FUNCTION:
+        # It must be a variable, which can only be implicitly extern here.
+        return decl.storage != 'extern'
+    return not decl.signature.isforward
+
+
+def has_internal_symbol(decl):
+    """Return True if *decl* is a variable or function with "static" storage.
+
+    Declarations that cannot have a symbol at all yield False.
+    """
+    if not can_have_symbol(decl):
+        return False
+    # The storage helpers this module relies on are get_default_storage()
+    # and get_effective_storage(); "static" is never a default, so the
+    # effective storage is "static" only when it was declared that way.
+    return _info.get_effective_storage(decl) == 'static'
+
+
+def is_external_reference(decl):
+    """Return True if *decl* is explicitly "extern" and is not a definition.
+
+    Explicitly extern variables always qualify; explicitly extern
+    functions qualify only when their signature is a forward declaration.
+    """
+    if not can_have_symbol(decl):
+        return False
+    # We have to check the declared storage rather than the effective.
+    if decl.storage == 'extern':
+        if decl.kind is _KIND.FUNCTION:
+            return decl.signature.isforward
+        # Otherwise it's a variable.
+        return True
+    return False
+
+
+def is_local_var(decl):
+    """Return True if *decl* is a variable that has a (truthy) parent."""
+    if decl.kind is not _KIND.VARIABLE:
+        return False
+    return bool(decl.parent)
+
+
+def is_global_var(decl):
+    """Return True if *decl* is a variable that has no (truthy) parent."""
+    if decl.kind is not _KIND.VARIABLE:
+        return False
+    return not decl.parent
+
+
+##################################
+# filtering with matchers
+
+def filter_by_kind(items, kind):
+    """Yield each item whose "kind" is selected by *kind*.
+
+    *kind* may be:
+      * "type" -- every kind in KIND._TYPE_DECLS
+      * "decl" -- every kind for which KIND.is_decl() is true
+      * a single KIND member
+      * an iterable of kinds
+    """
+    if kind == 'type':
+        kinds = _KIND._TYPE_DECLS
+    elif kind == 'decl':
+        # NOTE(review): this used to reuse _TYPE_DECLS, which made "decl"
+        # a synonym of "type"; confirm that all declaration kinds are
+        # what callers expect here.
+        kinds = {k for k in _KIND if _KIND.is_decl(k)}
+    else:
+        # Only fall through to the generic handling when no shortcut
+        # matched; otherwise the shortcut's result would be overwritten.
+        try:
+            okay = kind in _KIND
+        except TypeError:
+            # Not something the enum can test for membership,
+            # so treat it as an iterable of kinds.
+            kinds = set(kind)
+        else:
+            kinds = {kind} if okay else set(kind)
+    for item in items:
+        if item.kind in kinds:
+            yield item
+
+
+##################################
+# grouping with matchers
+
+def group_by_category(decls, categories, *, ignore_non_match=True):
+    """Return a dict mapping each category to the decls it matched.
+
+    *categories* maps a category to a predicate.  Each decl is filed
+    under the first category whose predicate accepts it; categories
+    that match nothing are left out of the result.  A decl matching no
+    category is dropped, unless *ignore_non_match* is false, in which
+    case an Exception is raised.
+    """
+    collated = {}
+    for decl in decls:
+        # Matchers should be mutually exclusive. (First match wins.)
+        for category, match in categories.items():
+            if not match(decl):
+                continue
+            collated.setdefault(category, []).append(decl)
+            break
+        else:
+            if not ignore_non_match:
+                raise Exception(f'no match for {decl!r}')
+    return collated
+
+
+def group_by_kind(items):
+    """Return a dict mapping every KIND member to the items of that kind.
+
+    Every kind is present in the result, even when no item has it.
+    An item whose kind is not a key of that dict raises ValueError.
+    """
+    collated = {}
+    for kind in _KIND:
+        collated[kind] = []
+    for item in items:
+        try:
+            bucket = collated[item.kind]
+        except KeyError:
+            raise ValueError(f'unsupported kind in {item!r}')
+        bucket.append(item)
+    return collated
+
+
+def group_by_kinds(items):
+    """Return a dict mapping each kind group to the items in that group.
+
+    The groups are whatever KIND.get_group() reports for the KIND
+    members; every such group is present in the result, even if empty.
+    """
+    # Collate into kind groups (decl, type, etc.).
+    get_group = _KIND.get_group
+    collated = {}
+    for kind in _KIND:
+        collated[get_group(kind)] = []
+    for item in items:
+        collated[get_group(item.kind)].append(item)
+    return collated
diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py
index 7cb34caf09e..4b201c63540 100644
--- a/Tools/c-analyzer/c_parser/parser/__init__.py
+++ b/Tools/c-analyzer/c_parser/parser/__init__.py
@@ -163,6 +163,8 @@ def _parse(srclines, anon_name):
def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
+ maxtext = maxtext if maxtext and maxtext > 0 else None
+ maxlines = maxlines if maxlines and maxlines > 0 else None
filestack = []
allinfo = {}
# "lines" should be (fileinfo, data), as produced by the preprocessor code.
@@ -181,9 +183,7 @@ def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
_logger.debug(f'-> {line}')
srcinfo._add_line(line, fileinfo.lno)
- if len(srcinfo.text) > maxtext:
- break
- if srcinfo.end - srcinfo.start > maxlines:
+ if srcinfo.too_much(maxtext, maxlines):
break
while srcinfo._used():
yield srcinfo
diff --git a/Tools/c-analyzer/c_parser/parser/_info.py b/Tools/c-analyzer/c_parser/parser/_info.py
index 2dcd5e5e760..cc21931b66c 100644
--- a/Tools/c-analyzer/c_parser/parser/_info.py
+++ b/Tools/c-analyzer/c_parser/parser/_info.py
@@ -1,3 +1,5 @@
+import re
+
from ..info import KIND, ParsedItem, FileInfo
@@ -121,6 +123,19 @@ class SourceInfo:
def done(self):
self._set_ready()
+ def too_much(self, maxtext, maxlines):
+ # Report whether the accumulated source has outgrown a limit:
+ # len(self.text) above maxtext, or (self.end - self.start) above
+ # maxlines. A falsy limit (None or 0) disables that check.
+ if maxtext and len(self.text) > maxtext:
+ pass
+ elif maxlines and self.end - self.start > maxlines:
+ pass
+ else:
+ return False
+
+ # NOTE(review): the disabled check below looks like an exemption for
+ # array initializers made up only of integers -- confirm the intent
+ # before re-enabling it.
+ #if re.fullmatch(r'[^;]+\[\][ ]*=[ ]*[{]([ ]*\d+,)*([ ]*\d+,?)\s*',
+ # self._current.text):
+ # return False
+ return True
+
def _set_ready(self):
if self._current is None:
self._ready = False
diff --git a/Tools/c-analyzer/c_parser/parser/_regexes.py b/Tools/c-analyzer/c_parser/parser/_regexes.py
index e9bc31d335a..cb85a59aaa1 100644
--- a/Tools/c-analyzer/c_parser/parser/_regexes.py
+++ b/Tools/c-analyzer/c_parser/parser/_regexes.py
@@ -137,7 +137,8 @@ COMPOUND_TYPE_KIND = r'(?: \b (?: struct | union | enum ) \b )'
#######################################
# variable declarations
-STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )'
+_STORAGE = 'auto register static extern'.split()
+STORAGE_CLASS = rf'(?: \b (?: {" | ".join(_STORAGE)} ) \b )'
TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )'
PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? )'