diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2020-10-30 15:46:52 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-30 15:46:52 -0600 |
commit | 4fe72090deb7fb7bc09bfa56c92f6b3b0967d395 (patch) | |
tree | 264fb8dec036a697595ff539ce9d3e4433224d47 /Tools/c-analyzer/c_parser | |
parent | b9ee4af4c643a323779fd7076e80b29d611f2709 (diff) | |
download | cpython-4fe72090deb7fb7bc09bfa56c92f6b3b0967d395.tar.gz cpython-4fe72090deb7fb7bc09bfa56c92f6b3b0967d395.zip |
bpo-36876: Small adjustments to the C-analyzer tool. (GH-23045)
This is a little bit of clean-up, small fixes, and additional helpers prior to building an updated & accurate list of globals to eliminate.
Diffstat (limited to 'Tools/c-analyzer/c_parser')
-rw-r--r-- | Tools/c-analyzer/c_parser/datafiles.py | 2 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/info.py | 166 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/match.py | 177 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/__init__.py | 6 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_info.py | 15 | ||||
-rw-r--r-- | Tools/c-analyzer/c_parser/parser/_regexes.py | 3 |
6 files changed, 236 insertions, 133 deletions
diff --git a/Tools/c-analyzer/c_parser/datafiles.py b/Tools/c-analyzer/c_parser/datafiles.py index 5bdb946b177..cdd69b1f9b2 100644 --- a/Tools/c-analyzer/c_parser/datafiles.py +++ b/Tools/c-analyzer/c_parser/datafiles.py @@ -92,7 +92,7 @@ def write_decls_tsv(decls, outfile, extracolumns=None, *, **kwargs ): # XXX Move the row rendering here. - _write_decls_tsv(rows, outfile, extracolumns, relroot, kwargs) + _write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs) def _iter_decls_tsv(infile, extracolumns=None, relroot=None): diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py index a07ce2e0ccb..798a45d2e08 100644 --- a/Tools/c-analyzer/c_parser/info.py +++ b/Tools/c-analyzer/c_parser/info.py @@ -7,85 +7,12 @@ from c_common.clsutil import classonly import c_common.misc as _misc import c_common.strutil as _strutil import c_common.tables as _tables -from .parser._regexes import SIMPLE_TYPE +from .parser._regexes import SIMPLE_TYPE, _STORAGE FIXED_TYPE = _misc.Labeled('FIXED_TYPE') -POTS_REGEX = re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE) - - -def is_pots(typespec): - if not typespec: - return None - if type(typespec) is not str: - _, _, _, typespec, _ = get_parsed_vartype(typespec) - return POTS_REGEX.match(typespec) is not None - - -def is_funcptr(vartype): - if not vartype: - return None - _, _, _, _, abstract = get_parsed_vartype(vartype) - return _is_funcptr(abstract) - - -def _is_funcptr(declstr): - if not declstr: - return None - # XXX Support "(<name>*)(". 
- return '(*)(' in declstr.replace(' ', '') - - -def is_exported_symbol(decl): - _, storage, _, _, _ = get_parsed_vartype(decl) - raise NotImplementedError - - -def is_process_global(vardecl): - kind, storage, _, _, _ = get_parsed_vartype(vardecl) - if kind is not KIND.VARIABLE: - raise NotImplementedError(vardecl) - if 'static' in (storage or ''): - return True - - if hasattr(vardecl, 'parent'): - parent = vardecl.parent - else: - parent = vardecl.get('parent') - return not parent - - -def is_fixed_type(vardecl): - if not vardecl: - return None - _, _, _, typespec, abstract = get_parsed_vartype(vardecl) - if 'typeof' in typespec: - raise NotImplementedError(vardecl) - elif not abstract: - return True - - if '*' not in abstract: - # XXX What about []? - return True - elif _is_funcptr(abstract): - return True - else: - for after in abstract.split('*')[1:]: - if not after.lstrip().startswith('const'): - return False - else: - return True - - -def is_immutable(vardecl): - if not vardecl: - return None - if not is_fixed_type(vardecl): - return False - _, _, typequal, _, _ = get_parsed_vartype(vardecl) - # If there, it can only be "const" or "volatile". - return typequal == 'const' +STORAGE = frozenset(_STORAGE) ############################# @@ -214,58 +141,8 @@ KIND._GROUPS = { KIND._GROUPS.update((k.value, {k}) for k in KIND) -# The module-level kind-related helpers (below) deal with <item>.kind: - -def is_type_decl(kind): - # Handle ParsedItem, Declaration, etc.. - kind = getattr(kind, 'kind', kind) - return KIND.is_type_decl(kind) - - -def is_decl(kind): - # Handle ParsedItem, Declaration, etc.. 
- kind = getattr(kind, 'kind', kind) - return KIND.is_decl(kind) - - -def filter_by_kind(items, kind): - if kind == 'type': - kinds = KIND._TYPE_DECLS - elif kind == 'decl': - kinds = KIND._TYPE_DECLS - try: - okay = kind in KIND - except TypeError: - kinds = set(kind) - else: - kinds = {kind} if okay else set(kind) - for item in items: - if item.kind in kinds: - yield item - - -def collate_by_kind(items): - collated = {kind: [] for kind in KIND} - for item in items: - try: - collated[item.kind].append(item) - except KeyError: - raise ValueError(f'unsupported kind in {item!r}') - return collated - - -def get_kind_group(kind): - # Handle ParsedItem, Declaration, etc.. - kind = getattr(kind, 'kind', kind) - return KIND.get_group(kind) - - -def collate_by_kind_group(items): - collated = {KIND.get_group(k): [] for k in KIND} - for item in items: - group = KIND.get_group(item.kind) - collated[group].append(item) - return collated +def get_kind_group(item): + return KIND.get_group(item.kind) ############################# @@ -484,6 +361,27 @@ def get_parsed_vartype(decl): return kind, storage, typequal, typespec, abstract +def get_default_storage(decl): + if decl.kind not in (KIND.VARIABLE, KIND.FUNCTION): + return None + return 'extern' if decl.parent is None else 'auto' + + +def get_effective_storage(decl, *, default=None): + # Note that "static" limits access to just that C module + # and "extern" (the default for module-level) allows access + # outside the C module. 
+ if default is None: + default = get_default_storage(decl) + if default is None: + return None + try: + storage = decl.storage + except AttributeError: + storage, _ = _get_vartype(decl.data) + return storage or default + + ############################# # high-level @@ -997,7 +895,7 @@ class Variable(Declaration): def __init__(self, file, name, data, parent=None, storage=None): super().__init__(file, name, data, parent, - _extra={'storage': storage}, + _extra={'storage': storage or None}, _shortkey=f'({parent.name}).{name}' if parent else name, _key=(str(file), # Tilde comes after all other ascii characters. @@ -1005,6 +903,11 @@ class Variable(Declaration): name, ), ) + if storage: + if storage not in STORAGE: + # The parser must need an update. + raise NotImplementedError(storage) + # Otherwise we trust the compiler to have validated it. @property def vartype(self): @@ -1413,6 +1316,13 @@ def resolve_parsed(parsed): return cls.from_parsed(parsed) +def set_flag(item, name, value): + try: + setattr(item, name, value) + except AttributeError: + object.__setattr__(item, name, value) + + ############################# # composite diff --git a/Tools/c-analyzer/c_parser/match.py b/Tools/c-analyzer/c_parser/match.py new file mode 100644 index 00000000000..3b5068fd11b --- /dev/null +++ b/Tools/c-analyzer/c_parser/match.py @@ -0,0 +1,177 @@ +import re + +from . 
import info as _info +from .parser._regexes import SIMPLE_TYPE + + +_KIND = _info.KIND + + +def match_storage(decl, expected): + default = _info.get_default_storage(decl) + #assert default + if expected is None: + expected = {default} + elif isinstance(expected, str): + expected = {expected or default} + elif not expected: + expected = _info.STORAGE + else: + expected = {v or default for v in expected} + storage = _info.get_effective_storage(decl, default=default) + return storage in expected + + +################################## +# decl matchers + +def is_type_decl(item): + return _KIND.is_type_decl(item.kind) + + +def is_decl(item): + return _KIND.is_decl(item.kind) + + +def is_pots(typespec, *, + _regex=re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE), + ): + + if not typespec: + return None + if type(typespec) is not str: + _, _, _, typespec, _ = _info.get_parsed_vartype(typespec) + return _regex.match(typespec) is not None + + +def is_funcptr(vartype): + if not vartype: + return None + _, _, _, _, abstract = _info.get_parsed_vartype(vartype) + return _is_funcptr(abstract) + + +def _is_funcptr(declstr): + if not declstr: + return None + # XXX Support "(<name>*)(". + return '(*)(' in declstr.replace(' ', '') + + +def is_forward_decl(decl): + if decl.kind is _KIND.TYPEDEF: + return False + elif is_type_decl(decl): + return not decl.data + elif decl.kind is _KIND.FUNCTION: + # XXX This doesn't work with ParsedItem. + return decl.signature.isforward + elif decl.kind is _KIND.VARIABLE: + # No var decls are considered forward (or all are...). 
+ return False + else: + raise NotImplementedError(decl) + + +def can_have_symbol(decl): + return decl.kind in (_KIND.VARIABLE, _KIND.FUNCTION) + + +def has_external_symbol(decl): + if not can_have_symbol(decl): + return False + if _info.get_effective_storage(decl) != 'extern': + return False + if decl.kind is _KIND.FUNCTION: + return not decl.signature.isforward + else: + # It must be a variable, which can only be implicitly extern here. + return decl.storage != 'extern' + + +def has_internal_symbol(decl): + if not can_have_symbol(decl): + return False + return _info.get_actual_storage(decl) == 'static' + + +def is_external_reference(decl): + if not can_have_symbol(decl): + return False + # We have to check the declared storage rather than the effective. + if decl.storage != 'extern': + return False + if decl.kind is _KIND.FUNCTION: + return decl.signature.isforward + # Otherwise it's a variable. + return True + + +def is_local_var(decl): + if not decl.kind is _KIND.VARIABLE: + return False + return True if decl.parent else False + + +def is_global_var(decl): + if not decl.kind is _KIND.VARIABLE: + return False + return False if decl.parent else True + + +################################## +# filtering with matchers + +def filter_by_kind(items, kind): + if kind == 'type': + kinds = _KIND._TYPE_DECLS + elif kind == 'decl': + kinds = _KIND._TYPE_DECLS + try: + okay = kind in _KIND + except TypeError: + kinds = set(kind) + else: + kinds = {kind} if okay else set(kind) + for item in items: + if item.kind in kinds: + yield item + + +################################## +# grouping with matchers + +def group_by_category(decls, categories, *, ignore_non_match=True): + collated = {} + for decl in decls: + # Matchers should be mutually exclusive. (First match wins.) 
+ for category, match in categories.items(): + if match(decl): + if category not in collated: + collated[category] = [decl] + else: + collated[category].append(decl) + break + else: + if not ignore_non_match: + raise Exception(f'no match for {decl!r}') + return collated + + +def group_by_kind(items): + collated = {kind: [] for kind in _KIND} + for item in items: + try: + collated[item.kind].append(item) + except KeyError: + raise ValueError(f'unsupported kind in {item!r}') + return collated + + +def group_by_kinds(items): + # Collate into kind groups (decl, type, etc.). + collated = {_KIND.get_group(k): [] for k in _KIND} + for item in items: + group = _KIND.get_group(item.kind) + collated[group].append(item) + return collated diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py index 7cb34caf09e..4b201c63540 100644 --- a/Tools/c-analyzer/c_parser/parser/__init__.py +++ b/Tools/c-analyzer/c_parser/parser/__init__.py @@ -163,6 +163,8 @@ def _parse(srclines, anon_name): def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): + maxtext = maxtext if maxtext and maxtext > 0 else None + maxlines = maxlines if maxlines and maxlines > 0 else None filestack = [] allinfo = {} # "lines" should be (fileinfo, data), as produced by the preprocessor code. 
@@ -181,9 +183,7 @@ def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): _logger.debug(f'-> {line}') srcinfo._add_line(line, fileinfo.lno) - if len(srcinfo.text) > maxtext: - break - if srcinfo.end - srcinfo.start > maxlines: + if srcinfo.too_much(maxtext, maxlines): break while srcinfo._used(): yield srcinfo diff --git a/Tools/c-analyzer/c_parser/parser/_info.py b/Tools/c-analyzer/c_parser/parser/_info.py index 2dcd5e5e760..cc21931b66c 100644 --- a/Tools/c-analyzer/c_parser/parser/_info.py +++ b/Tools/c-analyzer/c_parser/parser/_info.py @@ -1,3 +1,5 @@ +import re + from ..info import KIND, ParsedItem, FileInfo @@ -121,6 +123,19 @@ class SourceInfo: def done(self): self._set_ready() + def too_much(self, maxtext, maxlines): + if maxtext and len(self.text) > maxtext: + pass + elif maxlines and self.end - self.start > maxlines: + pass + else: + return False + + #if re.fullmatch(r'[^;]+\[\][ ]*=[ ]*[{]([ ]*\d+,)*([ ]*\d+,?)\s*', + # self._current.text): + # return False + return True + def _set_ready(self): if self._current is None: self._ready = False diff --git a/Tools/c-analyzer/c_parser/parser/_regexes.py b/Tools/c-analyzer/c_parser/parser/_regexes.py index e9bc31d335a..cb85a59aaa1 100644 --- a/Tools/c-analyzer/c_parser/parser/_regexes.py +++ b/Tools/c-analyzer/c_parser/parser/_regexes.py @@ -137,7 +137,8 @@ COMPOUND_TYPE_KIND = r'(?: \b (?: struct | union | enum ) \b )' ####################################### # variable declarations -STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )' +_STORAGE = 'auto register static extern'.split() +STORAGE_CLASS = rf'(?: \b (?: {" | ".join(_STORAGE)} ) \b )' TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )' PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? )' |