diff options
Diffstat (limited to 'tests/bytecode/pylib-tests/modulefinder.py')
-rw-r--r-- | tests/bytecode/pylib-tests/modulefinder.py | 663 |
1 files changed, 0 insertions, 663 deletions
diff --git a/tests/bytecode/pylib-tests/modulefinder.py b/tests/bytecode/pylib-tests/modulefinder.py deleted file mode 100644 index f90a4327e6..0000000000 --- a/tests/bytecode/pylib-tests/modulefinder.py +++ /dev/null @@ -1,663 +0,0 @@ -"""Find modules used by a script, using introspection.""" - -import dis -import imp -import importlib.machinery -import marshal -import os -import sys -import types -import struct - -# XXX Clean up once str8's cstor matches bytes. -LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')]) -IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')]) -STORE_NAME = bytes([dis.opname.index('STORE_NAME')]) -STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')]) -STORE_OPS = [STORE_NAME, STORE_GLOBAL] -HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT]) - -# Modulefinder does a good job at simulating Python's, but it can not -# handle __path__ modifications packages make at runtime. Therefore there -# is a mechanism whereby you can register extra paths in this map for a -# package, and it will be honored. - -# Note this is a mapping is lists of paths. -packagePathMap = {} - -# A Public interface -def AddPackagePath(packagename, path): - packagePathMap.setdefault(packagename, []).append(path) - -replacePackageMap = {} - -# This ReplacePackage mechanism allows modulefinder to work around -# situations in which a package injects itself under the name -# of another package into sys.modules at runtime by calling -# ReplacePackage("real_package_name", "faked_package_name") -# before running ModuleFinder. - -def ReplacePackage(oldname, newname): - replacePackageMap[oldname] = newname - - -class Module: - - def __init__(self, name, file=None, path=None): - self.__name__ = name - self.__file__ = file - self.__path__ = path - self.__code__ = None - # The set of global names that are assigned to in the module. - # This includes those names imported through starimports of - # Python modules. - self.globalnames = {} - # The set of starimports this module did that could not be - # resolved, ie. a starimport from a non-Python module. - self.starimports = {} - - def __repr__(self): - s = "Module(%r" % (self.__name__,) - if self.__file__ is not None: - s = s + ", %r" % (self.__file__,) - if self.__path__ is not None: - s = s + ", %r" % (self.__path__,) - s = s + ")" - return s - -class ModuleFinder: - - def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): - if path is None: - path = sys.path - self.path = path - self.modules = {} - self.badmodules = {} - self.debug = debug - self.indent = 0 - self.excludes = excludes - self.replace_paths = replace_paths - self.processed_paths = [] # Used in debugging only - - def msg(self, level, str, *args): - if level <= self.debug: - for i in range(self.indent): - print(" ", end=' ') - print(str, end=' ') - for arg in args: - print(repr(arg), end=' ') - print() - - def msgin(self, *args): - level = args[0] - if level <= self.debug: - self.indent = self.indent + 1 - self.msg(*args) - - def msgout(self, *args): - level = args[0] - if level <= self.debug: - self.indent = self.indent - 1 - self.msg(*args) - - def run_script(self, pathname): - self.msg(2, "run_script", pathname) - with open(pathname) as fp: - stuff = ("", "r", imp.PY_SOURCE) - self.load_module('__main__', fp, pathname, stuff) - - def load_file(self, pathname): - dir, name = os.path.split(pathname) - name, ext = os.path.splitext(name) - with open(pathname) as fp: - stuff = (ext, "r", imp.PY_SOURCE) - self.load_module(name, fp, pathname, stuff) - - def import_hook(self, name, caller=None, fromlist=None, level=-1): - self.msg(3, "import_hook", name, caller, fromlist, level) - parent = self.determine_parent(caller, level=level) - q, tail = self.find_head_package(parent, name) - m = self.load_tail(q, tail) - if not fromlist: - return q - if m.__path__: - self.ensure_fromlist(m, fromlist) - return None - - def determine_parent(self, caller, level=-1): - self.msgin(4, "determine_parent", caller, level) - if not caller or level == 0: - self.msgout(4, "determine_parent -> None") - return None - pname = caller.__name__ - if level >= 1: # relative import - if caller.__path__: - level -= 1 - if level == 0: - parent = self.modules[pname] - assert parent is caller - self.msgout(4, "determine_parent ->", parent) - return parent - if pname.count(".") < level: - raise ImportError("relative importpath too deep") - pname = ".".join(pname.split(".")[:-level]) - parent = self.modules[pname] - self.msgout(4, "determine_parent ->", parent) - return parent - if caller.__path__: - parent = self.modules[pname] - assert caller is parent - self.msgout(4, "determine_parent ->", parent) - return parent - if '.' in pname: - i = pname.rfind('.') - pname = pname[:i] - parent = self.modules[pname] - assert parent.__name__ == pname - self.msgout(4, "determine_parent ->", parent) - return parent - self.msgout(4, "determine_parent -> None") - return None - - def find_head_package(self, parent, name): - self.msgin(4, "find_head_package", parent, name) - if '.' in name: - i = name.find('.') - head = name[:i] - tail = name[i+1:] - else: - head = name - tail = "" - if parent: - qname = "%s.%s" % (parent.__name__, head) - else: - qname = head - q = self.import_module(head, qname, parent) - if q: - self.msgout(4, "find_head_package ->", (q, tail)) - return q, tail - if parent: - qname = head - parent = None - q = self.import_module(head, qname, parent) - if q: - self.msgout(4, "find_head_package ->", (q, tail)) - return q, tail - self.msgout(4, "raise ImportError: No module named", qname) - raise ImportError("No module named " + qname) - - def load_tail(self, q, tail): - self.msgin(4, "load_tail", q, tail) - m = q - while tail: - i = tail.find('.') - if i < 0: i = len(tail) - head, tail = tail[:i], tail[i+1:] - mname = "%s.%s" % (m.__name__, head) - m = self.import_module(head, mname, m) - if not m: - self.msgout(4, "raise ImportError: No module named", mname) - raise ImportError("No module named " + mname) - self.msgout(4, "load_tail ->", m) - return m - - def ensure_fromlist(self, m, fromlist, recursive=0): - self.msg(4, "ensure_fromlist", m, fromlist, recursive) - for sub in fromlist: - if sub == "*": - if not recursive: - all = self.find_all_submodules(m) - if all: - self.ensure_fromlist(m, all, 1) - elif not hasattr(m, sub): - subname = "%s.%s" % (m.__name__, sub) - submod = self.import_module(sub, subname, m) - if not submod: - raise ImportError("No module named " + subname) - - def find_all_submodules(self, m): - if not m.__path__: - return - modules = {} - # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. - # But we must also collect Python extension modules - although - # we cannot separate normal dlls from Python extensions. - suffixes = [] - suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] - suffixes += importlib.machinery.SOURCE_SUFFIXES[:] - suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] - for dir in m.__path__: - try: - names = os.listdir(dir) - except os.error: - self.msg(2, "can't list directory", dir) - continue - for name in names: - mod = None - for suff in suffixes: - n = len(suff) - if name[-n:] == suff: - mod = name[:-n] - break - if mod and mod != "__init__": - modules[mod] = mod - return modules.keys() - - def import_module(self, partname, fqname, parent): - self.msgin(3, "import_module", partname, fqname, parent) - try: - m = self.modules[fqname] - except KeyError: - pass - else: - self.msgout(3, "import_module ->", m) - return m - if fqname in self.badmodules: - self.msgout(3, "import_module -> None") - return None - if parent and parent.__path__ is None: - self.msgout(3, "import_module -> None") - return None - try: - fp, pathname, stuff = self.find_module(partname, - parent and parent.__path__, parent) - except ImportError: - self.msgout(3, "import_module ->", None) - return None - try: - m = self.load_module(fqname, fp, pathname, stuff) - finally: - if fp: - fp.close() - if parent: - setattr(parent, partname, m) - self.msgout(3, "import_module ->", m) - return m - - def load_module(self, fqname, fp, pathname, file_info): - suffix, mode, type = file_info - self.msgin(2, "load_module", fqname, fp and "fp", pathname) - if type == imp.PKG_DIRECTORY: - m = self.load_package(fqname, pathname) - self.msgout(2, "load_module ->", m) - return m - if type == imp.PY_SOURCE: - co = compile(fp.read()+'\n', pathname, 'exec') - elif type == imp.PY_COMPILED: - if fp.read(4) != imp.get_magic(): - self.msgout(2, "raise ImportError: Bad magic number", pathname) - raise ImportError("Bad magic number in %s" % pathname) - fp.read(4) - co = marshal.load(fp) - else: - co = None - m = self.add_module(fqname) - m.__file__ = pathname - if co: - if self.replace_paths: - co = self.replace_paths_in_code(co) - m.__code__ = co - self.scan_code(co, m) - self.msgout(2, "load_module ->", m) - return m - - def _add_badmodule(self, name, caller): - if name not in self.badmodules: - self.badmodules[name] = {} - if caller: - self.badmodules[name][caller.__name__] = 1 - else: - self.badmodules[name]["-"] = 1 - - def _safe_import_hook(self, name, caller, fromlist, level=-1): - # wrapper for self.import_hook() that won't raise ImportError - if name in self.badmodules: - self._add_badmodule(name, caller) - return - try: - self.import_hook(name, caller, level=level) - except ImportError as msg: - self.msg(2, "ImportError:", str(msg)) - self._add_badmodule(name, caller) - else: - if fromlist: - for sub in fromlist: - if sub in self.badmodules: - self._add_badmodule(sub, caller) - continue - try: - self.import_hook(name, caller, [sub], level=level) - except ImportError as msg: - self.msg(2, "ImportError:", str(msg)) - fullname = name + "." + sub - self._add_badmodule(fullname, caller) - - def scan_opcodes(self, co, - unpack = struct.unpack): - # Scan the code, and yield 'interesting' opcode combinations - # Version for Python 2.4 and older - code = co.co_code - names = co.co_names - consts = co.co_consts - while code: - c = code[0] - if c in STORE_OPS: - oparg, = unpack('<H', code[1:3]) - yield "store", (names[oparg],) - code = code[3:] - continue - if c == LOAD_CONST and code[3] == IMPORT_NAME: - oparg_1, oparg_2 = unpack('<xHxH', code[:6]) - yield "import", (consts[oparg_1], names[oparg_2]) - code = code[6:] - continue - if c >= HAVE_ARGUMENT: - code = code[3:] - else: - code = code[1:] - - def scan_opcodes_25(self, co, - unpack = struct.unpack): - # Scan the code, and yield 'interesting' opcode combinations - # Python 2.5 version (has absolute and relative imports) - code = co.co_code - names = co.co_names - consts = co.co_consts - LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME - while code: - c = bytes([code[0]]) - if c in STORE_OPS: - oparg, = unpack('<H', code[1:3]) - yield "store", (names[oparg],) - code = code[3:] - continue - if code[:9:3] == LOAD_LOAD_AND_IMPORT: - oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) - level = consts[oparg_1] - if level == 0: # absolute import - yield "absolute_import", (consts[oparg_2], names[oparg_3]) - else: # relative import - yield "relative_import", (level, consts[oparg_2], names[oparg_3]) - code = code[9:] - continue - if c >= HAVE_ARGUMENT: - code = code[3:] - else: - code = code[1:] - - def scan_code(self, co, m): - code = co.co_code - if sys.version_info >= (2, 5): - scanner = self.scan_opcodes_25 - else: - scanner = self.scan_opcodes - for what, args in scanner(co): - if what == "store": - name, = args - m.globalnames[name] = 1 - elif what == "absolute_import": - fromlist, name = args - have_star = 0 - if fromlist is not None: - if "*" in fromlist: - have_star = 1 - fromlist = [f for f in fromlist if f != "*"] - self._safe_import_hook(name, m, fromlist, level=0) - if have_star: - # We've encountered an "import *". If it is a Python module, - # the code has already been parsed and we can suck out the - # global names. - mm = None - if m.__path__: - # At this point we don't know whether 'name' is a - # submodule of 'm' or a global module. Let's just try - # the full name first. - mm = self.modules.get(m.__name__ + "." + name) - if mm is None: - mm = self.modules.get(name) - if mm is not None: - m.globalnames.update(mm.globalnames) - m.starimports.update(mm.starimports) - if mm.__code__ is None: - m.starimports[name] = 1 - else: - m.starimports[name] = 1 - elif what == "relative_import": - level, fromlist, name = args - if name: - self._safe_import_hook(name, m, fromlist, level=level) - else: - parent = self.determine_parent(m, level=level) - self._safe_import_hook(parent.__name__, None, fromlist, level=0) - else: - # We don't expect anything else from the generator. - raise RuntimeError(what) - - for c in co.co_consts: - if isinstance(c, type(co)): - self.scan_code(c, m) - - def load_package(self, fqname, pathname): - self.msgin(2, "load_package", fqname, pathname) - newname = replacePackageMap.get(fqname) - if newname: - fqname = newname - m = self.add_module(fqname) - m.__file__ = pathname - m.__path__ = [pathname] - - # As per comment at top of file, simulate runtime __path__ additions. - m.__path__ = m.__path__ + packagePathMap.get(fqname, []) - - fp, buf, stuff = self.find_module("__init__", m.__path__) - try: - self.load_module(fqname, fp, buf, stuff) - self.msgout(2, "load_package ->", m) - return m - finally: - if fp: - fp.close() - - def add_module(self, fqname): - if fqname in self.modules: - return self.modules[fqname] - self.modules[fqname] = m = Module(fqname) - return m - - def find_module(self, name, path, parent=None): - if parent is not None: - # assert path is not None - fullname = parent.__name__+'.'+name - else: - fullname = name - if fullname in self.excludes: - self.msgout(3, "find_module -> Excluded", fullname) - raise ImportError(name) - - if path is None: - if name in sys.builtin_module_names: - return (None, None, ("", "", imp.C_BUILTIN)) - - path = self.path - return imp.find_module(name, path) - - def report(self): - """Print a report to stdout, listing the found modules with their - paths, as well as modules that are missing, or seem to be missing. - """ - print() - print(" %-25s %s" % ("Name", "File")) - print(" %-25s %s" % ("----", "----")) - # Print modules found - keys = sorted(self.modules.keys()) - for key in keys: - m = self.modules[key] - if m.__path__: - print("P", end=' ') - else: - print("m", end=' ') - print("%-25s" % key, m.__file__ or "") - - # Print missing modules - missing, maybe = self.any_missing_maybe() - if missing: - print() - print("Missing modules:") - for name in missing: - mods = sorted(self.badmodules[name].keys()) - print("?", name, "imported from", ', '.join(mods)) - # Print modules that may be missing, but then again, maybe not... - if maybe: - print() - print("Submodules thay appear to be missing, but could also be", end=' ') - print("global names in the parent package:") - for name in maybe: - mods = sorted(self.badmodules[name].keys()) - print("?", name, "imported from", ', '.join(mods)) - - def any_missing(self): - """Return a list of modules that appear to be missing. Use - any_missing_maybe() if you want to know which modules are - certain to be missing, and which *may* be missing. - """ - missing, maybe = self.any_missing_maybe() - return missing + maybe - - def any_missing_maybe(self): - """Return two lists, one with modules that are certainly missing - and one with modules that *may* be missing. The latter names could - either be submodules *or* just global names in the package. - - The reason it can't always be determined is that it's impossible to - tell which names are imported when "from module import *" is done - with an extension module, short of actually importing it. - """ - missing = [] - maybe = [] - for name in self.badmodules: - if name in self.excludes: - continue - i = name.rfind(".") - if i < 0: - missing.append(name) - continue - subname = name[i+1:] - pkgname = name[:i] - pkg = self.modules.get(pkgname) - if pkg is not None: - if pkgname in self.badmodules[name]: - # The package tried to import this module itself and - # failed. It's definitely missing. - missing.append(name) - elif subname in pkg.globalnames: - # It's a global in the package: definitely not missing. - pass - elif pkg.starimports: - # It could be missing, but the package did an "import *" - # from a non-Python module, so we simply can't be sure. - maybe.append(name) - else: - # It's not a global in the package, the package didn't - # do funny star imports, it's very likely to be missing. - # The symbol could be inserted into the package from the - # outside, but since that's not good style we simply list - # it missing. - missing.append(name) - else: - missing.append(name) - missing.sort() - maybe.sort() - return missing, maybe - - def replace_paths_in_code(self, co): - new_filename = original_filename = os.path.normpath(co.co_filename) - for f, r in self.replace_paths: - if original_filename.startswith(f): - new_filename = r + original_filename[len(f):] - break - - if self.debug and original_filename not in self.processed_paths: - if new_filename != original_filename: - self.msgout(2, "co_filename %r changed to %r" \ - % (original_filename,new_filename,)) - else: - self.msgout(2, "co_filename %r remains unchanged" \ - % (original_filename,)) - self.processed_paths.append(original_filename) - - consts = list(co.co_consts) - for i in range(len(consts)): - if isinstance(consts[i], type(co)): - consts[i] = self.replace_paths_in_code(consts[i]) - - return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, - co.co_flags, co.co_code, tuple(consts), co.co_names, - co.co_varnames, new_filename, co.co_name, - co.co_firstlineno, co.co_lnotab, - co.co_freevars, co.co_cellvars) - - -def test(): - # Parse command line - import getopt - try: - opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") - except getopt.error as msg: - print(msg) - return - - # Process options - debug = 1 - domods = 0 - addpath = [] - exclude = [] - for o, a in opts: - if o == '-d': - debug = debug + 1 - if o == '-m': - domods = 1 - if o == '-p': - addpath = addpath + a.split(os.pathsep) - if o == '-q': - debug = 0 - if o == '-x': - exclude.append(a) - - # Provide default arguments - if not args: - script = "hello.py" - else: - script = args[0] - - # Set the path based on sys.path and the script directory - path = sys.path[:] - path[0] = os.path.dirname(script) - path = addpath + path - if debug > 1: - print("path:") - for item in path: - print(" ", repr(item)) - - # Create the module finder and turn its crank - mf = ModuleFinder(path, debug, exclude) - for arg in args[1:]: - if arg == '-m': - domods = 1 - continue - if domods: - if arg[-2:] == '.*': - mf.import_hook(arg[:-2], None, ["*"]) - else: - mf.import_hook(arg) - else: - mf.load_file(arg) - mf.run_script(script) - mf.report() - return mf # for -i debugging - - -if __name__ == '__main__': - try: - mf = test() - except KeyboardInterrupt: - print("\n[interrupted]") |