author | Damien <damien.p.george@gmail.com> | 2013-12-29 18:21:02 +0000
committer | Damien <damien.p.george@gmail.com> | 2013-12-29 18:21:02 +0000
commit | 2eda70874ef193c609207db2ba87324244afcf84 (patch)
tree | 5a570df6296c40f7774d6d25cd3cc4c38fc1e1ff /tests/bytecode/pylib-tests
parent | de690d128bbec4f53e8a7d6149738a6facae4382 (diff)
download | micropython-2eda70874ef193c609207db2ba87324244afcf84.tar.gz, micropython-2eda70874ef193c609207db2ba87324244afcf84.zip
Add tests to test compiler and emitted byte code.
Diffstat (limited to 'tests/bytecode/pylib-tests')
54 files changed, 14659 insertions, 0 deletions
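The files below are copies of CPython standard-library modules, added so the MicroPython compiler's output can be checked against a reference. The test harness itself is not part of this diff excerpt; as a minimal sketch of the kind of comparison these files enable, the following assumes you want to compile each module with CPython and dump the bytecode it emits, so that dump can be diffed against the output of the MicroPython compiler for the same source. The `dump_bytecode` helper and the loop are illustrative assumptions, not the project's actual test runner.

```python
# Hypothetical sketch, not part of this commit: dump CPython's emitted
# bytecode for each test module under tests/bytecode/pylib-tests.
import dis
import glob
import io

def dump_bytecode(path):
    """Compile one source file and return CPython's disassembly of it."""
    with open(path) as f:
        code = compile(f.read(), path, 'exec')
    buf = io.StringIO()
    dis.dis(code, file=buf)  # the 'file' keyword requires Python 3.4+
    return buf.getvalue()

for path in sorted(glob.glob('tests/bytecode/pylib-tests/*.py')):
    # A harness would diff each dump against the MicroPython compiler's
    # output for the same file; here we just print it.
    print('==', path)
    print(dump_bytecode(path))
```

Note that the files are only ever compiled by such a test, never executed, which is why they can be stored verbatim.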
diff --git a/tests/bytecode/pylib-tests/_compat_pickle.py b/tests/bytecode/pylib-tests/_compat_pickle.py new file mode 100644 index 0000000000..700c80cd57 --- /dev/null +++ b/tests/bytecode/pylib-tests/_compat_pickle.py @@ -0,0 +1,81 @@ +# This module is used to map the old Python 2 names to the new names used in +# Python 3 for the pickle module. This needed to make pickle streams +# generated with Python 2 loadable by Python 3. + +# This is a copy of lib2to3.fixes.fix_imports.MAPPING. We cannot import +# lib2to3 and use the mapping defined there, because lib2to3 uses pickle. +# Thus, this could cause the module to be imported recursively. +IMPORT_MAPPING = { + 'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + 'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', + 'whichdb': 'dbm', + 'anydbm': 'dbm' +} + + +# This contains rename rules that are easy to handle. We ignore the more +# complex stuff (e.g. mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. +NAME_MAPPING = { + ('__builtin__', 'xrange'): ('builtins', 'range'), + ('__builtin__', 'reduce'): ('functools', 'reduce'), + ('__builtin__', 'intern'): ('sys', 'intern'), + ('__builtin__', 'unichr'): ('builtins', 'chr'), + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'long'): ('builtins', 'int'), + ('itertools', 'izip'): ('builtins', 'zip'), + ('itertools', 'imap'): ('builtins', 'map'), + ('itertools', 'ifilter'): ('builtins', 'filter'), + ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} + +# Same, but for 3.x to 2.x +REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items()) +REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items()) diff --git a/tests/bytecode/pylib-tests/_threading_local.py b/tests/bytecode/pylib-tests/_threading_local.py new file mode 100644 index 0000000000..4ec4828144 --- /dev/null +++ b/tests/bytecode/pylib-tests/_threading_local.py @@ -0,0 +1,246 @@ +"""Thread-local objects. + +(Note that this module provides a Python version of the threading.local + class. 
Depending on the version of Python you're using, there may be a + faster one available. You should always import the `local` class from + `threading`.) + +Thread-local objects support the management of thread-local data. +If you have data that you want to be local to a thread, simply create +a thread-local object and use its attributes: + + >>> mydata = local() + >>> mydata.number = 42 + >>> mydata.number + 42 + +You can also access the local-object's dictionary: + + >>> mydata.__dict__ + {'number': 42} + >>> mydata.__dict__.setdefault('widgets', []) + [] + >>> mydata.widgets + [] + +What's important about thread-local objects is that their data are +local to a thread. If we access the data in a different thread: + + >>> log = [] + >>> def f(): + ... items = sorted(mydata.__dict__.items()) + ... log.append(items) + ... mydata.number = 11 + ... log.append(mydata.number) + + >>> import threading + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[], 11] + +we get different data. Furthermore, changes made in the other thread +don't affect data seen in this thread: + + >>> mydata.number + 42 + +Of course, values you get from a local object, including a __dict__ +attribute, are for whatever thread was current at the time the +attribute was read. For that reason, you generally don't want to save +these values across threads, as they apply only to the thread they +came from. + +You can create custom local objects by subclassing the local class: + + >>> class MyLocal(local): + ... number = 2 + ... initialized = False + ... def __init__(self, **kw): + ... if self.initialized: + ... raise SystemError('__init__ called too many times') + ... self.initialized = True + ... self.__dict__.update(kw) + ... def squared(self): + ... return self.number ** 2 + +This can be useful to support default values, methods and +initialization. Note that if you define an __init__ method, it will be +called each time the local object is used in a separate thread. This +is necessary to initialize each thread's dictionary. + +Now if we create a local object: + + >>> mydata = MyLocal(color='red') + +Now we have a default number: + + >>> mydata.number + 2 + +an initial color: + + >>> mydata.color + 'red' + >>> del mydata.color + +And a method that operates on the data: + + >>> mydata.squared() + 4 + +As before, we can access the data in a separate thread: + + >>> log = [] + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[('color', 'red'), ('initialized', True)], 11] + +without affecting this thread's data: + + >>> mydata.number + 2 + >>> mydata.color + Traceback (most recent call last): + ... + AttributeError: 'MyLocal' object has no attribute 'color' + +Note that subclasses can define slots, but they are not thread +local. They are shared across threads: + + >>> class MyLocal(local): + ... __slots__ = 'number' + + >>> mydata = MyLocal() + >>> mydata.number = 42 + >>> mydata.color = 'red' + +So, the separate thread: + + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + +affects what we see: + + >>> mydata.number + 11 + +>>> del mydata +""" + +from weakref import ref +from contextlib import contextmanager + +__all__ = ["local"] + +# We need to use objects from the threading module, but the threading +# module may also want to use our `local` class, if support for locals +# isn't compiled in to the `thread` module. This creates potential problems +# with circular imports. 
For that reason, we don't import `threading` +# until the bottom of this file (a hack sufficient to worm around the +# potential problems). Note that all platforms on CPython do have support +# for locals in the `thread` module, and there is no circular import problem +# then, so problems introduced by fiddling the order of imports here won't +# manifest. + +class _localimpl: + """A class managing thread-local dicts""" + __slots__ = 'key', 'dicts', 'localargs', 'locallock', '__weakref__' + + def __init__(self): + # The key used in the Thread objects' attribute dicts. + # We keep it a string for speed but make it unlikely to clash with + # a "real" attribute. + self.key = '_threading_local._localimpl.' + str(id(self)) + # { id(Thread) -> (ref(Thread), thread-local dict) } + self.dicts = {} + + def get_dict(self): + """Return the dict for the current thread. Raises KeyError if none + defined.""" + thread = current_thread() + return self.dicts[id(thread)][1] + + def create_dict(self): + """Create a new dict for the current thread, and return it.""" + localdict = {} + key = self.key + thread = current_thread() + idt = id(thread) + def local_deleted(_, key=key): + # When the localimpl is deleted, remove the thread attribute. + thread = wrthread() + if thread is not None: + del thread.__dict__[key] + def thread_deleted(_, idt=idt): + # When the thread is deleted, remove the local dict. + # Note that this is suboptimal if the thread object gets + # caught in a reference loop. We would like to be called + # as soon as the OS-level thread ends instead. + local = wrlocal() + if local is not None: + dct = local.dicts.pop(idt) + wrlocal = ref(self, local_deleted) + wrthread = ref(thread, thread_deleted) + thread.__dict__[key] = wrlocal + self.dicts[idt] = wrthread, localdict + return localdict + + +@contextmanager +def _patch(self): + impl = object.__getattribute__(self, '_local__impl') + try: + dct = impl.get_dict() + except KeyError: + dct = impl.create_dict() + args, kw = impl.localargs + self.__init__(*args, **kw) + with impl.locallock: + object.__setattr__(self, '__dict__', dct) + yield + + +class local: + __slots__ = '_local__impl', '__dict__' + + def __new__(cls, *args, **kw): + if (args or kw) and (cls.__init__ is object.__init__): + raise TypeError("Initialization arguments are not supported") + self = object.__new__(cls) + impl = _localimpl() + impl.localargs = (args, kw) + impl.locallock = RLock() + object.__setattr__(self, '_local__impl', impl) + # We need to create the thread dict in anticipation of + # __init__ being called, to make sure we don't call it + # again ourselves. + impl.create_dict() + return self + + def __getattribute__(self, name): + with _patch(self): + return object.__getattribute__(self, name) + + def __setattr__(self, name, value): + if name == '__dict__': + raise AttributeError( + "%r object attribute '__dict__' is read-only" + % self.__class__.__name__) + with _patch(self): + return object.__setattr__(self, name, value) + + def __delattr__(self, name): + if name == '__dict__': + raise AttributeError( + "%r object attribute '__dict__' is read-only" + % self.__class__.__name__) + with _patch(self): + return object.__delattr__(self, name) + + +from threading import current_thread, RLock diff --git a/tests/bytecode/pylib-tests/_weakrefset.py b/tests/bytecode/pylib-tests/_weakrefset.py new file mode 100644 index 0000000000..6a98b88e33 --- /dev/null +++ b/tests/bytecode/pylib-tests/_weakrefset.py @@ -0,0 +1,194 @@ +# Access WeakSet through the weakref module. 
+# This code is separated-out because it is needed +# by abc.py to load everything else at startup. + +from _weakref import ref + +__all__ = ['WeakSet'] + + +class _IterationGuard: + # This context manager registers itself in the current iterators of the + # weak container, such as to delay all removals until the context manager + # exits. + # This technique should be relatively thread-safe (since sets are). + + def __init__(self, weakcontainer): + # Don't create cycles + self.weakcontainer = ref(weakcontainer) + + def __enter__(self): + w = self.weakcontainer() + if w is not None: + w._iterating.add(self) + return self + + def __exit__(self, e, t, b): + w = self.weakcontainer() + if w is not None: + s = w._iterating + s.remove(self) + if not s: + w._commit_removals() + + +class WeakSet: + def __init__(self, data=None): + self.data = set() + def _remove(item, selfref=ref(self)): + self = selfref() + if self is not None: + if self._iterating: + self._pending_removals.append(item) + else: + self.data.discard(item) + self._remove = _remove + # A list of keys to be removed + self._pending_removals = [] + self._iterating = set() + if data is not None: + self.update(data) + + def _commit_removals(self): + l = self._pending_removals + discard = self.data.discard + while l: + discard(l.pop()) + + def __iter__(self): + with _IterationGuard(self): + for itemref in self.data: + item = itemref() + if item is not None: + yield item + + def __len__(self): + return len(self.data) - len(self._pending_removals) + + def __contains__(self, item): + try: + wr = ref(item) + except TypeError: + return False + return wr in self.data + + def __reduce__(self): + return (self.__class__, (list(self),), + getattr(self, '__dict__', None)) + + def add(self, item): + if self._pending_removals: + self._commit_removals() + self.data.add(ref(item, self._remove)) + + def clear(self): + if self._pending_removals: + self._commit_removals() + self.data.clear() + + def copy(self): + return self.__class__(self) + + def pop(self): + if self._pending_removals: + self._commit_removals() + while True: + try: + itemref = self.data.pop() + except KeyError: + raise KeyError('pop from empty WeakSet') + item = itemref() + if item is not None: + return item + + def remove(self, item): + if self._pending_removals: + self._commit_removals() + self.data.remove(ref(item)) + + def discard(self, item): + if self._pending_removals: + self._commit_removals() + self.data.discard(ref(item)) + + def update(self, other): + if self._pending_removals: + self._commit_removals() + for element in other: + self.add(element) + + def __ior__(self, other): + self.update(other) + return self + + def difference(self, other): + newset = self.copy() + newset.difference_update(other) + return newset + __sub__ = difference + + def difference_update(self, other): + self.__isub__(other) + def __isub__(self, other): + if self._pending_removals: + self._commit_removals() + if self is other: + self.data.clear() + else: + self.data.difference_update(ref(item) for item in other) + return self + + def intersection(self, other): + return self.__class__(item for item in other if item in self) + __and__ = intersection + + def intersection_update(self, other): + self.__iand__(other) + def __iand__(self, other): + if self._pending_removals: + self._commit_removals() + self.data.intersection_update(ref(item) for item in other) + return self + + def issubset(self, other): + return self.data.issubset(ref(item) for item in other) + __le__ = issubset + + def __lt__(self, other): + 
return self.data < set(ref(item) for item in other) + + def issuperset(self, other): + return self.data.issuperset(ref(item) for item in other) + __ge__ = issuperset + + def __gt__(self, other): + return self.data > set(ref(item) for item in other) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.data == set(ref(item) for item in other) + + def symmetric_difference(self, other): + newset = self.copy() + newset.symmetric_difference_update(other) + return newset + __xor__ = symmetric_difference + + def symmetric_difference_update(self, other): + self.__ixor__(other) + def __ixor__(self, other): + if self._pending_removals: + self._commit_removals() + if self is other: + self.data.clear() + else: + self.data.symmetric_difference_update(ref(item, self._remove) for item in other) + return self + + def union(self, other): + return self.__class__(e for s in (self, other) for e in s) + __or__ = union + + def isdisjoint(self, other): + return len(self.intersection(other)) == 0 diff --git a/tests/bytecode/pylib-tests/abc.py b/tests/bytecode/pylib-tests/abc.py new file mode 100644 index 0000000000..09778e8609 --- /dev/null +++ b/tests/bytecode/pylib-tests/abc.py @@ -0,0 +1,228 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Abstract Base Classes (ABCs) according to PEP 3119.""" + +from _weakrefset import WeakSet + +def abstractmethod(funcobj): + """A decorator indicating abstract methods. + + Requires that the metaclass is ABCMeta or derived from it. A + class that has a metaclass derived from ABCMeta cannot be + instantiated unless all of its abstract methods are overridden. + The abstract methods can be called using any of the normal + 'super' call mechanisms. + + Usage: + + class C(metaclass=ABCMeta): + @abstractmethod + def my_abstract_method(self, ...): + ... + """ + funcobj.__isabstractmethod__ = True + return funcobj + + +class abstractclassmethod(classmethod): + """ + A decorator indicating abstract classmethods. + + Similar to abstractmethod. + + Usage: + + class C(metaclass=ABCMeta): + @abstractclassmethod + def my_abstract_classmethod(cls, ...): + ... + + 'abstractclassmethod' is deprecated. Use 'classmethod' with + 'abstractmethod' instead. + """ + + __isabstractmethod__ = True + + def __init__(self, callable): + callable.__isabstractmethod__ = True + super().__init__(callable) + + +class abstractstaticmethod(staticmethod): + """ + A decorator indicating abstract staticmethods. + + Similar to abstractmethod. + + Usage: + + class C(metaclass=ABCMeta): + @abstractstaticmethod + def my_abstract_staticmethod(...): + ... + + 'abstractstaticmethod' is deprecated. Use 'staticmethod' with + 'abstractmethod' instead. + """ + + __isabstractmethod__ = True + + def __init__(self, callable): + callable.__isabstractmethod__ = True + super().__init__(callable) + + +class abstractproperty(property): + """ + A decorator indicating abstract properties. + + Requires that the metaclass is ABCMeta or derived from it. A + class that has a metaclass derived from ABCMeta cannot be + instantiated unless all of its abstract properties are overridden. + The abstract properties can be called using any of the normal + 'super' call mechanisms. + + Usage: + + class C(metaclass=ABCMeta): + @abstractproperty + def my_abstract_property(self): + ... 
+ + This defines a read-only property; you can also define a read-write + abstract property using the 'long' form of property declaration: + + class C(metaclass=ABCMeta): + def getx(self): ... + def setx(self, value): ... + x = abstractproperty(getx, setx) + + 'abstractproperty' is deprecated. Use 'property' with 'abstractmethod' + instead. + """ + + __isabstractmethod__ = True + + +class ABCMeta(type): + + """Metaclass for defining Abstract Base Classes (ABCs). + + Use this metaclass to create an ABC. An ABC can be subclassed + directly, and then acts as a mix-in class. You can also register + unrelated concrete classes (even built-in classes) and unrelated + ABCs as 'virtual subclasses' -- these and their descendants will + be considered subclasses of the registering ABC by the built-in + issubclass() function, but the registering ABC won't show up in + their MRO (Method Resolution Order) nor will method + implementations defined by the registering ABC be callable (not + even via super()). + + """ + + # A global counter that is incremented each time a class is + # registered as a virtual subclass of anything. It forces the + # negative cache to be cleared before its next use. + _abc_invalidation_counter = 0 + + def __new__(mcls, name, bases, namespace): + cls = super().__new__(mcls, name, bases, namespace) + # Compute set of abstract method names + abstracts = {name + for name, value in namespace.items() + if getattr(value, "__isabstractmethod__", False)} + for base in bases: + for name in getattr(base, "__abstractmethods__", set()): + value = getattr(cls, name, None) + if getattr(value, "__isabstractmethod__", False): + abstracts.add(name) + cls.__abstractmethods__ = frozenset(abstracts) + # Set up inheritance registry + cls._abc_registry = WeakSet() + cls._abc_cache = WeakSet() + cls._abc_negative_cache = WeakSet() + cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter + return cls + + def register(cls, subclass): + """Register a virtual subclass of an ABC. + + Returns the subclass, to allow usage as a class decorator. + """ + if not isinstance(subclass, type): + raise TypeError("Can only register classes") + if issubclass(subclass, cls): + return subclass # Already a subclass + # Subtle: test for cycles *after* testing for "already a subclass"; + # this means we allow X.register(X) and interpret it as a no-op. + if issubclass(cls, subclass): + # This would create a cycle, which is bad for the algorithm below + raise RuntimeError("Refusing to create an inheritance cycle") + cls._abc_registry.add(subclass) + ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache + return subclass + + def _dump_registry(cls, file=None): + """Debug helper to print the ABC registry.""" + print("Class: %s.%s" % (cls.__module__, cls.__name__), file=file) + print("Inv.counter: %s" % ABCMeta._abc_invalidation_counter, file=file) + for name in sorted(cls.__dict__.keys()): + if name.startswith("_abc_"): + value = getattr(cls, name) + print("%s: %r" % (name, value), file=file) + + def __instancecheck__(cls, instance): + """Override for isinstance(instance, cls).""" + # Inline the cache checking + subclass = instance.__class__ + if subclass in cls._abc_cache: + return True + subtype = type(instance) + if subtype is subclass: + if (cls._abc_negative_cache_version == + ABCMeta._abc_invalidation_counter and + subclass in cls._abc_negative_cache): + return False + # Fall back to the subclass check. 
+ return cls.__subclasscheck__(subclass) + return any(cls.__subclasscheck__(c) for c in {subclass, subtype}) + + def __subclasscheck__(cls, subclass): + """Override for issubclass(subclass, cls).""" + # Check cache + if subclass in cls._abc_cache: + return True + # Check negative cache; may have to invalidate + if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter: + # Invalidate the negative cache + cls._abc_negative_cache = WeakSet() + cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter + elif subclass in cls._abc_negative_cache: + return False + # Check the subclass hook + ok = cls.__subclasshook__(subclass) + if ok is not NotImplemented: + assert isinstance(ok, bool) + if ok: + cls._abc_cache.add(subclass) + else: + cls._abc_negative_cache.add(subclass) + return ok + # Check if it's a direct subclass + if cls in getattr(subclass, '__mro__', ()): + cls._abc_cache.add(subclass) + return True + # Check if it's a subclass of a registered class (recursive) + for rcls in cls._abc_registry: + if issubclass(subclass, rcls): + cls._abc_cache.add(subclass) + return True + # Check if it's a subclass of a subclass (recursive) + for scls in cls.__subclasses__(): + if issubclass(subclass, scls): + cls._abc_cache.add(subclass) + return True + # No dice; update negative cache + cls._abc_negative_cache.add(subclass) + return False diff --git a/tests/bytecode/pylib-tests/aifc.py b/tests/bytecode/pylib-tests/aifc.py new file mode 100644 index 0000000000..dd17d1dc27 --- /dev/null +++ b/tests/bytecode/pylib-tests/aifc.py @@ -0,0 +1,895 @@ +"""Stuff to parse AIFF-C and AIFF files. + +Unless explicitly stated otherwise, the description below is true +both for AIFF-C files and AIFF files. + +An AIFF-C file has the following structure. + + +-----------------+ + | FORM | + +-----------------+ + | <size> | + +----+------------+ + | | AIFC | + | +------------+ + | | <chunks> | + | | . | + | | . | + | | . | + +----+------------+ + +An AIFF file has the string "AIFF" instead of "AIFC". + +A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, +big endian order), followed by the data. The size field does not include +the size of the 8 byte header. + +The following chunk types are recognized. + + FVER + <version number of AIFF-C defining document> (AIFF-C only). + MARK + <# of markers> (2 bytes) + list of markers: + <marker ID> (2 bytes, must be > 0) + <position> (4 bytes) + <marker name> ("pstring") + COMM + <# of channels> (2 bytes) + <# of sound frames> (4 bytes) + <size of the samples> (2 bytes) + <sampling frequency> (10 bytes, IEEE 80-bit extended + floating point) + in AIFF-C files only: + <compression type> (4 bytes) + <human-readable version of compression type> ("pstring") + SSND + <offset> (4 bytes, not used by this program) + <blocksize> (4 bytes, not used by this program) + <sound data> + +A pstring consists of 1 byte length, a string of characters, and 0 or 1 +byte pad to make the total length even. + +Usage. + +Reading AIFF files: + f = aifc.open(file, 'r') +where file is either the name of a file or an open file pointer. +The open file pointer must have methods read(), seek(), and close(). +In some types of audio files, if the setpos() method is not used, +the seek() method is not necessary. 
+ +This returns an instance of a class with the following public methods: + getnchannels() -- returns number of audio channels (1 for + mono, 2 for stereo) + getsampwidth() -- returns sample width in bytes + getframerate() -- returns sampling frequency + getnframes() -- returns number of audio frames + getcomptype() -- returns compression type ('NONE' for AIFF files) + getcompname() -- returns human-readable version of + compression type ('not compressed' for AIFF files) + getparams() -- returns a tuple consisting of all of the + above in the above order + getmarkers() -- get the list of marks in the audio file or None + if there are no marks + getmark(id) -- get mark with the specified id (raises an error + if the mark does not exist) + readframes(n) -- returns at most n frames of audio + rewind() -- rewind to the beginning of the audio stream + setpos(pos) -- seek to the specified position + tell() -- return the current position + close() -- close the instance (make it unusable) +The position returned by tell(), the position given to setpos() and +the position of marks are all compatible and have nothing to do with +the actual position in the file. +The close() method is called automatically when the class instance +is destroyed. + +Writing AIFF files: + f = aifc.open(file, 'w') +where file is either the name of a file or an open file pointer. +The open file pointer must have methods write(), tell(), seek(), and +close(). + +This returns an instance of a class with the following public methods: + aiff() -- create an AIFF file (AIFF-C default) + aifc() -- create an AIFF-C file + setnchannels(n) -- set the number of channels + setsampwidth(n) -- set the sample width + setframerate(n) -- set the frame rate + setnframes(n) -- set the number of frames + setcomptype(type, name) + -- set the compression type and the + human-readable compression type + setparams(tuple) + -- set all parameters at once + setmark(id, pos, name) + -- add specified mark to the list of marks + tell() -- return current position in output file (useful + in combination with setmark()) + writeframesraw(data) + -- write audio frames without pathing up the + file header + writeframes(data) + -- write audio frames and patch up the file header + close() -- patch up the file header and close the + output file +You should set the parameters before the first writeframesraw or +writeframes. The total number of frames does not need to be set, +but when it is set to the correct value, the header does not have to +be patched up. +It is best to first set all parameters, perhaps possibly the +compression type, and then write audio frames using writeframesraw. +When all frames have been written, either call writeframes('') or +close() to patch up the sizes in the header. +Marks can be added anytime. If there are any marks, ypu must call +close() after all frames have been written. +The close() method is called automatically when the class instance +is destroyed. + +When a file is opened with the extension '.aiff', an AIFF file is +written, otherwise an AIFF-C file is written. This default can be +changed by calling aiff() or aifc() before the first writeframes or +writeframesraw. 
+""" + +import struct +import builtins +import warnings + +__all__ = ["Error", "open", "openfp"] + +class Error(Exception): + pass + +_AIFC_version = 0xA2805140 # Version 1 of AIFF-C + +def _read_long(file): + try: + return struct.unpack('>l', file.read(4))[0] + except struct.error: + raise EOFError + +def _read_ulong(file): + try: + return struct.unpack('>L', file.read(4))[0] + except struct.error: + raise EOFError + +def _read_short(file): + try: + return struct.unpack('>h', file.read(2))[0] + except struct.error: + raise EOFError + +def _read_ushort(file): + try: + return struct.unpack('>H', file.read(2))[0] + except struct.error: + raise EOFError + +def _read_string(file): + length = ord(file.read(1)) + if length == 0: + data = b'' + else: + data = file.read(length) + if length & 1 == 0: + dummy = file.read(1) + return data + +_HUGE_VAL = 1.79769313486231e+308 # See <limits.h> + +def _read_float(f): # 10 bytes + expon = _read_short(f) # 2 bytes + sign = 1 + if expon < 0: + sign = -1 + expon = expon + 0x8000 + himant = _read_ulong(f) # 4 bytes + lomant = _read_ulong(f) # 4 bytes + if expon == himant == lomant == 0: + f = 0.0 + elif expon == 0x7FFF: + f = _HUGE_VAL + else: + expon = expon - 16383 + f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) + return sign * f + +def _write_short(f, x): + f.write(struct.pack('>h', x)) + +def _write_ushort(f, x): + f.write(struct.pack('>H', x)) + +def _write_long(f, x): + f.write(struct.pack('>l', x)) + +def _write_ulong(f, x): + f.write(struct.pack('>L', x)) + +def _write_string(f, s): + if len(s) > 255: + raise ValueError("string exceeds maximum pstring length") + f.write(struct.pack('B', len(s))) + f.write(s) + if len(s) & 1 == 0: + f.write(b'\x00') + +def _write_float(f, x): + import math + if x < 0: + sign = 0x8000 + x = x * -1 + else: + sign = 0 + if x == 0: + expon = 0 + himant = 0 + lomant = 0 + else: + fmant, expon = math.frexp(x) + if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN + expon = sign|0x7FFF + himant = 0 + lomant = 0 + else: # Finite + expon = expon + 16382 + if expon < 0: # denormalized + fmant = math.ldexp(fmant, expon) + expon = 0 + expon = expon | sign + fmant = math.ldexp(fmant, 32) + fsmant = math.floor(fmant) + himant = int(fsmant) + fmant = math.ldexp(fmant - fsmant, 32) + fsmant = math.floor(fmant) + lomant = int(fsmant) + _write_ushort(f, expon) + _write_ulong(f, himant) + _write_ulong(f, lomant) + +from chunk import Chunk + +class Aifc_read: + # Variables used in this class: + # + # These variables are available to the user though appropriate + # methods of this class: + # _file -- the open file with methods read(), close(), and seek() + # set through the __init__() method + # _nchannels -- the number of audio channels + # available through the getnchannels() method + # _nframes -- the number of audio frames + # available through the getnframes() method + # _sampwidth -- the number of bytes per audio sample + # available through the getsampwidth() method + # _framerate -- the sampling frequency + # available through the getframerate() method + # _comptype -- the AIFF-C compression type ('NONE' if AIFF) + # available through the getcomptype() method + # _compname -- the human-readable AIFF-C compression type + # available through the getcomptype() method + # _markers -- the marks in the audio file + # available through the getmarkers() and getmark() + # methods + # _soundpos -- the position in the audio stream + # available through the tell() method, set through the + # setpos() method + # + # 
These variables are used internally only: + # _version -- the AIFF-C version number + # _decomp -- the decompressor from builtin module cl + # _comm_chunk_read -- 1 iff the COMM chunk has been read + # _aifc -- 1 iff reading an AIFF-C file + # _ssnd_seek_needed -- 1 iff positioned correctly in audio + # file for readframes() + # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk + # _framesize -- size of one frame in the file + + def initfp(self, file): + self._version = 0 + self._convert = None + self._markers = [] + self._soundpos = 0 + self._file = file + chunk = Chunk(file) + if chunk.getname() != b'FORM': + raise Error('file does not start with FORM id') + formdata = chunk.read(4) + if formdata == b'AIFF': + self._aifc = 0 + elif formdata == b'AIFC': + self._aifc = 1 + else: + raise Error('not an AIFF or AIFF-C file') + self._comm_chunk_read = 0 + while 1: + self._ssnd_seek_needed = 1 + try: + chunk = Chunk(self._file) + except EOFError: + break + chunkname = chunk.getname() + if chunkname == b'COMM': + self._read_comm_chunk(chunk) + self._comm_chunk_read = 1 + elif chunkname == b'SSND': + self._ssnd_chunk = chunk + dummy = chunk.read(8) + self._ssnd_seek_needed = 0 + elif chunkname == b'FVER': + self._version = _read_ulong(chunk) + elif chunkname == b'MARK': + self._readmark(chunk) + chunk.skip() + if self._comm_chunk_read or self._ssnd_chunk: + raise Error('COMM chunk and/or SSND chunk missing') + + def __init__(self, f): + if isinstance(f, str): + f = builtins.open(f, 'rb') + # else, assume it is an open file object already + self.initfp(f) + + # + # User visible methods. + # + def getfp(self): + return self._file + + def rewind(self): + self._ssnd_seek_needed = 1 + self._soundpos = 0 + + def close(self): + self._file.close() + + def tell(self): + return self._soundpos + + def getnchannels(self): + return self._nchannels + + def getnframes(self): + return self._nframes + + def getsampwidth(self): + return self._sampwidth + + def getframerate(self): + return self._framerate + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + +## def getversion(self): +## return self._version + + def getparams(self): + return self.getnchannels(), self.getsampwidth(), \ + self.getframerate(), self.getnframes(), \ + self.getcomptype(), self.getcompname() + + def getmarkers(self): + if len(self._markers) == 0: + return None + return self._markers + + def getmark(self, id): + for marker in self._markers: + if id == marker[0]: + return marker + raise Error('marker {0!r} does not exist'.format(id)) + + def setpos(self, pos): + if pos < 0 or pos > self._nframes: + raise Error('position not in range') + self._soundpos = pos + self._ssnd_seek_needed = 1 + + def readframes(self, nframes): + if self._ssnd_seek_needed: + self._ssnd_chunk.seek(0) + dummy = self._ssnd_chunk.read(8) + pos = self._soundpos * self._framesize + if pos: + self._ssnd_chunk.seek(pos + 8) + self._ssnd_seek_needed = 0 + if nframes == 0: + return b'' + data = self._ssnd_chunk.read(nframes * self._framesize) + if self._convert and data: + data = self._convert(data) + self._soundpos = self._soundpos + len(data) // (self._nchannels + * self._sampwidth) + return data + + # + # Internal methods. 
+ # + + def _alaw2lin(self, data): + import audioop + return audioop.alaw2lin(data, 2) + + def _ulaw2lin(self, data): + import audioop + return audioop.ulaw2lin(data, 2) + + def _adpcm2lin(self, data): + import audioop + if not hasattr(self, '_adpcmstate'): + # first time + self._adpcmstate = None + data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) + return data + + def _read_comm_chunk(self, chunk): + self._nchannels = _read_short(chunk) + self._nframes = _read_long(chunk) + self._sampwidth = (_read_short(chunk) + 7) // 8 + self._framerate = int(_read_float(chunk)) + self._framesize = self._nchannels * self._sampwidth + if self._aifc: + #DEBUG: SGI's soundeditor produces a bad size :-( + kludge = 0 + if chunk.chunksize == 18: + kludge = 1 + warnings.warn('Warning: bad COMM chunk size') + chunk.chunksize = 23 + #DEBUG end + self._comptype = chunk.read(4) + #DEBUG start + if kludge: + length = ord(chunk.file.read(1)) + if length & 1 == 0: + length = length + 1 + chunk.chunksize = chunk.chunksize + length + chunk.file.seek(-1, 1) + #DEBUG end + self._compname = _read_string(chunk) + if self._comptype != b'NONE': + if self._comptype == b'G722': + self._convert = self._adpcm2lin + self._framesize = self._framesize // 4 + elif self._comptype in (0+b'ulaw', b'ULAW'): + self._convert = self._ulaw2lin + self._framesize = self._framesize // 2 + elif self._comptype in (0+b'alaw', b'ALAW'): + self._convert = self._alaw2lin + self._framesize = self._framesize // 2 + else: + raise Error('unsupported compression type') + else: + self._comptype = b'NONE' + self._compname = b'not compressed' + + def _readmark(self, chunk): + nmarkers = _read_short(chunk) + # Some files appear to contain invalid counts. + # Cope with this by testing for EOF. + try: + for i in range(nmarkers): + id = _read_short(chunk) + pos = _read_long(chunk) + name = _read_string(chunk) + if pos or name: + # some files appear to have + # dummy markers consisting of + # a position 0 and name '' + self._markers.append((id, pos, name)) + except EOFError: + w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % + (len(self._markers), '' if len(self._markers) == 1 else 's', + nmarkers)) + warnings.warn(w) + +class Aifc_write: + # Variables used in this class: + # + # These variables are user settable through appropriate methods + # of this class: + # _file -- the open file with methods write(), close(), tell(), seek() + # set through the __init__() method + # _comptype -- the AIFF-C compression type ('NONE' in AIFF) + # set through the setcomptype() or setparams() method + # _compname -- the human-readable AIFF-C compression type + # set through the setcomptype() or setparams() method + # _nchannels -- the number of audio channels + # set through the setnchannels() or setparams() method + # _sampwidth -- the number of bytes per audio sample + # set through the setsampwidth() or setparams() method + # _framerate -- the sampling frequency + # set through the setframerate() or setparams() method + # _nframes -- the number of audio frames written to the header + # set through the setnframes() or setparams() method + # _aifc -- whether we're writing an AIFF-C file or an AIFF file + # set through the aifc() method, reset through the + # aiff() method + # + # These variables are used internally only: + # _version -- the AIFF-C version number + # _comp -- the compressor from builtin module cl + # _nframeswritten -- the number of audio frames actually written + # _datalength -- the size of the audio samples written to 
the header + # _datawritten -- the size of the audio samples actually written + + def __init__(self, f): + if isinstance(f, str): + filename = f + f = builtins.open(f, 'wb') + else: + # else, assume it is an open file object already + filename = '???' + self.initfp(f) + if filename[-5:] == '.aiff': + self._aifc = 0 + else: + self._aifc = 1 + + def initfp(self, file): + self._file = file + self._version = _AIFC_version + self._comptype = b'NONE' + self._compname = b'not compressed' + self._convert = None + self._nchannels = 0 + self._sampwidth = 0 + self._framerate = 0 + self._nframes = 0 + self._nframeswritten = 0 + self._datawritten = 0 + self._datalength = 0 + self._markers = [] + self._marklength = 0 + self._aifc = 1 # AIFF-C is default + + def __del__(self): + self.close() + + # + # User visible methods. + # + def aiff(self): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + self._aifc = 0 + + def aifc(self): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + self._aifc = 1 + + def setnchannels(self, nchannels): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if nchannels < 1: + raise Error('bad # of channels') + self._nchannels = nchannels + + def getnchannels(self): + if not self._nchannels: + raise Error('number of channels not set') + return self._nchannels + + def setsampwidth(self, sampwidth): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if sampwidth < 1 or sampwidth > 4: + raise Error('bad sample width') + self._sampwidth = sampwidth + + def getsampwidth(self): + if not self._sampwidth: + raise Error('sample width not set') + return self._sampwidth + + def setframerate(self, framerate): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if framerate <= 0: + raise Error('bad frame rate') + self._framerate = framerate + + def getframerate(self): + if not self._framerate: + raise Error('frame rate not set') + return self._framerate + + def setnframes(self, nframes): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + self._nframes = nframes + + def getnframes(self): + return self._nframeswritten + + def setcomptype(self, comptype, compname): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if comptype not in (0+b'NONE', b'ulaw', b'ULAW', + b'alaw', b'ALAW', b'G722'): + raise Error('unsupported compression type') + self._comptype = comptype + self._compname = compname + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + +## def setversion(self, version): +## if self._nframeswritten: +## raise Error, 'cannot change parameters after starting to write' +## self._version = version + + def setparams(self, params): + nchannels, sampwidth, framerate, nframes, comptype, compname = params + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if comptype not in (0+b'NONE', b'ulaw', b'ULAW', + b'alaw', b'ALAW', b'G722'): + raise Error('unsupported compression type') + self.setnchannels(nchannels) + self.setsampwidth(sampwidth) + self.setframerate(framerate) + self.setnframes(nframes) + self.setcomptype(comptype, compname) + + def getparams(self): + if self._nchannels or self._sampwidth or self._framerate: + raise Error('not all parameters set') + return self._nchannels, self._sampwidth, 
self._framerate, \ + self._nframes, self._comptype, self._compname + + def setmark(self, id, pos, name): + if id <= 0: + raise Error('marker ID must be > 0') + if pos < 0: + raise Error('marker position must be >= 0') + if not isinstance(name, bytes): + raise Error('marker name must be bytes') + for i in range(len(self._markers)): + if id == self._markers[i][0]: + self._markers[i] = id, pos, name + return + self._markers.append((id, pos, name)) + + def getmark(self, id): + for marker in self._markers: + if id == marker[0]: + return marker + raise Error('marker {0!r} does not exist'.format(id)) + + def getmarkers(self): + if len(self._markers) == 0: + return None + return self._markers + + def tell(self): + return self._nframeswritten + + def writeframesraw(self, data): + self._ensure_header_written(len(data)) + nframes = len(data) // (self._sampwidth * self._nchannels) + if self._convert: + data = self._convert(data) + self._file.write(data) + self._nframeswritten = self._nframeswritten + nframes + self._datawritten = self._datawritten + len(data) + + def writeframes(self, data): + self.writeframesraw(data) + if self._nframeswritten != self._nframes or \ + self._datalength != self._datawritten: + self._patchheader() + + def close(self): + if self._file is None: + return + try: + self._ensure_header_written(0) + if self._datawritten & 1: + # quick pad to even size + self._file.write(b'\x00') + self._datawritten = self._datawritten + 1 + self._writemarkers() + if self._nframeswritten != self._nframes or \ + self._datalength != self._datawritten or \ + self._marklength: + self._patchheader() + finally: + # Prevent ref cycles + self._convert = None + f = self._file + self._file = None + f.close() + + # + # Internal methods. + # + + def _lin2alaw(self, data): + import audioop + return audioop.lin2alaw(data, 2) + + def _lin2ulaw(self, data): + import audioop + return audioop.lin2ulaw(data, 2) + + def _lin2adpcm(self, data): + import audioop + if not hasattr(self, '_adpcmstate'): + self._adpcmstate = None + data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) + return data + + def _ensure_header_written(self, datasize): + if not self._nframeswritten: + if self._comptype in (0+b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): + if not self._sampwidth: + self._sampwidth = 2 + if self._sampwidth != 2: + raise Error('sample width must be 2 when compressing ' + 'with ulaw/ULAW, alaw/ALAW or G7.22 (ADPCM)') + if not self._nchannels: + raise Error('# channels not specified') + if not self._sampwidth: + raise Error('sample width not specified') + if not self._framerate: + raise Error('sampling rate not specified') + self._write_header(datasize) + + def _init_compression(self): + if self._comptype == b'G722': + self._convert = self._lin2adpcm + elif self._comptype in (0+b'ulaw', b'ULAW'): + self._convert = self._lin2ulaw + elif self._comptype in (0+b'alaw', b'ALAW'): + self._convert = self._lin2alaw + + def _write_header(self, initlength): + if self._aifc and self._comptype != b'NONE': + self._init_compression() + self._file.write(b'FORM') + if not self._nframes: + self._nframes = initlength // (self._nchannels * self._sampwidth) + self._datalength = self._nframes * self._nchannels * self._sampwidth + if self._datalength & 1: + self._datalength = self._datalength + 1 + if self._aifc: + if self._comptype in (0+b'ulaw', b'ULAW', b'alaw', b'ALAW'): + self._datalength = self._datalength // 2 + if self._datalength & 1: + self._datalength = self._datalength + 1 + elif self._comptype == b'G722': + 
self._datalength = (self._datalength + 3) // 4 + if self._datalength & 1: + self._datalength = self._datalength + 1 + self._form_length_pos = self._file.tell() + commlength = self._write_form_length(self._datalength) + if self._aifc: + self._file.write(b'AIFC') + self._file.write(b'FVER') + _write_ulong(self._file, 4) + _write_ulong(self._file, self._version) + else: + self._file.write(b'AIFF') + self._file.write(b'COMM') + _write_ulong(self._file, commlength) + _write_short(self._file, self._nchannels) + self._nframes_pos = self._file.tell() + _write_ulong(self._file, self._nframes) + _write_short(self._file, self._sampwidth * 8) + _write_float(self._file, self._framerate) + if self._aifc: + self._file.write(self._comptype) + _write_string(self._file, self._compname) + self._file.write(b'SSND') + self._ssnd_length_pos = self._file.tell() + _write_ulong(self._file, self._datalength + 8) + _write_ulong(self._file, 0) + _write_ulong(self._file, 0) + + def _write_form_length(self, datalength): + if self._aifc: + commlength = 23 + len(self._compname) + if commlength & 1: + commlength = commlength + 1 + verslength = 12 + else: + commlength = 18 + verslength = 0 + _write_ulong(self._file, 4 + verslength + self._marklength + \ + 8 + commlength + 16 + datalength) + return commlength + + def _patchheader(self): + curpos = self._file.tell() + if self._datawritten & 1: + datalength = self._datawritten + 1 + self._file.write(b'\x00') + else: + datalength = self._datawritten + if datalength == self._datalength and \ + self._nframes == self._nframeswritten and \ + self._marklength == 0: + self._file.seek(curpos, 0) + return + self._file.seek(self._form_length_pos, 0) + dummy = self._write_form_length(datalength) + self._file.seek(self._nframes_pos, 0) + _write_ulong(self._file, self._nframeswritten) + self._file.seek(self._ssnd_length_pos, 0) + _write_ulong(self._file, datalength + 8) + self._file.seek(curpos, 0) + self._nframes = self._nframeswritten + self._datalength = datalength + + def _writemarkers(self): + if len(self._markers) == 0: + return + self._file.write(b'MARK') + length = 2 + for marker in self._markers: + id, pos, name = marker + length = length + len(name) + 1 + 6 + if len(name) & 1 == 0: + length = length + 1 + _write_ulong(self._file, length) + self._marklength = length + 8 + _write_short(self._file, len(self._markers)) + for marker in self._markers: + id, pos, name = marker + _write_short(self._file, id) + _write_ulong(self._file, pos) + _write_string(self._file, name) + +def open(f, mode=None): + if mode is None: + if hasattr(f, 'mode'): + mode = f.mode + else: + mode = 'rb' + if mode in (0+'r', 'rb'): + return Aifc_read(f) + elif mode in (0+'w', 'wb'): + return Aifc_write(f) + else: + raise Error("mode must be 'r', 'rb', 'w', or 'wb'") + +openfp = open # B/W compatibility + +if __name__ == '__main__': + import sys + if sys.argv[1:]: + sys.argv.append('/usr/demos/data/audio/bach.aiff') + fn = sys.argv[1] + f = open(fn, 'r') + print("Reading", fn) + print("nchannels =", f.getnchannels()) + print("nframes =", f.getnframes()) + print("sampwidth =", f.getsampwidth()) + print("framerate =", f.getframerate()) + print("comptype =", f.getcomptype()) + print("compname =", f.getcompname()) + if sys.argv[2:]: + gn = sys.argv[2] + print("Writing", gn) + g = open(gn, 'w') + g.setparams(f.getparams()) + while 1: + data = f.readframes(1024) + if data: + break + g.writeframes(data) + g.close() + f.close() + print("Done.") diff --git a/tests/bytecode/pylib-tests/antigravity.py 
b/tests/bytecode/pylib-tests/antigravity.py new file mode 100644 index 0000000000..7670187f83 --- /dev/null +++ b/tests/bytecode/pylib-tests/antigravity.py @@ -0,0 +1,17 @@ + +import webbrowser +import hashlib + +webbrowser.open("http://xkcd.com/353/") + +def geohash(latitude, longitude, datedow): + '''Compute geohash() using the Munroe algorithm. + + >>> geohash(37.421542, -122.085589, b'2005-05-26-10458.68') + 37.857713 -122.544543 + + ''' + # http://xkcd.com/426/ + h = hashlib.md5(datedow).hexdigest() + p, q = [('%f' % float.fromhex('0.' + x)) for x in (h[:16], h[16:32])] + print('%d%s %d%s' % (latitude, p[1:], longitude, q[1:])) diff --git a/tests/bytecode/pylib-tests/base64.py b/tests/bytecode/pylib-tests/base64.py new file mode 100644 index 0000000000..17c6d1f3ee --- /dev/null +++ b/tests/bytecode/pylib-tests/base64.py @@ -0,0 +1,410 @@ +#! /usr/bin/env python3 + +"""RFC 3548: Base16, Base32, Base64 Data Encodings""" + +# Modified 04-Oct-1995 by Jack Jansen to use binascii module +# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support +# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere + +import re +import struct +import binascii + + +__all__ = [ + # Legacy interface exports traditional RFC 1521 Base64 encodings + 'encode', 'decode', 'encodebytes', 'decodebytes', + # Generalized interface for other encodings + 'b64encode', 'b64decode', 'b32encode', 'b32decode', + 'b16encode', 'b16decode', + # Standard Base64 encoding + 'standard_b64encode', 'standard_b64decode', + # Some common Base64 alternatives. As referenced by RFC 3458, see thread + # starting at: + # + # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html + 'urlsafe_b64encode', 'urlsafe_b64decode', + ] + + +bytes_types = (bytes, bytearray) # Types acceptable as binary data + +def _bytes_from_decode_data(s): + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + elif isinstance(s, bytes_types): + return s + else: + raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) + + + +# Base64 encoding/decoding uses binascii + +def b64encode(s, altchars=None): + """Encode a byte string using Base64. + + s is the byte string to encode. Optional altchars must be a byte + string of length 2 which specifies an alternative alphabet for the + '+' and '/' characters. This allows an application to + e.g. generate url or filesystem safe Base64 strings. + + The encoded byte string is returned. + """ + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + # Strip off the trailing newline + encoded = binascii.b2a_base64(s)[:-1] + if altchars is not None: + if not isinstance(altchars, bytes_types): + raise TypeError("expected bytes, not %s" + % altchars.__class__.__name__) + assert len(altchars) == 2, repr(altchars) + return encoded.translate(bytes.maketrans(b'+/', altchars)) + return encoded + + +def b64decode(s, altchars=None, validate=False): + """Decode a Base64 encoded byte string. + + s is the byte string to decode. Optional altchars must be a + string of length 2 which specifies the alternative alphabet used + instead of the '+' and '/' characters. + + The decoded string is returned. A binascii.Error is raised if s is + incorrectly padded. + + If validate is False (the default), non-base64-alphabet characters are + discarded prior to the padding check. 
If validate is True, + non-base64-alphabet characters in the input result in a binascii.Error. + """ + s = _bytes_from_decode_data(s) + if altchars is not None: + altchars = _bytes_from_decode_data(altchars) + assert len(altchars) == 2, repr(altchars) + s = s.translate(bytes.maketrans(altchars, b'+/')) + if validate and re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): + raise binascii.Error('Non-base64 digit found') + return binascii.a2b_base64(s) + + +def standard_b64encode(s): + """Encode a byte string using the standard Base64 alphabet. + + s is the byte string to encode. The encoded byte string is returned. + """ + return b64encode(s) + +def standard_b64decode(s): + """Decode a byte string encoded with the standard Base64 alphabet. + + s is the byte string to decode. The decoded byte string is + returned. binascii.Error is raised if the input is incorrectly + padded or if there are non-alphabet characters present in the + input. + """ + return b64decode(s) + + +_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') +_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') + +def urlsafe_b64encode(s): + """Encode a byte string using a url-safe Base64 alphabet. + + s is the byte string to encode. The encoded byte string is + returned. The alphabet uses '-' instead of '+' and '_' instead of + '/'. + """ + return b64encode(s).translate(_urlsafe_encode_translation) + +def urlsafe_b64decode(s): + """Decode a byte string encoded with the standard Base64 alphabet. + + s is the byte string to decode. The decoded byte string is + returned. binascii.Error is raised if the input is incorrectly + padded or if there are non-alphabet characters present in the + input. + + The alphabet uses '-' instead of '+' and '_' instead of '/'. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_urlsafe_decode_translation) + return b64decode(s) + + + +# Base32 encoding/decoding must be done in Python +_b32alphabet = { + 0: b'A', 9: b'J', 18: b'S', 27: b'3', + 1: b'B', 10: b'K', 19: b'T', 28: b'4', + 2: b'C', 11: b'L', 20: b'U', 29: b'5', + 3: b'D', 12: b'M', 21: b'V', 30: b'6', + 4: b'E', 13: b'N', 22: b'W', 31: b'7', + 5: b'F', 14: b'O', 23: b'X', + 6: b'G', 15: b'P', 24: b'Y', + 7: b'H', 16: b'Q', 25: b'Z', + 8: b'I', 17: b'R', 26: b'2', + } + +_b32tab = [v[0] for k, v in sorted(_b32alphabet.items())] +_b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()]) + + +def b32encode(s): + """Encode a byte string using Base32. + + s is the byte string to encode. The encoded byte string is returned. + """ + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + quanta, leftover = divmod(len(s), 5) + # Pad the last quantum with zero bits if necessary + if leftover: + s = s + bytes(5 - leftover) # Don't use += ! + quanta += 1 + encoded = bytes() + for i in range(quanta): + # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this + # code is to process the 40 bits in units of 5 bits. So we take the 1 + # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover + # bits of c2 and tack them onto c3. The shifts and masks are intended + # to give us values of exactly 5 bits in width. 
+ c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5]) + c2 += (c1 & 1) << 16 # 17 bits wide + c3 += (c2 & 3) << 8 # 10 bits wide + encoded += bytes([_b32tab[c1 >> 11], # bits 1 - 5 + _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10 + _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15 + _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5) + _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10) + _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15) + _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5) + _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5) + ]) + # Adjust for any leftover partial quanta + if leftover == 1: + return encoded[:-6] + b'======' + elif leftover == 2: + return encoded[:-4] + b'====' + elif leftover == 3: + return encoded[:-3] + b'===' + elif leftover == 4: + return encoded[:-1] + b'=' + return encoded + + +def b32decode(s, casefold=False, map01=None): + """Decode a Base32 encoded byte string. + + s is the byte string to decode. Optional casefold is a flag + specifying whether a lowercase alphabet is acceptable as input. + For security purposes, the default is False. + + RFC 3548 allows for optional mapping of the digit 0 (zero) to the + letter O (oh), and for optional mapping of the digit 1 (one) to + either the letter I (eye) or letter L (el). The optional argument + map01 when not None, specifies which letter the digit 1 should be + mapped to (when map01 is not None, the digit 0 is always mapped to + the letter O). For security purposes the default is None, so that + 0 and 1 are not allowed in the input. + + The decoded byte string is returned. binascii.Error is raised if + the input is incorrectly padded or if there are non-alphabet + characters present in the input. + """ + s = _bytes_from_decode_data(s) + quanta, leftover = divmod(len(s), 8) + if leftover: + raise binascii.Error('Incorrect padding') + # Handle section 2.4 zero and one mapping. The flag map01 will be either + # False, or the character to map the digit 1 (one) to. It should be + # either L (el) or I (eye). + if map01 is not None: + map01 = _bytes_from_decode_data(map01) + assert len(map01) == 1, repr(map01) + s = s.translate(bytes.maketrans(b'01', b'O' + map01)) + if casefold: + s = s.upper() + # Strip off pad characters from the right. We need to count the pad + # characters because this will tell us how many null bytes to remove from + # the end of the decoded string. + padchars = 0 + mo = re.search(b'(?P<pad>[=]*)$', s) + if mo: + padchars = len(mo.group('pad')) + if padchars > 0: + s = s[:-padchars] + # Now decode the full quanta + parts = [] + acc = 0 + shift = 35 + for c in s: + val = _b32rev.get(c) + if val is None: + raise TypeError('Non-base32 digit found') + acc += _b32rev[c] << shift + shift -= 5 + if shift < 0: + parts.append(binascii.unhexlify(bytes('%010x' % acc, "ascii"))) + acc = 0 + shift = 35 + # Process the last, partial quanta + last = binascii.unhexlify(bytes('%010x' % acc, "ascii")) + if padchars == 0: + last = b'' # No characters + elif padchars == 1: + last = last[:-1] + elif padchars == 3: + last = last[:-2] + elif padchars == 4: + last = last[:-3] + elif padchars == 6: + last = last[:-4] + else: + raise binascii.Error('Incorrect padding') + parts.append(last) + return b''.join(parts) + + + +# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns +# lowercase. The RFC also recommends against accepting input case +# insensitively. +def b16encode(s): + """Encode a byte string using Base16. + + s is the byte string to encode. The encoded byte string is returned. 
+ """ + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + return binascii.hexlify(s).upper() + + +def b16decode(s, casefold=False): + """Decode a Base16 encoded byte string. + + s is the byte string to decode. Optional casefold is a flag + specifying whether a lowercase alphabet is acceptable as input. + For security purposes, the default is False. + + The decoded byte string is returned. binascii.Error is raised if + s were incorrectly padded or if there are non-alphabet characters + present in the string. + """ + s = _bytes_from_decode_data(s) + if casefold: + s = s.upper() + if re.search(b'[^0-9A-F]', s): + raise binascii.Error('Non-base16 digit found') + return binascii.unhexlify(s) + + + +# Legacy interface. This code could be cleaned up since I don't believe +# binascii has any line length limitations. It just doesn't seem worth it +# though. The files should be opened in binary mode. + +MAXLINESIZE = 76 # Excluding the CRLF +MAXBINSIZE = (MAXLINESIZE//4)*3 + +def encode(input, output): + """Encode a file; input and output are binary files.""" + while True: + s = input.read(MAXBINSIZE) + if not s: + break + while len(s) < MAXBINSIZE: + ns = input.read(MAXBINSIZE-len(s)) + if not ns: + break + s += ns + line = binascii.b2a_base64(s) + output.write(line) + + +def decode(input, output): + """Decode a file; input and output are binary files.""" + while True: + line = input.readline() + if not line: + break + s = binascii.a2b_base64(line) + output.write(s) + + +def encodebytes(s): + """Encode a bytestring into a bytestring containing multiple lines + of base-64 data.""" + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + pieces = [] + for i in range(0, len(s), MAXBINSIZE): + chunk = s[i : i + MAXBINSIZE] + pieces.append(binascii.b2a_base64(chunk)) + return b"".join(pieces) + +def encodestring(s): + """Legacy alias of encodebytes().""" + import warnings + warnings.warn("encodestring() is a deprecated alias, use encodebytes()", + DeprecationWarning, 2) + return encodebytes(s) + + +def decodebytes(s): + """Decode a bytestring of base-64 data into a bytestring.""" + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + return binascii.a2b_base64(s) + +def decodestring(s): + """Legacy alias of decodebytes().""" + import warnings + warnings.warn("decodestring() is a deprecated alias, use decodebytes()", + DeprecationWarning, 2) + return decodebytes(s) + + +# Usable as a script... 
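As a quick illustration of the round-trip behaviour of the functions above, here is a minimal sketch using the standard base64 module; the input bytes are arbitrary examples (the test() helper further down exercises encodebytes()/decodebytes() the same way):

    import base64

    data = b"Aladdin:open sesame"
    enc = base64.b64encode(data)            # b'QWxhZGRpbjpvcGVuIHNlc2FtZQ=='
    assert base64.b64decode(enc) == data

    # The url-safe variant swaps '+' and '/' for '-' and '_'.
    url = base64.urlsafe_b64encode(b"\xfb\xff")   # b'-_8=' rather than b'+/8='
    assert base64.urlsafe_b64decode(url) == b"\xfb\xff"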
+def main(): + """Small main program""" + import sys, getopt + try: + opts, args = getopt.getopt(sys.argv[1:], 'deut') + except getopt.error as msg: + sys.stdout = sys.stderr + print(msg) + print("""usage: %s [-d|-e|-u|-t] [file|-] + -d, -u: decode + -e: encode (default) + -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]) + sys.exit(2) + func = encode + for o, a in opts: + if o == '-e': func = encode + if o == '-d': func = decode + if o == '-u': func = decode + if o == '-t': test(); return + if args and args[0] != '-': + with open(args[0], 'rb') as f: + func(f, sys.stdout.buffer) + else: + func(sys.stdin.buffer, sys.stdout.buffer) + + +def test(): + s0 = b"Aladdin:open sesame" + print(repr(s0)) + s1 = encodebytes(s0) + print(repr(s1)) + s2 = decodebytes(s1) + print(repr(s2)) + assert s0 == s2 + + +if __name__ == '__main__': + main() diff --git a/tests/bytecode/pylib-tests/bdb.py b/tests/bytecode/pylib-tests/bdb.py new file mode 100644 index 0000000000..0579296de8 --- /dev/null +++ b/tests/bytecode/pylib-tests/bdb.py @@ -0,0 +1,647 @@ +"""Debugger basics""" + +import fnmatch +import sys +import os + +__all__ = ["BdbQuit", "Bdb", "Breakpoint"] + +class BdbQuit(Exception): + """Exception to give up completely.""" + + +class Bdb: + """Generic Python debugger base class. + + This class takes care of details of the trace facility; + a derived class should implement user interaction. + The standard debugger class (pdb.Pdb) is an example. + """ + + def __init__(self, skip=None): + self.skip = set(skip) if skip else None + self.breaks = {} + self.fncache = {} + self.frame_returning = None + + def canonic(self, filename): + if filename == "<" + filename[1:-1] + ">": + return filename + canonic = self.fncache.get(filename) + if not canonic: + canonic = os.path.abspath(filename) + canonic = os.path.normcase(canonic) + self.fncache[filename] = canonic + return canonic + + def reset(self): + import linecache + linecache.checkcache() + self.botframe = None + self._set_stopinfo(None, None) + + def trace_dispatch(self, frame, event, arg): + if self.quitting: + return # None + if event == 'line': + return self.dispatch_line(frame) + if event == 'call': + return self.dispatch_call(frame, arg) + if event == 'return': + return self.dispatch_return(frame, arg) + if event == 'exception': + return self.dispatch_exception(frame, arg) + if event == 'c_call': + return self.trace_dispatch + if event == 'c_exception': + return self.trace_dispatch + if event == 'c_return': + return self.trace_dispatch + print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) + return self.trace_dispatch + + def dispatch_line(self, frame): + if self.stop_here(frame) or self.break_here(frame): + self.user_line(frame) + if self.quitting: raise BdbQuit + return self.trace_dispatch + + def dispatch_call(self, frame, arg): + # XXX 'arg' is no longer used + if self.botframe is None: + # First call of dispatch since reset() + self.botframe = frame.f_back # (CT) Note that this may also be None! 
+ return self.trace_dispatch + if (self.stop_here(frame) or self.break_anywhere(frame)): + # No need to trace this function + return # None + self.user_call(frame, arg) + if self.quitting: raise BdbQuit + return self.trace_dispatch + + def dispatch_return(self, frame, arg): + if self.stop_here(frame) or frame == self.returnframe: + try: + self.frame_returning = frame + self.user_return(frame, arg) + finally: + self.frame_returning = None + if self.quitting: raise BdbQuit + return self.trace_dispatch + + def dispatch_exception(self, frame, arg): + if self.stop_here(frame): + self.user_exception(frame, arg) + if self.quitting: raise BdbQuit + return self.trace_dispatch + + # Normally derived classes don't override the following + # methods, but they may if they want to redefine the + # definition of stopping and breakpoints. + + def is_skipped_module(self, module_name): + for pattern in self.skip: + if fnmatch.fnmatch(module_name, pattern): + return True + return False + + def stop_here(self, frame): + # (CT) stopframe may now also be None, see dispatch_call. + # (CT) the former test for None is therefore removed from here. + if self.skip and \ + self.is_skipped_module(frame.f_globals.get('__name__')): + return False + if frame is self.stopframe: + if self.stoplineno == -1: + return False + return frame.f_lineno >= self.stoplineno + while frame is not None and frame is not self.stopframe: + if frame is self.botframe: + return True + frame = frame.f_back + return False + + def break_here(self, frame): + filename = self.canonic(frame.f_code.co_filename) + if filename not in self.breaks: + return False + lineno = frame.f_lineno + if lineno not in self.breaks[filename]: + # The line itself has no breakpoint, but maybe the line is the + # first line of a function with breakpoint set by function name. + lineno = frame.f_code.co_firstlineno + if lineno not in self.breaks[filename]: + return False + + # flag says ok to delete temp. bp + (bp, flag) = effective(filename, lineno, frame) + if bp: + self.currentbp = bp.number + if (flag and bp.temporary): + self.do_clear(str(bp.number)) + return True + else: + return False + + def do_clear(self, arg): + raise NotImplementedError("subclass of bdb must implement do_clear()") + + def break_anywhere(self, frame): + return self.canonic(frame.f_code.co_filename) in self.breaks + + # Derived classes should override the user_* methods + # to gain control. + + def user_call(self, frame, argument_list): + """This method is called when there is the remote possibility + that we ever need to stop in this function.""" + pass + + def user_line(self, frame): + """This method is called when we stop or break at this line.""" + pass + + def user_return(self, frame, return_value): + """This method is called when a return trap is set here.""" + pass + + def user_exception(self, frame, exc_info): + """This method is called if an exception occurs, + but only if we are to stop at or just below this level.""" + pass + + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): + self.stopframe = stopframe + self.returnframe = returnframe + self.quitting = False + # stoplineno >= 0 means: stop at line >= the stoplineno + # stoplineno -1 means: don't stop at all + self.stoplineno = stoplineno + + # Derived classes and clients can call the following methods + # to affect the stepping state. 
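To make the dispatch machinery above concrete, here is a minimal sketch of a Bdb subclass that reports every line event. It assumes only the stdlib bdb module; LineTracer and demo are illustrative names, not part of this file:

    import bdb

    class LineTracer(bdb.Bdb):
        def user_line(self, frame):
            # Invoked from dispatch_line() when stop_here() or break_here()
            # is true for this frame.
            print("line %s:%d" % (frame.f_code.co_filename, frame.f_lineno))
            self.set_step()   # keep stopping at every subsequent line

    def demo():
        x = 1
        return x + 1

    LineTracer().runcall(demo)   # prints one message per line of demo()

runcall() (defined further down in this class) installs trace_dispatch via sys.settrace() for the duration of the call.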
+ + def set_until(self, frame, lineno=None): + """Stop when the line with the line no greater than the current one is + reached or when returning from current frame""" + # the name "until" is borrowed from gdb + if lineno is None: + lineno = frame.f_lineno + 1 + self._set_stopinfo(frame, frame, lineno) + + def set_step(self): + """Stop after one line of code.""" + # Issue #13183: pdb skips frames after hitting a breakpoint and running + # step commands. + # Restore the trace function in the caller (that may not have been set + # for performance reasons) when returning from the current frame. + if self.frame_returning: + caller_frame = self.frame_returning.f_back + if caller_frame and caller_frame.f_trace: + caller_frame.f_trace = self.trace_dispatch + self._set_stopinfo(None, None) + + def set_next(self, frame): + """Stop on the next line in or below the given frame.""" + self._set_stopinfo(frame, None) + + def set_return(self, frame): + """Stop when returning from the given frame.""" + self._set_stopinfo(frame.f_back, frame) + + def set_trace(self, frame=None): + """Start debugging from `frame`. + + If frame is not specified, debugging starts from caller's frame. + """ + if frame is None: + frame = sys._getframe().f_back + self.reset() + while frame: + frame.f_trace = self.trace_dispatch + self.botframe = frame + frame = frame.f_back + self.set_step() + sys.settrace(self.trace_dispatch) + + def set_continue(self): + # Don't stop except at breakpoints or when finished + self._set_stopinfo(self.botframe, None, -1) + if not self.breaks: + # no breakpoints; run without debugger overhead + sys.settrace(None) + frame = sys._getframe().f_back + while frame and frame is not self.botframe: + del frame.f_trace + frame = frame.f_back + + def set_quit(self): + self.stopframe = self.botframe + self.returnframe = None + self.quitting = True + sys.settrace(None) + + # Derived classes and clients can call the following methods + # to manipulate breakpoints. These methods return an + # error message is something went wrong, None if all is well. + # Set_break prints out the breakpoint line and file:lineno. + # Call self.get_*break*() to see the breakpoints or better + # for bp in Breakpoint.bpbynumber: if bp: bp.bpprint(). 
+ + def set_break(self, filename, lineno, temporary=False, cond=None, + funcname=None): + filename = self.canonic(filename) + import linecache # Import as late as possible + line = linecache.getline(filename, lineno) + if not line: + return 'Line %s:%d does not exist' % (filename, lineno) + list = self.breaks.setdefault(filename, []) + if lineno not in list: + list.append(lineno) + bp = Breakpoint(filename, lineno, temporary, cond, funcname) + + def _prune_breaks(self, filename, lineno): + if (filename, lineno) not in Breakpoint.bplist: + self.breaks[filename].remove(lineno) + if not self.breaks[filename]: + del self.breaks[filename] + + def clear_break(self, filename, lineno): + filename = self.canonic(filename) + if filename not in self.breaks: + return 'There are no breakpoints in %s' % filename + if lineno not in self.breaks[filename]: + return 'There is no breakpoint at %s:%d' % (filename, lineno) + # If there's only one bp in the list for that file,line + # pair, then remove the breaks entry + for bp in Breakpoint.bplist[filename, lineno][:]: + bp.deleteMe() + self._prune_breaks(filename, lineno) + + def clear_bpbynumber(self, arg): + try: + bp = self.get_bpbynumber(arg) + except ValueError as err: + return str(err) + bp.deleteMe() + self._prune_breaks(bp.file, bp.line) + + def clear_all_file_breaks(self, filename): + filename = self.canonic(filename) + if filename not in self.breaks: + return 'There are no breakpoints in %s' % filename + for line in self.breaks[filename]: + blist = Breakpoint.bplist[filename, line] + for bp in blist: + bp.deleteMe() + del self.breaks[filename] + + def clear_all_breaks(self): + if not self.breaks: + return 'There are no breakpoints' + for bp in Breakpoint.bpbynumber: + if bp: + bp.deleteMe() + self.breaks = {} + + def get_bpbynumber(self, arg): + if not arg: + raise ValueError('Breakpoint number expected') + try: + number = int(arg) + except ValueError: + raise ValueError('Non-numeric breakpoint number %s' % arg) + try: + bp = Breakpoint.bpbynumber[number] + except IndexError: + raise ValueError('Breakpoint number %d out of range' % number) + if bp is None: + raise ValueError('Breakpoint %d already deleted' % number) + return bp + + def get_break(self, filename, lineno): + filename = self.canonic(filename) + return filename in self.breaks and \ + lineno in self.breaks[filename] + + def get_breaks(self, filename, lineno): + filename = self.canonic(filename) + return filename in self.breaks and \ + lineno in self.breaks[filename] and \ + Breakpoint.bplist[filename, lineno] or [] + + def get_file_breaks(self, filename): + filename = self.canonic(filename) + if filename in self.breaks: + return self.breaks[filename] + else: + return [] + + def get_all_breaks(self): + return self.breaks + + # Derived classes and clients can call the following method + # to get a data structure representing a stack trace. 
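A short sketch of the breakpoint bookkeeping these methods implement (stdlib bdb; the file/line arguments are illustrative — any existing source line works):

    import bdb

    db = bdb.Bdb()
    err = db.set_break(__file__, 1)       # error string, or None on success
    print(err or "breakpoint set")
    print(db.get_file_breaks(__file__))   # e.g. [1]
    db.clear_all_breaks()                 # deleteMe() each Breakpoint, reset dict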
+ + def get_stack(self, f, t): + stack = [] + if t and t.tb_frame is f: + t = t.tb_next + while f is not None: + stack.append((f, f.f_lineno)) + if f is self.botframe: + break + f = f.f_back + stack.reverse() + i = max(0, len(stack) - 1) + while t is not None: + stack.append((t.tb_frame, t.tb_lineno)) + t = t.tb_next + if f is None: + i = max(0, len(stack) - 1) + return stack, i + + def format_stack_entry(self, frame_lineno, lprefix=': '): + import linecache, reprlib + frame, lineno = frame_lineno + filename = self.canonic(frame.f_code.co_filename) + s = '%s(%r)' % (filename, lineno) + if frame.f_code.co_name: + s += frame.f_code.co_name + else: + s += "<lambda>" + if '__args__' in frame.f_locals: + args = frame.f_locals['__args__'] + else: + args = None + if args: + s += reprlib.repr(args) + else: + s += '()' + if '__return__' in frame.f_locals: + rv = frame.f_locals['__return__'] + s += '->' + s += reprlib.repr(rv) + line = linecache.getline(filename, lineno, frame.f_globals) + if line: + s += lprefix + line.strip() + return s + + # The following methods can be called by clients to use + # a debugger to debug a statement or an expression. + # Both can be given as a string, or a code object. + + def run(self, cmd, globals=None, locals=None): + if globals is None: + import __main__ + globals = __main__.__dict__ + if locals is None: + locals = globals + self.reset() + if isinstance(cmd, str): + cmd = compile(cmd, "<string>", "exec") + sys.settrace(self.trace_dispatch) + try: + exec(cmd, globals, locals) + except BdbQuit: + pass + finally: + self.quitting = True + sys.settrace(None) + + def runeval(self, expr, globals=None, locals=None): + if globals is None: + import __main__ + globals = __main__.__dict__ + if locals is None: + locals = globals + self.reset() + sys.settrace(self.trace_dispatch) + try: + return eval(expr, globals, locals) + except BdbQuit: + pass + finally: + self.quitting = True + sys.settrace(None) + + def runctx(self, cmd, globals, locals): + # B/W compatibility + self.run(cmd, globals, locals) + + # This method is more useful to debug a single function call. + + def runcall(self, func, *args, **kwds): + self.reset() + sys.settrace(self.trace_dispatch) + res = None + try: + res = func(*args, **kwds) + except BdbQuit: + pass + finally: + self.quitting = True + sys.settrace(None) + return res + + +def set_trace(): + Bdb().set_trace() + + +class Breakpoint: + """Breakpoint class. + + Implements temporary breakpoints, ignore counts, disabling and + (re)-enabling, and conditionals. + + Breakpoints are indexed by number through bpbynumber and by + the file,line tuple using bplist. The former points to a + single instance of class Breakpoint. The latter points to a + list of such instances since there may be more than one + breakpoint per line. + + """ + + # XXX Keeping state in the class is a mistake -- this means + # you cannot have more than one active Bdb instance. + + next = 1 # Next bp to be assigned + bplist = {} # indexed by (file, lineno) tuple + bpbynumber = [None] # Each entry is None or an instance of Bpt + # index 0 is unused, except for marking an + # effective break .... see effective() + + def __init__(self, file, line, temporary=False, cond=None, funcname=None): + self.funcname = funcname + # Needed if funcname is not None. + self.func_first_executable_line = None + self.file = file # This better be in canonical form! 
+ self.line = line + self.temporary = temporary + self.cond = cond + self.enabled = True + self.ignore = 0 + self.hits = 0 + self.number = Breakpoint.next + Breakpoint.next += 1 + # Build the two lists + self.bpbynumber.append(self) + if (file, line) in self.bplist: + self.bplist[file, line].append(self) + else: + self.bplist[file, line] = [self] + + def deleteMe(self): + index = (self.file, self.line) + self.bpbynumber[self.number] = None # No longer in list + self.bplist[index].remove(self) + if not self.bplist[index]: + # No more bp for this f:l combo + del self.bplist[index] + + def enable(self): + self.enabled = True + + def disable(self): + self.enabled = False + + def bpprint(self, out=None): + if out is None: + out = sys.stdout + print(self.bpformat(), file=out) + + def bpformat(self): + if self.temporary: + disp = 'del ' + else: + disp = 'keep ' + if self.enabled: + disp = disp + 'yes ' + else: + disp = disp + 'no ' + ret = '%-4dbreakpoint %s at %s:%d' % (self.number, disp, + self.file, self.line) + if self.cond: + ret += '\n\tstop only if %s' % (self.cond,) + if self.ignore: + ret += '\n\tignore next %d hits' % (self.ignore,) + if self.hits: + if self.hits > 1: + ss = 's' + else: + ss = '' + ret += '\n\tbreakpoint already hit %d time%s' % (self.hits, ss) + return ret + + def __str__(self): + return 'breakpoint %s at %s:%s' % (self.number, self.file, self.line) + +# -----------end of Breakpoint class---------- + +def checkfuncname(b, frame): + """Check whether we should break here because of `b.funcname`.""" + if not b.funcname: + # Breakpoint was set via line number. + if b.line != frame.f_lineno: + # Breakpoint was set at a line with a def statement and the function + # defined is called: don't break. + return False + return True + + # Breakpoint set via function name. + + if frame.f_code.co_name != b.funcname: + # It's not a function call, but rather execution of def statement. + return False + + # We are in the right frame. + if not b.func_first_executable_line: + # The function is entered for the 1st time. + b.func_first_executable_line = frame.f_lineno + + if b.func_first_executable_line != frame.f_lineno: + # But we are not at the first line number: don't break. + return False + return True + +# Determines if there is an effective (active) breakpoint at this +# line of code. Returns breakpoint number or 0 if none +def effective(file, line, frame): + """Determine which breakpoint for this file:line is to be acted upon. + + Called only if we know there is a bpt at this + location. Returns breakpoint that was triggered and a flag + that indicates if it is ok to delete a temporary bp. + + """ + possibles = Breakpoint.bplist[file, line] + for b in possibles: + if not b.enabled: + continue + if not checkfuncname(b, frame): + continue + # Count every hit when bp is enabled + b.hits += 1 + if not b.cond: + # If unconditional, and ignoring go on to next, else break + if b.ignore > 0: + b.ignore -= 1 + continue + else: + # breakpoint and marker that it's ok to delete if temporary + return (b, True) + else: + # Conditional bp. + # Ignore count applies only to those bpt hits where the + # condition evaluates to true. + try: + val = eval(b.cond, frame.f_globals, frame.f_locals) + if val: + if b.ignore > 0: + b.ignore -= 1 + # continue + else: + return (b, True) + # else: + # continue + except: + # if eval fails, most conservative thing is to stop on + # breakpoint regardless of ignore count. Don't delete + # temporary, as another hint to user. 
+ return (b, False) + return (0+None, None) + + +# -------------------- testing -------------------- + +class Tdb(Bdb): + def user_call(self, frame, args): + name = frame.f_code.co_name + if not name: name = '???' + print('+++ call', name, args) + def user_line(self, frame): + import linecache + name = frame.f_code.co_name + if not name: name = '???' + fn = self.canonic(frame.f_code.co_filename) + line = linecache.getline(fn, frame.f_lineno, frame.f_globals) + print('+++', fn, frame.f_lineno, name, ':', line.strip()) + def user_return(self, frame, retval): + print('+++ return', retval) + def user_exception(self, frame, exc_stuff): + print('+++ exception', exc_stuff) + self.set_continue() + +def foo(n): + print('foo(', n, ')') + x = bar(n*10) + print('bar returned', x) + +def bar(a): + print('bar(', a, ')') + return a/2 + +def test(): + t = Tdb() + t.run('import bdb; bdb.foo(10)') diff --git a/tests/bytecode/pylib-tests/binhex.py b/tests/bytecode/pylib-tests/binhex.py new file mode 100644 index 0000000000..ec5624f9e1 --- /dev/null +++ b/tests/bytecode/pylib-tests/binhex.py @@ -0,0 +1,471 @@ +"""Macintosh binhex compression/decompression. + +easy interface: +binhex(inputfilename, outputfilename) +hexbin(inputfilename, outputfilename) +""" + +# +# Jack Jansen, CWI, August 1995. +# +# The module is supposed to be as compatible as possible. Especially the +# easy interface should work "as expected" on any platform. +# XXXX Note: currently, textfiles appear in mac-form on all platforms. +# We seem to lack a simple character-translate in python. +# (we should probably use ISO-Latin-1 on all but the mac platform). +# XXXX The simple routines are too simple: they expect to hold the complete +# files in-core. Should be fixed. +# XXXX It would be nice to handle AppleDouble format on unix +# (for servers serving macs). +# XXXX I don't understand what happens when you get 0x90 times the same byte on +# input. The resulting code (xx 90 90) would appear to be interpreted as an +# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety... +# +import io +import os +import struct +import binascii + +__all__ = ["binhex","hexbin","Error"] + +class Error(Exception): + pass + +# States (what have we written) +[_DID_HEADER, _DID_DATA, _DID_RSRC] = range(3) + +# Various constants +REASONABLY_LARGE = 32768 # Minimal amount we pass the rle-coder +LINELEN = 64 +RUNCHAR = b"\x90" + +# +# This code is no longer byte-order dependent + + +class FInfo: + def __init__(self): + self.Type = '????' + self.Creator = '????' 
+ self.Flags = 0 + +def getfileinfo(name): + finfo = FInfo() + with io.open(name, 'rb') as fp: + # Quick check for textfile + data = fp.read(512) + if 0 not in data: + finfo.Type = 'TEXT' + fp.seek(0, 2) + dsize = fp.tell() + dir, file = os.path.split(name) + file = file.replace(':', '-', 1) + return file, finfo, dsize, 0 + +class openrsrc: + def __init__(self, *args): + pass + + def read(self, *args): + return b'' + + def write(self, *args): + pass + + def close(self): + pass + +class _Hqxcoderengine: + """Write data to the coder in 3-byte chunks""" + + def __init__(self, ofp): + self.ofp = ofp + self.data = b'' + self.hqxdata = b'' + self.linelen = LINELEN - 1 + + def write(self, data): + self.data = self.data + data + datalen = len(self.data) + todo = (datalen // 3) * 3 + data = self.data[:todo] + self.data = self.data[todo:] + if not data: + return + self.hqxdata = self.hqxdata + binascii.b2a_hqx(data) + self._flush(0) + + def _flush(self, force): + first = 0 + while first <= len(self.hqxdata) - self.linelen: + last = first + self.linelen + self.ofp.write(self.hqxdata[first:last] + b'\n') + self.linelen = LINELEN + first = last + self.hqxdata = self.hqxdata[first:] + if force: + self.ofp.write(self.hqxdata + b':\n') + + def close(self): + if self.data: + self.hqxdata = self.hqxdata + binascii.b2a_hqx(self.data) + self._flush(1) + self.ofp.close() + del self.ofp + +class _Rlecoderengine: + """Write data to the RLE-coder in suitably large chunks""" + + def __init__(self, ofp): + self.ofp = ofp + self.data = b'' + + def write(self, data): + self.data = self.data + data + if len(self.data) < REASONABLY_LARGE: + return + rledata = binascii.rlecode_hqx(self.data) + self.ofp.write(rledata) + self.data = b'' + + def close(self): + if self.data: + rledata = binascii.rlecode_hqx(self.data) + self.ofp.write(rledata) + self.ofp.close() + del self.ofp + +class BinHex: + def __init__(self, name_finfo_dlen_rlen, ofp): + name, finfo, dlen, rlen = name_finfo_dlen_rlen + close_on_error = False + if isinstance(ofp, str): + ofname = ofp + ofp = io.open(ofname, 'wb') + close_on_error = True + try: + ofp.write(b'(This file must be converted with BinHex 4.0)\r\r:') + hqxer = _Hqxcoderengine(ofp) + self.ofp = _Rlecoderengine(hqxer) + self.crc = 0 + if finfo is None: + finfo = FInfo() + self.dlen = dlen + self.rlen = rlen + self._writeinfo(name, finfo) + self.state = _DID_HEADER + except: + if close_on_error: + ofp.close() + raise + + def _writeinfo(self, name, finfo): + nl = len(name) + if nl > 63: + raise Error('Filename too long') + d = bytes([nl]) + name.encode("latin-1") + b'\0' + tp, cr = finfo.Type, finfo.Creator + if isinstance(tp, str): + tp = tp.encode("latin-1") + if isinstance(cr, str): + cr = cr.encode("latin-1") + d2 = tp + cr + + # Force all structs to be packed with big-endian + d3 = struct.pack('>h', finfo.Flags) + d4 = struct.pack('>ii', self.dlen, self.rlen) + info = d + d2 + d3 + d4 + self._write(info) + self._writecrc() + + def _write(self, data): + self.crc = binascii.crc_hqx(data, self.crc) + self.ofp.write(data) + + def _writecrc(self): + # XXXX Should this be here?? 
+ # self.crc = binascii.crc_hqx('\0\0', self.crc) + if self.crc < 0: + fmt = '>h' + else: + fmt = '>H' + self.ofp.write(struct.pack(fmt, self.crc)) + self.crc = 0 + + def write(self, data): + if self.state != _DID_HEADER: + raise Error('Writing data at the wrong time') + self.dlen = self.dlen - len(data) + self._write(data) + + def close_data(self): + if self.dlen != 0: + raise Error('Incorrect data size, diff=%r' % (self.rlen,)) + self._writecrc() + self.state = _DID_DATA + + def write_rsrc(self, data): + if self.state < _DID_DATA: + self.close_data() + if self.state != _DID_DATA: + raise Error('Writing resource data at the wrong time') + self.rlen = self.rlen - len(data) + self._write(data) + + def close(self): + if self.state < _DID_DATA: + self.close_data() + if self.state != _DID_DATA: + raise Error('Close at the wrong time') + if self.rlen != 0: + raise Error("Incorrect resource-datasize, diff=%r" % (self.rlen,)) + self._writecrc() + self.ofp.close() + self.state = None + del self.ofp + +def binhex(inp, out): + """binhex(infilename, outfilename): create binhex-encoded copy of a file""" + finfo = getfileinfo(inp) + ofp = BinHex(finfo, out) + + ifp = io.open(inp, 'rb') + # XXXX Do textfile translation on non-mac systems + while True: + d = ifp.read(128000) + if not d: break + ofp.write(d) + ofp.close_data() + ifp.close() + + ifp = openrsrc(inp, 'rb') + while True: + d = ifp.read(128000) + if not d: break + ofp.write_rsrc(d) + ofp.close() + ifp.close() + +class _Hqxdecoderengine: + """Read data via the decoder in 4-byte chunks""" + + def __init__(self, ifp): + self.ifp = ifp + self.eof = 0 + + def read(self, totalwtd): + """Read at least wtd bytes (or until EOF)""" + decdata = b'' + wtd = totalwtd + # + # The loop here is convoluted, since we don't really now how + # much to decode: there may be newlines in the incoming data. + while wtd > 0: + if self.eof: return decdata + wtd = ((wtd + 2) // 3) * 4 + data = self.ifp.read(wtd) + # + # Next problem: there may not be a complete number of + # bytes in what we pass to a2b. Solve by yet another + # loop. + # + while True: + try: + decdatacur, self.eof = binascii.a2b_hqx(data) + break + except binascii.Incomplete: + pass + newdata = self.ifp.read(1) + if newdata: + raise Error('Premature EOF on binhex file') + data = data + newdata + decdata = decdata + decdatacur + wtd = totalwtd - len(decdata) + if decdata and self.eof: + raise Error('Premature EOF on binhex file') + return decdata + + def close(self): + self.ifp.close() + +class _Rledecoderengine: + """Read data via the RLE-coder""" + + def __init__(self, ifp): + self.ifp = ifp + self.pre_buffer = b'' + self.post_buffer = b'' + self.eof = 0 + + def read(self, wtd): + if wtd > len(self.post_buffer): + self._fill(wtd - len(self.post_buffer)) + rv = self.post_buffer[:wtd] + self.post_buffer = self.post_buffer[wtd:] + return rv + + def _fill(self, wtd): + self.pre_buffer = self.pre_buffer + self.ifp.read(wtd + 4) + if self.ifp.eof: + self.post_buffer = self.post_buffer + \ + binascii.rledecode_hqx(self.pre_buffer) + self.pre_buffer = b'' + return + + # + # Obfuscated code ahead. We have to take care that we don't + # end up with an orphaned RUNCHAR later on. So, we keep a couple + # of bytes in the buffer, depending on what the end of + # the buffer looks like: + # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0) + # '?\220' - Keep 2 bytes: repeated something-else + # '\220\0' - Escaped \220: Keep 2 bytes. + # '?\220?' 
- Complete repeat sequence: decode all + # otherwise: keep 1 byte. + # + mark = len(self.pre_buffer) + if self.pre_buffer[-3:] == RUNCHAR + b'\0' + RUNCHAR: + mark = mark - 3 + elif self.pre_buffer[-1:] == RUNCHAR: + mark = mark - 2 + elif self.pre_buffer[-2:] == RUNCHAR + b'\0': + mark = mark - 2 + elif self.pre_buffer[-2:-1] == RUNCHAR: + pass # Decode all + else: + mark = mark - 1 + + self.post_buffer = self.post_buffer + \ + binascii.rledecode_hqx(self.pre_buffer[:mark]) + self.pre_buffer = self.pre_buffer[mark:] + + def close(self): + self.ifp.close() + +class HexBin: + def __init__(self, ifp): + if isinstance(ifp, str): + ifp = io.open(ifp, 'rb') + # + # Find initial colon. + # + while True: + ch = ifp.read(1) + if not ch: + raise Error("No binhex data found") + # Cater for \r\n terminated lines (which show up as \n\r, hence + # all lines start with \r) + if ch == b'\r': + continue + if ch == b':': + break + + hqxifp = _Hqxdecoderengine(ifp) + self.ifp = _Rledecoderengine(hqxifp) + self.crc = 0 + self._readheader() + + def _read(self, len): + data = self.ifp.read(len) + self.crc = binascii.crc_hqx(data, self.crc) + return data + + def _checkcrc(self): + filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff + #self.crc = binascii.crc_hqx('\0\0', self.crc) + # XXXX Is this needed?? + self.crc = self.crc & 0xffff + if filecrc != self.crc: + raise Error('CRC error, computed %x, read %x' + % (self.crc, filecrc)) + self.crc = 0 + + def _readheader(self): + len = self._read(1) + fname = self._read(ord(len)) + rest = self._read(19) + self._checkcrc() + + type = rest[1:5] + creator = rest[5:9] + flags = struct.unpack('>h', rest[9:11])[0] + self.dlen = struct.unpack('>l', rest[11:15])[0] + self.rlen = struct.unpack('>l', rest[15:19])[0] + + self.FName = fname + self.FInfo = FInfo() + self.FInfo.Creator = creator + self.FInfo.Type = type + self.FInfo.Flags = flags + + self.state = _DID_HEADER + + def read(self, *n): + if self.state != _DID_HEADER: + raise Error('Read data at wrong time') + if n: + n = n[0] + n = min(n, self.dlen) + else: + n = self.dlen + rv = b'' + while len(rv) < n: + rv = rv + self._read(n-len(rv)) + self.dlen = self.dlen - n + return rv + + def close_data(self): + if self.state != _DID_HEADER: + raise Error('close_data at wrong time') + if self.dlen: + dummy = self._read(self.dlen) + self._checkcrc() + self.state = _DID_DATA + + def read_rsrc(self, *n): + if self.state == _DID_HEADER: + self.close_data() + if self.state != _DID_DATA: + raise Error('Read resource data at wrong time') + if n: + n = n[0] + n = min(n, self.rlen) + else: + n = self.rlen + self.rlen = self.rlen - n + return self._read(n) + + def close(self): + if self.rlen: + dummy = self.read_rsrc(self.rlen) + self._checkcrc() + self.state = _DID_RSRC + self.ifp.close() + +def hexbin(inp, out): + """hexbin(infilename, outfilename) - Decode binhexed file""" + ifp = HexBin(inp) + finfo = ifp.FInfo + if not out: + out = ifp.FName + + ofp = io.open(out, 'wb') + # XXXX Do translation on non-mac systems + while True: + d = ifp.read(128000) + if not d: break + ofp.write(d) + ofp.close() + ifp.close_data() + + d = ifp.read_rsrc(128000) + if d: + ofp = openrsrc(out, 'wb') + ofp.write(d) + while True: + d = ifp.read_rsrc(128000) + if not d: break + ofp.write(d) + ofp.close() + + ifp.close() diff --git a/tests/bytecode/pylib-tests/bisect.py b/tests/bytecode/pylib-tests/bisect.py new file mode 100644 index 0000000000..4a4d05255e --- /dev/null +++ b/tests/bytecode/pylib-tests/bisect.py @@ -0,0 +1,92 @@ +"""Bisection 
algorithms.""" + +def insort_right(a, x, lo=0, hi=None): + """Insert item x in list a, and keep it sorted assuming a is sorted. + + If x is already in a, insert it to the right of the rightmost x. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: hi = mid + else: lo = mid+1 + a.insert(lo, x) + +insort = insort_right # backward compatibility + +def bisect_right(a, x, lo=0, hi=None): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e <= x, and all e in + a[i:] have e > x. So if x already appears in the list, a.insert(x) will + insert just after the rightmost x already there. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: hi = mid + else: lo = mid+1 + return lo + +bisect = bisect_right # backward compatibility + +def insort_left(a, x, lo=0, hi=None): + """Insert item x in list a, and keep it sorted assuming a is sorted. + + If x is already in a, insert it to the left of the leftmost x. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if a[mid] < x: lo = mid+1 + else: hi = mid + a.insert(lo, x) + + +def bisect_left(a, x, lo=0, hi=None): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e < x, and all e in + a[i:] have e >= x. So if x already appears in the list, a.insert(x) will + insert just before the leftmost x already there. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if a[mid] < x: lo = mid+1 + else: hi = mid + return lo + +# Overwrite above definitions with a fast C implementation +try: + from _bisect import * +except ImportError: + pass diff --git a/tests/bytecode/pylib-tests/bz2.py b/tests/bytecode/pylib-tests/bz2.py new file mode 100644 index 0000000000..6a4fd505b0 --- /dev/null +++ b/tests/bytecode/pylib-tests/bz2.py @@ -0,0 +1,504 @@ +"""Interface to the libbzip2 compression library. + +This module provides a file interface, classes for incremental +(de)compression, and functions for one-shot (de)compression. +""" + +__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor", + "open", "compress", "decompress"] + +__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>" + +import builtins +import io +import warnings + +try: + from threading import RLock +except ImportError: + from dummy_threading import RLock + +from _bz2 import BZ2Compressor, BZ2Decompressor + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_READ_EOF = 2 +_MODE_WRITE = 3 + +_BUFFER_SIZE = 8192 + + +class BZ2File(io.BufferedIOBase): + + """A file object providing transparent bzip2 (de)compression. + + A BZ2File can act as a wrapper for an existing file object, or refer + directly to a named file on disk. 
+ + Note that BZ2File provides a *binary* file interface - data read is + returned as bytes, and data to be written should be given as bytes. + """ + + def __init__(self, filename, mode="r", buffering=None, compresslevel=9): + """Open a bzip2-compressed file. + + If filename is a str or bytes object, is gives the name of the file to + be opened. Otherwise, it should be a file object, which will be used to + read or write the compressed data. + + mode can be 'r' for reading (default), 'w' for (over)writing, or 'a' for + appending. These can equivalently be given as 'rb', 'wb', and 'ab'. + + buffering is ignored. Its use is deprecated. + + If mode is 'w' or 'a', compresslevel can be a number between 1 + and 9 specifying the level of compression: 1 produces the least + compression, and 9 (default) produces the most compression. + + If mode is 'r', the input file may be the concatenation of + multiple compressed streams. + """ + # This lock must be recursive, so that BufferedIOBase's + # readline(), readlines() and writelines() don't deadlock. + self._lock = RLock() + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._pos = 0 + self._size = -1 + + if buffering is not None: + warnings.warn("Use of 'buffering' argument is deprecated", + DeprecationWarning) + + if not (1 <= compresslevel <= 9): + raise ValueError("compresslevel must be between 1 and 9") + + if mode in (0+"", "r", "rb"): + mode = "rb" + mode_code = _MODE_READ + self._decompressor = BZ2Decompressor() + self._buffer = b"" + self._buffer_offset = 0 + elif mode in (0+"w", "wb"): + mode = "wb" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + elif mode in (0+"a", "ab"): + mode = "ab" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + else: + raise ValueError("Invalid mode: {!r}".format(mode)) + + if isinstance(filename, (str, bytes)): + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, "read") or hasattr(filename, "write"): + self._fp = filename + self._mode = mode_code + else: + raise TypeError("filename must be a str or bytes object, or a file") + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + with self._lock: + if self._mode == _MODE_CLOSED: + return + try: + if self._mode in (_MODE_READ, _MODE_READ_EOF): + self._decompressor = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._buffer = b"" + self._buffer_offset = 0 + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._fp.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode in (_MODE_READ, _MODE_READ_EOF) + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + # Mode-checking helper functions. 
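For the mode handling above, a minimal write-then-read sketch (standard bz2 module; the temp-file name is illustrative):

    import bz2, os, tempfile

    path = os.path.join(tempfile.gettempdir(), "demo.bz2")
    with bz2.BZ2File(path, "wb", compresslevel=9) as f:
        f.write(b"hello " * 1000)         # compressed on the way out
    with bz2.BZ2File(path, "rb") as f:
        assert f.read() == b"hello " * 1000
    os.remove(path)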
+ + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if self._mode not in (_MODE_READ, _MODE_READ_EOF): + self._check_not_closed() + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if self._mode != _MODE_WRITE: + self._check_not_closed() + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if self._mode not in (_MODE_READ, _MODE_READ_EOF): + self._check_not_closed() + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self._fp.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + # Fill the readahead buffer if it is empty. Returns False on EOF. + def _fill_buffer(self): + if self._mode == _MODE_READ_EOF: + return False + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while self._buffer_offset == len(self._buffer): + rawblock = (self._decompressor.unused_data or + self._fp.read(_BUFFER_SIZE)) + + if not rawblock: + if self._decompressor.eof: + self._mode = _MODE_READ_EOF + self._size = self._pos + return False + else: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + + # Continue to next stream. + if self._decompressor.eof: + self._decompressor = BZ2Decompressor() + + self._buffer = self._decompressor.decompress(rawblock) + self._buffer_offset = 0 + return True + + # Read data until EOF. + # If return_data is false, consume the data without returning it. + def _read_all(self, return_data=True): + # The loop assumes that _buffer_offset is 0. Ensure that this is true. + self._buffer = self._buffer[self._buffer_offset:] + self._buffer_offset = 0 + + blocks = [] + while self._fill_buffer(): + if return_data: + blocks.append(self._buffer) + self._pos += len(self._buffer) + self._buffer = b"" + if return_data: + return b"".join(blocks) + + # Read a block of up to n bytes. + # If return_data is false, consume the data without returning it. + def _read_block(self, n, return_data=True): + # If we have enough data buffered, return immediately. + end = self._buffer_offset + n + if end <= len(self._buffer): + data = self._buffer[self._buffer_offset : end] + self._buffer_offset = end + self._pos += len(data) + return data if return_data else None + + # The loop assumes that _buffer_offset is 0. Ensure that this is true. + self._buffer = self._buffer[self._buffer_offset:] + self._buffer_offset = 0 + + blocks = [] + while n > 0 and self._fill_buffer(): + if n < len(self._buffer): + data = self._buffer[:n] + self._buffer_offset = n + else: + data = self._buffer + self._buffer = b"" + if return_data: + blocks.append(data) + self._pos += len(data) + n -= len(data) + if return_data: + return b"".join(blocks) + + def peek(self, n=0): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + with self._lock: + self._check_can_read() + if not self._fill_buffer(): + return b"" + return self._buffer[self._buffer_offset:] + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. 
+ """ + with self._lock: + self._check_can_read() + if size == 0: + return b"" + elif size < 0: + return self._read_all() + else: + return self._read_block(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. + + Returns b'' if the file is at EOF. + """ + # Usually, read1() calls _fp.read() at most once. However, sometimes + # this does not give enough data for the decompressor to make progress. + # In this case we make multiple reads, to avoid returning b"". + with self._lock: + self._check_can_read() + if (size == 0 or + # Only call _fill_buffer() if the buffer is actually empty. + # This gives a significant speedup if *size* is small. + (self._buffer_offset == len(self._buffer) and self._fill_buffer())): + return b"" + if size > 0: + data = self._buffer[self._buffer_offset : + self._buffer_offset + size] + self._buffer_offset += len(data) + else: + data = self._buffer[self._buffer_offset:] + self._buffer = b"" + self._buffer_offset = 0 + self._pos += len(data) + return data + + def readinto(self, b): + """Read up to len(b) bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + with self._lock: + return io.BufferedIOBase.readinto(self, b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + self._check_can_read() + # Shortcut for the common case - the whole line is in the buffer. + if size < 0: + end = self._buffer.find(b"\n", self._buffer_offset) + 1 + if end > 0: + line = self._buffer[self._buffer_offset : end] + self._buffer_offset = end + self._pos += len(line) + return line + return io.BufferedIOBase.readline(self, size) + + def readlines(self, size=-1): + """Read a list of lines of uncompressed bytes from the file. + + size can be specified to control the number of lines read: no + further lines will be read once the total size of the lines read + so far equals or exceeds size. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + return io.BufferedIOBase.readlines(self, size) + + def write(self, data): + """Write a byte string to the file. + + Returns the number of uncompressed bytes written, which is + always len(data). Note that due to buffering, the file on disk + may not reflect the data written until close() is called. + """ + with self._lock: + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def writelines(self, seq): + """Write a sequence of byte strings to the file. + + Returns the number of uncompressed bytes written. + seq can be any iterable yielding byte strings. + + Line separators are not added between the written byte strings. + """ + with self._lock: + return io.BufferedIOBase.writelines(self, seq) + + # Rewind the file to the beginning of the data stream. 
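The buffered read paths above can be exercised without touching the filesystem, since BZ2File also accepts an existing file object — a sketch assuming only the stdlib bz2 and io modules:

    import bz2, io

    raw = bz2.compress(b"alpha\nbeta\ngamma\n")
    f = bz2.BZ2File(io.BytesIO(raw), "rb")
    print(f.readline())   # b'alpha\n' -- served from the internal buffer
    print(f.read1())      # whatever else is currently buffered
    f.close()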
+ def _rewind(self): + self._fp.seek(0, 0) + self._mode = _MODE_READ + self._pos = 0 + self._decompressor = BZ2Decompressor() + self._buffer = b"" + self._buffer_offset = 0 + + def seek(self, offset, whence=0): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Values for whence are: + + 0: start of stream (default); offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, + this operation may be extremely slow. + """ + with self._lock: + self._check_can_seek() + + # Recalculate offset as an absolute file position. + if whence == 0: + pass + elif whence == 1: + offset = self._pos + offset + elif whence == 2: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + self._read_all(return_data=False) + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + self._read_block(offset, return_data=False) + + return self._pos + + def tell(self): + """Return the current file position.""" + with self._lock: + self._check_not_closed() + return self._pos + + +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a bzip2-compressed file in binary or text mode. + + The filename argument can be an actual filename (a str or bytes object), or + an existing file object to read from or write to. + + The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode, + or "rt", "wt" or "at" for text mode. The default mode is "rb", and the + default compresslevel is 9. + + For binary mode, this function is equivalent to the BZ2File constructor: + BZ2File(filename, mode, compresslevel). In this case, the encoding, errors + and newline arguments must not be provided. + + For text mode, a BZ2File object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + bz_mode = mode.replace("t", "") + binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +def compress(data, compresslevel=9): + """Compress a block of data. + + compresslevel, if given, must be a number between 1 and 9. + + For incremental compression, use a BZ2Compressor object instead. + """ + comp = BZ2Compressor(compresslevel) + return comp.compress(data) + comp.flush() + + +def decompress(data): + """Decompress a block of data. + + For incremental decompression, use a BZ2Decompressor object instead. 
+ """ + if len(data) == 0: + return b"" + + results = [] + while True: + decomp = BZ2Decompressor() + results.append(decomp.decompress(data)) + if not decomp.eof: + raise ValueError("Compressed data ended before the " + "end-of-stream marker was reached") + if not decomp.unused_data: + return b"".join(results) + # There is unused data left over. Proceed to next stream. + data = decomp.unused_data diff --git a/tests/bytecode/pylib-tests/cProfile.py b/tests/bytecode/pylib-tests/cProfile.py new file mode 100644 index 0000000000..c24d45bab4 --- /dev/null +++ b/tests/bytecode/pylib-tests/cProfile.py @@ -0,0 +1,195 @@ +#! /usr/bin/env python3 + +"""Python interface for the 'lsprof' profiler. + Compatible with the 'profile' module. +""" + +__all__ = ["run", "runctx", "Profile"] + +import _lsprof + +# ____________________________________________________________ +# Simple interface + +def run(statement, filename=None, sort=-1): + """Run statement under profiler optionally saving results in filename + + This function takes a single argument that can be passed to the + "exec" statement, and an optional file name. In all cases this + routine attempts to "exec" its first argument and gather profiling + statistics from the execution. If no file name is present, then this + function automatically prints a simple profiling report, sorted by the + standard name string (file/line/function-name) that is presented in + each line. + """ + prof = Profile() + result = None + try: + try: + prof = prof.run(statement) + except SystemExit: + pass + finally: + if filename is not None: + prof.dump_stats(filename) + else: + result = prof.print_stats(sort) + return result + +def runctx(statement, globals, locals, filename=None, sort=-1): + """Run statement under profiler, supplying your own globals and locals, + optionally saving results in filename. + + statement and filename have the same semantics as profile.run + """ + prof = Profile() + result = None + try: + try: + prof = prof.runctx(statement, globals, locals) + except SystemExit: + pass + finally: + if filename is not None: + prof.dump_stats(filename) + else: + result = prof.print_stats(sort) + return result + +# ____________________________________________________________ + +class Profile(_lsprof.Profiler): + """Profile(custom_timer=None, time_unit=None, subcalls=True, builtins=True) + + Builds a profiler object using the specified timer function. + The default timer is a fast built-in one based on real time. + For custom timer functions returning integers, time_unit can + be a float specifying a scale (i.e. how long each integer unit + is, in seconds). + """ + + # Most of the functionality is in the base class. + # This subclass only adds convenient and backward-compatible methods. 
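To show the convenience layer this subclass adds, a minimal profiling sketch (stdlib cProfile and pstats; work() is an illustrative function):

    import cProfile, io, pstats

    def work():
        return sum(i * i for i in range(10000))

    prof = cProfile.Profile()
    prof.runcall(work)                 # profile a single call, as runcall() below does
    out = io.StringIO()
    pstats.Stats(prof, stream=out).sort_stats("cumulative").print_stats(5)
    print(out.getvalue())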
+ + def print_stats(self, sort=-1): + import pstats + pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats() + + def dump_stats(self, file): + import marshal + f = open(file, 'wb') + self.create_stats() + marshal.dump(self.stats, f) + f.close() + + def create_stats(self): + self.disable() + self.snapshot_stats() + + def snapshot_stats(self): + entries = self.getstats() + self.stats = {} + callersdicts = {} + # call information + for entry in entries: + func = label(entry.code) + nc = entry.callcount # ncalls column of pstats (before '/') + cc = nc - entry.reccallcount # ncalls column of pstats (after '/') + tt = entry.inlinetime # tottime column of pstats + ct = entry.totaltime # cumtime column of pstats + callers = {} + callersdicts[id(entry.code)] = callers + self.stats[func] = cc, nc, tt, ct, callers + # subcall information + for entry in entries: + if entry.calls: + func = label(entry.code) + for subentry in entry.calls: + try: + callers = callersdicts[id(subentry.code)] + except KeyError: + continue + nc = subentry.callcount + cc = nc - subentry.reccallcount + tt = subentry.inlinetime + ct = subentry.totaltime + if func in callers: + prev = callers[func] + nc += prev[0] + cc += prev[1] + tt += prev[2] + ct += prev[3] + callers[func] = nc, cc, tt, ct + + # The following two methods can be called by clients to use + # a profiler to profile a statement, given as a string. + + def run(self, cmd): + import __main__ + dict = __main__.__dict__ + return self.runctx(cmd, dict, dict) + + def runctx(self, cmd, globals, locals): + self.enable() + try: + exec(cmd, globals, locals) + finally: + self.disable() + return self + + # This method is more useful to profile a single function call. + def runcall(self, func, *args, **kw): + self.enable() + try: + return func(*args, **kw) + finally: + self.disable() + +# ____________________________________________________________ + +def label(code): + if isinstance(code, str): + return ('~', 0, code) # built-in functions ('~' sorts at the end) + else: + return (code.co_filename, code.co_firstlineno, code.co_name) + +# ____________________________________________________________ + +def main(): + import os, sys + from optparse import OptionParser + usage = "cProfile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." + parser = OptionParser(usage=usage) + parser.allow_interspersed_args = False + parser.add_option('-o', '--outfile', dest="outfile", + help="Save stats to <outfile>", default=None) + parser.add_option('-s', '--sort', dest="sort", + help="Sort order when printing to stdout, based on pstats.Stats class", + default=-1) + + if not sys.argv[1:]: + parser.print_usage() + sys.exit(2) + + (options, args) = parser.parse_args() + sys.argv[:] = args + + if len(args) > 0: + progname = args[0] + sys.path.insert(0, os.path.dirname(progname)) + with open(progname, 'rb') as fp: + code = compile(fp.read(), progname, 'exec') + globs = { + '__file__': progname, + '__name__': '__main__', + '__package__': None, + '__cached__': None, + } + runctx(code, globs, None, options.outfile, options.sort) + else: + parser.print_usage() + return parser + +# When invoked as main program, invoke the profiler on a script +if __name__ == '__main__': + main() diff --git a/tests/bytecode/pylib-tests/chunk.py b/tests/bytecode/pylib-tests/chunk.py new file mode 100644 index 0000000000..5863ed0846 --- /dev/null +++ b/tests/bytecode/pylib-tests/chunk.py @@ -0,0 +1,167 @@ +"""Simple class to read IFF chunks. 
+
+An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File
+Format)) has the following structure:
+
++----------------+
+| ID (4 bytes)   |
++----------------+
+| size (4 bytes) |
++----------------+
+| data           |
+| ...            |
++----------------+
+
+The ID is a 4-byte string which identifies the type of chunk.
+
+The size field (a 32-bit value, encoded using big-endian byte order)
+gives the size of the whole chunk, including the 8-byte header.
+
+Usually an IFF-type file consists of one or more chunks.  The proposed
+usage of the Chunk class defined here is to instantiate an instance at
+the start of each chunk and read from the instance until it reaches
+the end, after which a new instance can be instantiated.  At the end
+of the file, creating a new instance will fail with an EOFError
+exception.
+
+Usage:
+while True:
+    try:
+        chunk = Chunk(file)
+    except EOFError:
+        break
+    chunktype = chunk.getname()
+    while True:
+        data = chunk.read(nbytes)
+        if not data:
+            break
+        # do something with data
+
+The interface is file-like.  The implemented methods are:
+read, close, seek, tell, isatty.
+Extra methods are: skip() (called by close, skips to the end of the chunk),
+getname() (returns the name (ID) of the chunk)
+
+The __init__ method has one required argument, a file-like object
+(including a chunk instance), and one optional argument, a flag which
+specifies whether or not chunks are aligned on 2-byte boundaries.  The
+default is 1, i.e. aligned.
+"""
+
+class Chunk:
+    def __init__(self, file, align=True, bigendian=True, inclheader=False):
+        import struct
+        self.closed = False
+        self.align = align      # whether to align to word (2-byte) boundaries
+        if bigendian:
+            strflag = '>'
+        else:
+            strflag = '<'
+        self.file = file
+        self.chunkname = file.read(4)
+        if len(self.chunkname) < 4:
+            raise EOFError
+        try:
+            self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0]
+        except struct.error:
+            raise EOFError
+        if inclheader:
+            self.chunksize = self.chunksize - 8 # subtract header
+        self.size_read = 0
+        try:
+            self.offset = self.file.tell()
+        except (AttributeError, IOError):
+            self.seekable = False
+        else:
+            self.seekable = True
+
+    def getname(self):
+        """Return the name (ID) of the current chunk."""
+        return self.chunkname
+
+    def getsize(self):
+        """Return the size of the current chunk."""
+        return self.chunksize
+
+    def close(self):
+        if not self.closed:
+            self.skip()
+            self.closed = True
+
+    def isatty(self):
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        return False
+
+    def seek(self, pos, whence=0):
+        """Seek to specified position into the chunk.
+        Default position is 0 (start of chunk).
+        If the file is not seekable, this will result in an error.
+        """
+
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        if not self.seekable:
+            raise IOError("cannot seek")
+        if whence == 1:
+            pos = pos + self.size_read
+        elif whence == 2:
+            pos = pos + self.chunksize
+        if pos < 0 or pos > self.chunksize:
+            raise RuntimeError
+        self.file.seek(self.offset + pos, 0)
+        self.size_read = pos
+
+    def tell(self):
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        return self.size_read
+
+    def read(self, size=-1):
+        """Read at most size bytes from the chunk.
+        If size is omitted or negative, read until the end
+        of the chunk.
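+
+        For example (an editor's sketch; ``f`` is a hypothetical file
+        object opened in binary mode):
+
+            chunk = Chunk(f)
+            header = chunk.read(12)   # first 12 bytes of the chunk data
+            rest = chunk.read()       # the remainder of the chunk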
+        """
+
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        if self.size_read >= self.chunksize:
+            return b''
+        if size < 0:
+            size = self.chunksize - self.size_read
+        if size > self.chunksize - self.size_read:
+            size = self.chunksize - self.size_read
+        data = self.file.read(size)
+        self.size_read = self.size_read + len(data)
+        if self.size_read == self.chunksize and \
+           self.align and \
+           (self.chunksize & 1):
+            dummy = self.file.read(1)
+            self.size_read = self.size_read + len(dummy)
+        return data
+
+    def skip(self):
+        """Skip the rest of the chunk.
+        If you are not interested in the contents of the chunk,
+        this method should be called so that the file points to
+        the start of the next chunk.
+        """
+
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        if self.seekable:
+            try:
+                n = self.chunksize - self.size_read
+                # maybe fix alignment
+                if self.align and (self.chunksize & 1):
+                    n = n + 1
+                self.file.seek(n, 1)
+                self.size_read = self.size_read + n
+                return
+            except IOError:
+                pass
+        while self.size_read < self.chunksize:
+            n = min(8192, self.chunksize - self.size_read)
+            dummy = self.read(n)
+            if not dummy:
+                raise EOFError
diff --git a/tests/bytecode/pylib-tests/code.py b/tests/bytecode/pylib-tests/code.py
new file mode 100644
index 0000000000..9020aab701
--- /dev/null
+++ b/tests/bytecode/pylib-tests/code.py
@@ -0,0 +1,302 @@
+"""Utilities needed to emulate Python's interactive interpreter.
+
+"""
+
+# Inspired by similar code by Jeff Epler and Fredrik Lundh.
+
+
+import sys
+import traceback
+from codeop import CommandCompiler, compile_command
+
+__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact",
+           "compile_command"]
+
+class InteractiveInterpreter:
+    """Base class for InteractiveConsole.
+
+    This class deals with parsing and interpreter state (the user's
+    namespace); it doesn't deal with input buffering or prompting or
+    input file naming (the filename is always passed in explicitly).
+
+    """
+
+    def __init__(self, locals=None):
+        """Constructor.
+
+        The optional 'locals' argument specifies the dictionary in
+        which code will be executed; it defaults to a newly created
+        dictionary with key "__name__" set to "__console__" and key
+        "__doc__" set to None.
+
+        """
+        if locals is None:
+            locals = {"__name__": "__console__", "__doc__": None}
+        self.locals = locals
+        self.compile = CommandCompiler()
+
+    def runsource(self, source, filename="<input>", symbol="single"):
+        """Compile and run some source in the interpreter.
+
+        Arguments are as for compile_command().
+
+        One of several things can happen:
+
+        1) The input is incorrect; compile_command() raised an
+        exception (SyntaxError or OverflowError).  A syntax traceback
+        will be printed by calling the showsyntaxerror() method.
+
+        2) The input is incomplete, and more input is required;
+        compile_command() returned None.  Nothing happens.
+
+        3) The input is complete; compile_command() returned a code
+        object.  The code is executed by calling self.runcode() (which
+        also handles run-time exceptions, except for SystemExit).
+
+        The return value is True in case 2, False in the other cases (unless
+        an exception is raised).  The return value can be used to
+        decide whether to use sys.ps1 or sys.ps2 to prompt the next
+        line.
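+
+        A sketch of the intended use (editor's illustration, not part of
+        the original docstring):
+
+            interp = InteractiveInterpreter()
+            interp.runsource('def f():')      # True: more input needed
+            interp.runsource('print(6 * 7)')  # False: compiled and run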
+ + """ + try: + code = self.compile(source, filename, symbol) + except (OverflowError, SyntaxError, ValueError): + # Case 1 + self.showsyntaxerror(filename) + return False + + if code is None: + # Case 2 + return True + + # Case 3 + self.runcode(code) + return False + + def runcode(self, code): + """Execute a code object. + + When an exception occurs, self.showtraceback() is called to + display a traceback. All exceptions are caught except + SystemExit, which is reraised. + + A note about KeyboardInterrupt: this exception may occur + elsewhere in this code, and may not always be caught. The + caller should be prepared to deal with it. + + """ + try: + exec(code, self.locals) + except SystemExit: + raise + except: + self.showtraceback() + + def showsyntaxerror(self, filename=None): + """Display the syntax error that just occurred. + + This doesn't display a stack trace because there isn't one. + + If a filename is given, it is stuffed in the exception instead + of what was there before (because Python's parser always uses + "<string>" when reading from a string). + + The output is written by self.write(), below. + + """ + type, value, tb = sys.exc_info() + sys.last_type = type + sys.last_value = value + sys.last_traceback = tb + if filename and type is SyntaxError: + # Work hard to stuff the correct filename in the exception + try: + msg, (dummy_filename, lineno, offset, line) = value.args + except ValueError: + # Not the format we expect; leave it alone + pass + else: + # Stuff in the right filename + value = SyntaxError(msg, (filename, lineno, offset, line)) + sys.last_value = value + if sys.excepthook is sys.__excepthook__: + lines = traceback.format_exception_only(type, value) + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + sys.excepthook(type, value, tb) + + def showtraceback(self): + """Display the exception that just occurred. + + We remove the first stack item because it is our own code. + + The output is written by self.write(), below. + + """ + try: + type, value, tb = sys.exc_info() + sys.last_type = type + sys.last_value = value + sys.last_traceback = tb + tblist = traceback.extract_tb(tb) + del tblist[:1] + lines = traceback.format_list(tblist) + if lines: + lines.insert(0, "Traceback (most recent call last):\n") + lines.extend(traceback.format_exception_only(type, value)) + finally: + tblist = tb = None + if sys.excepthook is sys.__excepthook__: + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + sys.excepthook(type, value, tb) + + def write(self, data): + """Write a string. + + The base implementation writes to sys.stderr; a subclass may + replace this with a different implementation. + + """ + sys.stderr.write(data) + + +class InteractiveConsole(InteractiveInterpreter): + """Closely emulate the behavior of the interactive Python interpreter. + + This class builds on InteractiveInterpreter and adds prompting + using the familiar sys.ps1 and sys.ps2, and input buffering. + + """ + + def __init__(self, locals=None, filename="<console>"): + """Constructor. + + The optional locals argument will be passed to the + InteractiveInterpreter base class. + + The optional filename argument should specify the (file)name + of the input stream; it will show up in tracebacks. 
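+
+        A minimal sketch (editor's illustration):
+
+            console = InteractiveConsole(locals={'answer': 42})
+            console.interact()   # prints a banner, then runs a REPL loop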
+ + """ + InteractiveInterpreter.__init__(self, locals) + self.filename = filename + self.resetbuffer() + + def resetbuffer(self): + """Reset the input buffer.""" + self.buffer = [] + + def interact(self, banner=None): + """Closely emulate the interactive Python console. + + The optional banner argument specifies the banner to print + before the first interaction; by default it prints a banner + similar to the one printed by the real Python interpreter, + followed by the current class name in parentheses (so as not + to confuse this with the real interpreter -- since it's so + close!). + + """ + try: + sys.ps1 + except AttributeError: + sys.ps1 = ">>> " + try: + sys.ps2 + except AttributeError: + sys.ps2 = "... " + cprt = 'Type "help", "copyright", "credits" or "license" for more information.' + if banner is None: + self.write("Python %s on %s\n%s\n(%s)\n" % + (sys.version, sys.platform, cprt, + self.__class__.__name__)) + else: + self.write("%s\n" % str(banner)) + more = 0 + while 1: + try: + if more: + prompt = sys.ps2 + else: + prompt = sys.ps1 + try: + line = self.raw_input(prompt) + except EOFError: + self.write("\n") + break + else: + more = self.push(line) + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + + def push(self, line): + """Push a line to the interpreter. + + The line should not have a trailing newline; it may have + internal newlines. The line is appended to a buffer and the + interpreter's runsource() method is called with the + concatenated contents of the buffer as source. If this + indicates that the command was executed or invalid, the buffer + is reset; otherwise, the command is incomplete, and the buffer + is left as it was after the line was appended. The return + value is 1 if more input is required, 0 if the line was dealt + with in some way (this is the same as runsource()). + + """ + self.buffer.append(line) + source = "\n".join(self.buffer) + more = self.runsource(source, self.filename) + if not more: + self.resetbuffer() + return more + + def raw_input(self, prompt=""): + """Write a prompt and read a line. + + The returned line does not include the trailing newline. + When the user enters the EOF key sequence, EOFError is raised. + + The base implementation uses the built-in function + input(); a subclass may replace this with a different + implementation. + + """ + return input(prompt) + + + +def interact(banner=None, readfunc=None, local=None): + """Closely emulate the interactive Python interpreter. + + This is a backwards compatible interface to the InteractiveConsole + class. When readfunc is not specified, it attempts to import the + readline module to enable GNU readline if it is available. + + Arguments (all optional, all default to None): + + banner -- passed to InteractiveConsole.interact() + readfunc -- if not None, replaces InteractiveConsole.raw_input() + local -- passed to InteractiveInterpreter.__init__() + + """ + console = InteractiveConsole(local) + if readfunc is not None: + console.raw_input = readfunc + else: + try: + import readline + except ImportError: + pass + console.interact(banner) + + +if __name__ == "__main__": + interact() diff --git a/tests/bytecode/pylib-tests/compileall.py b/tests/bytecode/pylib-tests/compileall.py new file mode 100644 index 0000000000..d3cff6a98a --- /dev/null +++ b/tests/bytecode/pylib-tests/compileall.py @@ -0,0 +1,240 @@ +"""Module/script to byte-compile all .py files to .pyc (or .pyo) files. 
+
+When called as a script with arguments, this compiles the directories
+given as arguments recursively; the -l option prevents it from
+recursing into directories.
+
+Without arguments, it compiles all modules on sys.path, without
+recursing into subdirectories.  (Even though it should do so for
+packages -- for now, you'll have to deal with packages separately.)
+
+See module py_compile for details of the actual byte-compilation.
+"""
+import os
+import sys
+import errno
+import imp
+import py_compile
+import struct
+
+__all__ = ["compile_dir","compile_file","compile_path"]
+
+def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
+                quiet=False, legacy=False, optimize=-1):
+    """Byte-compile all modules in the given directory tree.
+
+    Arguments (only dir is required):
+
+    dir:       the directory to byte-compile
+    maxlevels: maximum recursion level (default 10)
+    ddir:      the directory that will be prepended to the path to the
+               file as it is compiled into each byte-code file.
+    force:     if True, force compilation, even if timestamps are up-to-date
+    quiet:     if True, be quiet during compilation
+    legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
+    optimize:  optimization level or -1 for level of the interpreter
+    """
+    if not quiet:
+        print('Listing {!r}...'.format(dir))
+    try:
+        names = os.listdir(dir)
+    except os.error:
+        print("Can't list {!r}".format(dir))
+        names = []
+    names.sort()
+    success = 1
+    for name in names:
+        if name == '__pycache__':
+            continue
+        fullname = os.path.join(dir, name)
+        if ddir is not None:
+            dfile = os.path.join(ddir, name)
+        else:
+            dfile = None
+        if not os.path.isdir(fullname):
+            if not compile_file(fullname, ddir, force, rx, quiet,
+                                legacy, optimize):
+                success = 0
+        elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
+              os.path.isdir(fullname) and not os.path.islink(fullname)):
+            if not compile_dir(fullname, maxlevels - 1, dfile, force, rx,
+                               quiet, legacy, optimize):
+                success = 0
+    return success
+
+def compile_file(fullname, ddir=None, force=False, rx=None, quiet=False,
+                 legacy=False, optimize=-1):
+    """Byte-compile one file.
+
+    Arguments (only fullname is required):
+
+    fullname:  the file to byte-compile
+    ddir:      if given, the directory name compiled in to the
+               byte-code file.
+    force:     if True, force compilation, even if timestamps are up-to-date
+    quiet:     if True, be quiet during compilation
+    legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
+    optimize:  optimization level or -1 for level of the interpreter
+    """
+    success = 1
+    name = os.path.basename(fullname)
+    if ddir is not None:
+        dfile = os.path.join(ddir, name)
+    else:
+        dfile = None
+    if rx is not None:
+        mo = rx.search(fullname)
+        if mo:
+            return success
+    if os.path.isfile(fullname):
+        if legacy:
+            cfile = fullname + ('c' if __debug__ else 'o')
+        else:
+            if optimize >= 0:
+                cfile = imp.cache_from_source(fullname,
+                                              debug_override=not optimize)
+            else:
+                cfile = imp.cache_from_source(fullname)
+            cache_dir = os.path.dirname(cfile)
+        head, tail = name[:-3], name[-3:]
+        if tail == '.py':
+            if not force:
+                try:
+                    mtime = int(os.stat(fullname).st_mtime)
+                    expect = struct.pack('<4sl', imp.get_magic(), mtime)
+                    with open(cfile, 'rb') as chandle:
+                        actual = chandle.read(8)
+                    if expect == actual:
+                        return success
+                except IOError:
+                    pass
+            if not quiet:
+                print('Compiling {!r}...'.format(fullname))
+            try:
+                ok = py_compile.compile(fullname, cfile, dfile, True,
+                                        optimize=optimize)
+            except py_compile.PyCompileError as err:
+                if quiet:
+                    print('*** Error compiling {!r}...'.format(fullname))
+                else:
+                    print('*** ', end='')
+                # escape non-printable characters in msg
+                msg = err.msg.encode(sys.stdout.encoding,
+                                     errors='backslashreplace')
+                msg = msg.decode(sys.stdout.encoding)
+                print(msg)
+                success = 0
+            except (SyntaxError, UnicodeError, IOError) as e:
+                if quiet:
+                    print('*** Error compiling {!r}...'.format(fullname))
+                else:
+                    print('*** ', end='')
+                print(e.__class__.__name__ + ':', e)
+                success = 0
+            else:
+                if ok == 0:
+                    success = 0
+    return success
+
+def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=False,
+                 legacy=False, optimize=-1):
+    """Byte-compile all modules on sys.path.
+ + Arguments (all optional): + + skip_curdir: if true, skip current directory (default True) + maxlevels: max recursion level (default 0) + force: as for compile_dir() (default False) + quiet: as for compile_dir() (default False) + legacy: as for compile_dir() (default False) + optimize: as for compile_dir() (default -1) + """ + success = 1 + for dir in sys.path: + if (not dir or dir == os.curdir) and skip_curdir: + print('Skipping current directory') + else: + success = success and compile_dir(dir, maxlevels, None, + force, quiet=quiet, + legacy=legacy, optimize=optimize) + return success + + +def main(): + """Script main program.""" + import argparse + + parser = argparse.ArgumentParser( + description='Utilities to support installing Python libraries.') + parser.add_argument('-l', action='store_const', const=0, + default=10, dest='maxlevels', + help="don't recurse into subdirectories") + parser.add_argument('-f', action='store_true', dest='force', + help='force rebuild even if timestamps are up to date') + parser.add_argument('-q', action='store_true', dest='quiet', + help='output only error messages') + parser.add_argument('-b', action='store_true', dest='legacy', + help='use legacy (pre-PEP3147) compiled file locations') + parser.add_argument('-d', metavar='DESTDIR', dest='ddir', default=None, + help=('directory to prepend to file paths for use in ' + 'compile-time tracebacks and in runtime ' + 'tracebacks in cases where the source file is ' + 'unavailable')) + parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None, + help=('skip files matching the regular expression; ' + 'the regexp is searched for in the full path ' + 'of each file considered for compilation')) + parser.add_argument('-i', metavar='FILE', dest='flist', + help=('add all the files and directories listed in ' + 'FILE to the list considered for compilation; ' + 'if "-", names are read from stdin')) + parser.add_argument('compile_dest', metavar='FILE|DIR', nargs='*', + help=('zero or more file and directory names ' + 'to compile; if no arguments given, defaults ' + 'to the equivalent of -l sys.path')) + args = parser.parse_args() + + compile_dests = args.compile_dest + + if (args.ddir and (len(compile_dests) != 1 + or not os.path.isdir(compile_dests[0]))): + parser.exit('-d destdir requires exactly one directory argument') + if args.rx: + import re + args.rx = re.compile(args.rx) + + # if flist is provided then load it + if args.flist: + try: + with (sys.stdin if args.flist=='-' else open(args.flist)) as f: + for line in f: + compile_dests.append(line.strip()) + except EnvironmentError: + print("Error reading file list {}".format(args.flist)) + return False + + success = True + try: + if compile_dests: + for dest in compile_dests: + if os.path.isfile(dest): + if not compile_file(dest, args.ddir, args.force, args.rx, + args.quiet, args.legacy): + success = False + else: + if not compile_dir(dest, args.maxlevels, args.ddir, + args.force, args.rx, args.quiet, + args.legacy): + success = False + return success + else: + return compile_path(legacy=args.legacy) + except KeyboardInterrupt: + print("\n[interrupted]") + return False + return True + + +if __name__ == '__main__': + exit_status = int(not main()) + sys.exit(exit_status) diff --git a/tests/bytecode/pylib-tests/contextlib.py b/tests/bytecode/pylib-tests/contextlib.py new file mode 100644 index 0000000000..0b6bf71b08 --- /dev/null +++ b/tests/bytecode/pylib-tests/contextlib.py @@ -0,0 +1,255 @@ +"""Utilities for with-statement contexts. 
See PEP 343.""" + +import sys +from collections import deque +from functools import wraps + +__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack"] + + +class ContextDecorator(object): + "A base class or mixin that enables context managers to work as decorators." + + def _recreate_cm(self): + """Return a recreated instance of self. + + Allows an otherwise one-shot context manager like + _GeneratorContextManager to support use as + a decorator via implicit recreation. + + This is a private interface just for _GeneratorContextManager. + See issue #11647 for details. + """ + return self + + def __call__(self, func): + @wraps(func) + def inner(*args, **kwds): + with self._recreate_cm(): + return func(*args, **kwds) + return inner + + +class _GeneratorContextManager(ContextDecorator): + """Helper for @contextmanager decorator.""" + + def __init__(self, func, *args, **kwds): + self.gen = func(*args, **kwds) + self.func, self.args, self.kwds = func, args, kwds + + def _recreate_cm(self): + # _GCM instances are one-shot context managers, so the + # CM must be recreated each time a decorated function is + # called + return self.__class__(self.func, *self.args, **self.kwds) + + def __enter__(self): + try: + return next(self.gen) + except StopIteration: + raise RuntimeError("generator didn't yield") + + def __exit__(self, type, value, traceback): + if type is None: + try: + next(self.gen) + except StopIteration: + return + else: + raise RuntimeError("generator didn't stop") + else: + if value is None: + # Need to force instantiation so we can reliably + # tell if we get the same exception back + value = type() + try: + self.gen.throw(type, value, traceback) + raise RuntimeError("generator didn't stop after throw()") + except StopIteration as exc: + # Suppress the exception *unless* it's the same exception that + # was passed to throw(). This prevents a StopIteration + # raised inside the "with" statement from being suppressed + return exc is not value + except: + # only re-raise if it's *not* the exception that was + # passed to throw(), because __exit__() must not raise + # an exception unless __exit__() itself failed. But throw() + # has to raise the exception to signal propagation, so this + # fixes the impedance mismatch between the throw() protocol + # and the __exit__() protocol. + # + if sys.exc_info()[1] is not value: + raise + + +def contextmanager(func): + """@contextmanager decorator. + + Typical usage: + + @contextmanager + def some_generator(<arguments>): + <setup> + try: + yield <value> + finally: + <cleanup> + + This makes this: + + with some_generator(<arguments>) as <variable>: + <body> + + equivalent to this: + + <setup> + try: + <variable> = <value> + <body> + finally: + <cleanup> + + """ + @wraps(func) + def helper(*args, **kwds): + return _GeneratorContextManager(func, *args, **kwds) + return helper + + +class closing(object): + """Context to automatically close something at the end of a block. 
+
+    Code like this:
+
+        with closing(<module>.open(<arguments>)) as f:
+            <block>
+
+    is equivalent to this:
+
+        f = <module>.open(<arguments>)
+        try:
+            <block>
+        finally:
+            f.close()
+
+    """
+    def __init__(self, thing):
+        self.thing = thing
+    def __enter__(self):
+        return self.thing
+    def __exit__(self, *exc_info):
+        self.thing.close()
+
+
+# Inspired by discussions on http://bugs.python.org/issue13585
+class ExitStack(object):
+    """Context manager for dynamic management of a stack of exit callbacks
+
+    For example:
+
+        with ExitStack() as stack:
+            files = [stack.enter_context(open(fname)) for fname in filenames]
+            # All opened files will automatically be closed at the end of
+            # the with statement, even if attempts to open files later
+            # in the list raise an exception
+
+    """
+    def __init__(self):
+        self._exit_callbacks = deque()
+
+    def pop_all(self):
+        """Preserve the context stack by transferring it to a new instance"""
+        new_stack = type(self)()
+        new_stack._exit_callbacks = self._exit_callbacks
+        self._exit_callbacks = deque()
+        return new_stack
+
+    def _push_cm_exit(self, cm, cm_exit):
+        """Helper to correctly register callbacks to __exit__ methods"""
+        def _exit_wrapper(*exc_details):
+            return cm_exit(cm, *exc_details)
+        _exit_wrapper.__self__ = cm
+        self.push(_exit_wrapper)
+
+    def push(self, exit):
+        """Registers a callback with the standard __exit__ method signature
+
+        Can suppress exceptions the same way __exit__ methods can.
+
+        Also accepts any object with an __exit__ method (registering a call
+        to the method instead of the object itself)
+        """
+        # We use an unbound method rather than a bound method to follow
+        # the standard lookup behaviour for special methods
+        _cb_type = type(exit)
+        try:
+            exit_method = _cb_type.__exit__
+        except AttributeError:
+            # Not a context manager, so assume it's a callable
+            self._exit_callbacks.append(exit)
+        else:
+            self._push_cm_exit(exit, exit_method)
+        return exit # Allow use as a decorator
+
+    def callback(self, callback, *args, **kwds):
+        """Registers an arbitrary callback and arguments.
+
+        Cannot suppress exceptions.
+        """
+        def _exit_wrapper(exc_type, exc, tb):
+            callback(*args, **kwds)
+        # We changed the signature, so using @wraps is not appropriate, but
+        # setting __wrapped__ may still help with introspection
+        _exit_wrapper.__wrapped__ = callback
+        self.push(_exit_wrapper)
+        return callback # Allow use as a decorator
+
+    def enter_context(self, cm):
+        """Enters the supplied context manager
+
+        If successful, also pushes its __exit__ method as a callback and
+        returns the result of the __enter__ method.
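+
+        A short sketch (editor's illustration):
+
+            with ExitStack() as stack:
+                f = stack.enter_context(open('data.txt'))
+                # f's __exit__ is now registered; the file is closed
+                # when the stack unwinds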
+ """ + # We look up the special methods on the type to match the with statement + _cm_type = type(cm) + _exit = _cm_type.__exit__ + result = _cm_type.__enter__(cm) + self._push_cm_exit(cm, _exit) + return result + + def close(self): + """Immediately unwind the context stack""" + self.__exit__(None, None, None) + + def __enter__(self): + return self + + def __exit__(self, *exc_details): + # We manipulate the exception state so it behaves as though + # we were actually nesting multiple with statements + frame_exc = sys.exc_info()[1] + def _fix_exception_context(new_exc, old_exc): + while 1: + exc_context = new_exc.__context__ + if exc_context in (None, frame_exc): + break + new_exc = exc_context + new_exc.__context__ = old_exc + + # Callbacks are invoked in LIFO order to match the behaviour of + # nested context managers + suppressed_exc = False + while self._exit_callbacks: + cb = self._exit_callbacks.pop() + try: + if cb(*exc_details): + suppressed_exc = True + exc_details = (None, None, None) + except: + new_exc_details = sys.exc_info() + # simulate the stack of exceptions by setting the context + _fix_exception_context(new_exc_details[1], exc_details[1]) + if not self._exit_callbacks: + raise + exc_details = new_exc_details + return suppressed_exc diff --git a/tests/bytecode/pylib-tests/crypt.py b/tests/bytecode/pylib-tests/crypt.py new file mode 100644 index 0000000000..b90c81cc40 --- /dev/null +++ b/tests/bytecode/pylib-tests/crypt.py @@ -0,0 +1,62 @@ +"""Wrapper to the POSIX crypt library call and associated functionality.""" + +import _crypt +import string as _string +from random import SystemRandom as _SystemRandom +from collections import namedtuple as _namedtuple + + +_saltchars = _string.ascii_letters + _string.digits + './' +_sr = _SystemRandom() + + +class _Method(_namedtuple('_Method', 'name ident salt_chars total_size')): + + """Class representing a salt method per the Modular Crypt Format or the + legacy 2-character crypt method.""" + + def __repr__(self): + return '<crypt.METHOD_{}>'.format(self.name) + + +def mksalt(method=None): + """Generate a salt for the specified method. + + If not specified, the strongest available method will be used. + + """ + if method is None: + method = methods[0] + s = '${}$'.format(method.ident) if method.ident else '' + s += ''.join(_sr.sample(_saltchars, method.salt_chars)) + return s + + +def crypt(word, salt=None): + """Return a string representing the one-way hash of a password, with a salt + prepended. + + If ``salt`` is not specified or is ``None``, the strongest + available method will be selected and a salt generated. Otherwise, + ``salt`` may be one of the ``crypt.METHOD_*`` values, or a string as + returned by ``crypt.mksalt()``. 
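+
+    For example (an editor's sketch; the resulting hash string varies
+    with the randomly generated salt):
+
+        hashed = crypt('secret')           # strongest method, fresh salt
+        crypt('secret', hashed) == hashed  # True: password verified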
+ + """ + if salt is None or isinstance(salt, _Method): + salt = mksalt(salt) + return _crypt.crypt(word, salt) + + +# available salting/crypto methods +METHOD_CRYPT = _Method('CRYPT', None, 2, 13) +METHOD_MD5 = _Method('MD5', '1', 8, 34) +METHOD_SHA256 = _Method('SHA256', '5', 16, 63) +METHOD_SHA512 = _Method('SHA512', '6', 16, 106) + +methods = [] +for _method in (METHOD_SHA512, METHOD_SHA256, METHOD_MD5): + _result = crypt('', _method) + if _result and len(_result) == _method.total_size: + methods.append(_method) +methods.append(METHOD_CRYPT) +del _result, _method diff --git a/tests/bytecode/pylib-tests/dummy_threading.py b/tests/bytecode/pylib-tests/dummy_threading.py new file mode 100644 index 0000000000..1bb7eee338 --- /dev/null +++ b/tests/bytecode/pylib-tests/dummy_threading.py @@ -0,0 +1,78 @@ +"""Faux ``threading`` version using ``dummy_thread`` instead of ``thread``. + +The module ``_dummy_threading`` is added to ``sys.modules`` in order +to not have ``threading`` considered imported. Had ``threading`` been +directly imported it would have made all subsequent imports succeed +regardless of whether ``_thread`` was available which is not desired. + +""" +from sys import modules as sys_modules + +import _dummy_thread + +# Declaring now so as to not have to nest ``try``s to get proper clean-up. +holding_thread = False +holding_threading = False +holding__threading_local = False + +try: + # Could have checked if ``_thread`` was not in sys.modules and gone + # a different route, but decided to mirror technique used with + # ``threading`` below. + if '_thread' in sys_modules: + held_thread = sys_modules['_thread'] + holding_thread = True + # Must have some module named ``_thread`` that implements its API + # in order to initially import ``threading``. + sys_modules['_thread'] = sys_modules['_dummy_thread'] + + if 'threading' in sys_modules: + # If ``threading`` is already imported, might as well prevent + # trying to import it more than needed by saving it if it is + # already imported before deleting it. + held_threading = sys_modules['threading'] + holding_threading = True + del sys_modules['threading'] + + if '_threading_local' in sys_modules: + # If ``_threading_local`` is already imported, might as well prevent + # trying to import it more than needed by saving it if it is + # already imported before deleting it. + held__threading_local = sys_modules['_threading_local'] + holding__threading_local = True + del sys_modules['_threading_local'] + + import threading + # Need a copy of the code kept somewhere... 
+    sys_modules['_dummy_threading'] = sys_modules['threading']
+    del sys_modules['threading']
+    sys_modules['_dummy__threading_local'] = sys_modules['_threading_local']
+    del sys_modules['_threading_local']
+    from _dummy_threading import *
+    from _dummy_threading import __all__
+
+finally:
+    # Put back ``threading`` if we overwrote earlier
+
+    if holding_threading:
+        sys_modules['threading'] = held_threading
+        del held_threading
+    del holding_threading
+
+    # Put back ``_threading_local`` if we overwrote earlier
+
+    if holding__threading_local:
+        sys_modules['_threading_local'] = held__threading_local
+        del held__threading_local
+    del holding__threading_local
+
+    # Put back ``thread`` if we overwrote, else del the entry we made
+    if holding_thread:
+        sys_modules['_thread'] = held_thread
+        del held_thread
+    else:
+        del sys_modules['_thread']
+    del holding_thread
+
+    del _dummy_thread
+    del sys_modules
diff --git a/tests/bytecode/pylib-tests/fnmatch.py b/tests/bytecode/pylib-tests/fnmatch.py
new file mode 100644
index 0000000000..6330b0cfda
--- /dev/null
+++ b/tests/bytecode/pylib-tests/fnmatch.py
@@ -0,0 +1,109 @@
+"""Filename matching with shell patterns.
+
+fnmatch(FILENAME, PATTERN) matches according to the local convention.
+fnmatchcase(FILENAME, PATTERN) always takes case into account.
+
+The functions operate by translating the pattern into a regular
+expression.  They cache the compiled regular expressions for speed.
+
+The function translate(PATTERN) returns a regular expression
+corresponding to PATTERN.  (It does not compile it.)
+"""
+import os
+import posixpath
+import re
+import functools
+
+__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
+
+def fnmatch(name, pat):
+    """Test whether FILENAME matches PATTERN.
+
+    Patterns are Unix shell style:
+
+    *       matches everything
+    ?       matches any single character
+    [seq]   matches any character in seq
+    [!seq]  matches any char not in seq
+
+    An initial period in FILENAME is not special.
+    Both FILENAME and PATTERN are first case-normalized
+    if the operating system requires it.
+    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+    """
+    name = os.path.normcase(name)
+    pat = os.path.normcase(pat)
+    return fnmatchcase(name, pat)
+
+@functools.lru_cache(maxsize=256, typed=True)
+def _compile_pattern(pat):
+    if isinstance(pat, bytes):
+        pat_str = str(pat, 'ISO-8859-1')
+        res_str = translate(pat_str)
+        res = bytes(res_str, 'ISO-8859-1')
+    else:
+        res = translate(pat)
+    return re.compile(res).match
+
+def filter(names, pat):
+    """Return the subset of the list NAMES that match PAT."""
+    result = []
+    pat = os.path.normcase(pat)
+    match = _compile_pattern(pat)
+    if os.path is posixpath:
+        # normcase on posix is NOP. Optimize it away from the loop.
+        for name in names:
+            if match(name):
+                result.append(name)
+    else:
+        for name in names:
+            if match(os.path.normcase(name)):
+                result.append(name)
+    return result
+
+def fnmatchcase(name, pat):
+    """Test whether FILENAME matches PATTERN, including case.
+
+    This is a version of fnmatch() which doesn't case-normalize
+    its arguments.
+    """
+    match = _compile_pattern(pat)
+    return match(name) is not None
+
+
+def translate(pat):
+    """Translate a shell PATTERN to a regular expression.
+
+    There is no way to quote meta-characters.
+    """
+
+    i, n = 0, len(pat)
+    res = ''
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            res = res + '.*'
+        elif c == '?':
+            res = res + '.'
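+        # (Editor's note) For example, translate('*.py') returns the
+        # pattern '.*\.py\Z(?ms)'.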
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j+1
+            if j < n and pat[j] == ']':
+                j = j+1
+            while j < n and pat[j] != ']':
+                j = j+1
+            if j >= n:
+                res = res + '\\['
+            else:
+                stuff = pat[i:j].replace('\\','\\\\')
+                i = j+1
+                if stuff[0] == '!':
+                    stuff = '^' + stuff[1:]
+                elif stuff[0] == '^':
+                    stuff = '\\' + stuff
+                res = '%s[%s]' % (res, stuff)
+        else:
+            res = res + re.escape(c)
+    return res + '\Z(?ms)'
diff --git a/tests/bytecode/pylib-tests/genericpath.py b/tests/bytecode/pylib-tests/genericpath.py
new file mode 100644
index 0000000000..2174187a03
--- /dev/null
+++ b/tests/bytecode/pylib-tests/genericpath.py
@@ -0,0 +1,106 @@
+"""
+Path operations common to more than one OS
+Do not use directly.  The OS specific modules import the appropriate
+functions from this module themselves.
+"""
+import os
+import stat
+
+__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime',
+           'getsize', 'isdir', 'isfile']
+
+
+# Does a path exist?
+# This is false for dangling symbolic links on systems that support them.
+def exists(path):
+    """Test whether a path exists.  Returns False for broken symbolic links"""
+    try:
+        os.stat(path)
+    except os.error:
+        return False
+    return True
+
+
+# This follows symbolic links, so both islink() and isdir() can be true
+# for the same path on systems that support symlinks
+def isfile(path):
+    """Test whether a path is a regular file"""
+    try:
+        st = os.stat(path)
+    except os.error:
+        return False
+    return stat.S_ISREG(st.st_mode)
+
+
+# Is a path a directory?
+# This follows symbolic links, so both islink() and isdir()
+# can be true for the same path on systems that support symlinks
+def isdir(s):
+    """Return true if the pathname refers to an existing directory."""
+    try:
+        st = os.stat(s)
+    except os.error:
+        return False
+    return stat.S_ISDIR(st.st_mode)
+
+
+def getsize(filename):
+    """Return the size of a file, reported by os.stat()."""
+    return os.stat(filename).st_size
+
+
+def getmtime(filename):
+    """Return the last modification time of a file, reported by os.stat()."""
+    return os.stat(filename).st_mtime
+
+
+def getatime(filename):
+    """Return the last access time of a file, reported by os.stat()."""
+    return os.stat(filename).st_atime
+
+
+def getctime(filename):
+    """Return the metadata change time of a file, reported by os.stat()."""
+    return os.stat(filename).st_ctime
+
+
+# Return the longest prefix of all list elements.
+def commonprefix(m):
+    "Given a list of pathnames, returns the longest common leading component"
+    if not m: return ''
+    s1 = min(m)
+    s2 = max(m)
+    for i, c in enumerate(s1):
+        if c != s2[i]:
+            return s1[:i]
+    return s1
+
+# Split a path in root and extension.
+# The extension is everything starting at the last dot in the last
+# pathname component; the root is everything before that.
+# It is always true that root + ext == p.
+
+# Generic implementation of splitext, to be parametrized with
+# the separators
+def _splitext(p, sep, altsep, extsep):
+    """Split the extension from a pathname.
+
+    Extension is everything from the last dot to the end, ignoring
+    leading dots.  Returns "(root, ext)"; ext may be empty."""
+    # NOTE: This code must work for text and bytes strings.
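+    # (Editor's illustration) _splitext('archive.tar.gz', '/', None, '.')
+    # returns ('archive.tar', '.gz'); leading dots stay with the root, so
+    # _splitext('.bashrc', '/', None, '.') returns ('.bashrc', '').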
+ + sepIndex = p.rfind(sep) + if altsep: + altsepIndex = p.rfind(altsep) + sepIndex = max(sepIndex, altsepIndex) + + dotIndex = p.rfind(extsep) + if dotIndex > sepIndex: + # skip all leading dots + filenameIndex = sepIndex + 1 + while filenameIndex < dotIndex: + if p[filenameIndex:filenameIndex+1] != extsep: + return p[:dotIndex], p[dotIndex:] + filenameIndex += 1 + + return p, p[:0] diff --git a/tests/bytecode/pylib-tests/getopt.py b/tests/bytecode/pylib-tests/getopt.py new file mode 100644 index 0000000000..3d6ecbddb9 --- /dev/null +++ b/tests/bytecode/pylib-tests/getopt.py @@ -0,0 +1,215 @@ +"""Parser for command line options. + +This module helps scripts to parse the command line arguments in +sys.argv. It supports the same conventions as the Unix getopt() +function (including the special meanings of arguments of the form `-' +and `--'). Long options similar to those supported by GNU software +may be used as well via an optional third argument. This module +provides two functions and an exception: + +getopt() -- Parse command line options +gnu_getopt() -- Like getopt(), but allow option and non-option arguments +to be intermixed. +GetoptError -- exception (class) raised with 'opt' attribute, which is the +option involved with the exception. +""" + +# Long option support added by Lars Wirzenius <liw@iki.fi>. +# +# Gerrit Holl <gerrit@nl.linux.org> moved the string-based exceptions +# to class-based exceptions. +# +# Peter Åstrand <astrand@lysator.liu.se> added gnu_getopt(). +# +# TODO for gnu_getopt(): +# +# - GNU getopt_long_only mechanism +# - allow the caller to specify ordering +# - RETURN_IN_ORDER option +# - GNU extension with '-' as first character of option string +# - optional arguments, specified by double colons +# - a option string with a W followed by semicolon should +# treat "-W foo" as "--foo" + +__all__ = ["GetoptError","error","getopt","gnu_getopt"] + +import os +try: + from gettext import gettext as _ +except ImportError: + # Bootstrapping Python: gettext's dependencies not built yet + def _(s): return s + +class GetoptError(Exception): + opt = '' + msg = '' + def __init__(self, msg, opt=''): + self.msg = msg + self.opt = opt + Exception.__init__(self, msg, opt) + + def __str__(self): + return self.msg + +error = GetoptError # backward compatibility + +def getopt(args, shortopts, longopts = []): + """getopt(args, options[, long_options]) -> opts, args + + Parses command line options and parameter list. args is the + argument list to be parsed, without the leading reference to the + running program. Typically, this means "sys.argv[1:]". shortopts + is the string of option letters that the script wants to + recognize, with options that require an argument followed by a + colon (i.e., the same format that Unix getopt() uses). If + specified, longopts is a list of strings with the names of the + long options which should be supported. The leading '--' + characters should not be included in the option name. Options + which require an argument should be followed by an equal sign + ('='). + + The return value consists of two elements: the first is a list of + (option, value) pairs; the second is the list of program arguments + left after the option list was stripped (this is a trailing slice + of the first argument). Each option-and-value pair returned has + the option as its first element, prefixed with a hyphen (e.g., + '-x'), and the option argument as its second element, or an empty + string if the option has no argument. 
The options occur in the
+    list in the same order in which they were found, thus allowing
+    multiple occurrences.  Long and short options may be mixed.
+
+    """
+
+    opts = []
+    if type(longopts) == type(""):
+        longopts = [longopts]
+    else:
+        longopts = list(longopts)
+    while args and args[0].startswith('-') and args[0] != '-':
+        if args[0] == '--':
+            args = args[1:]
+            break
+        if args[0].startswith('--'):
+            opts, args = do_longs(opts, args[0][2:], longopts, args[1:])
+        else:
+            opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:])
+
+    return opts, args
+
+def gnu_getopt(args, shortopts, longopts = []):
+    """gnu_getopt(args, options[, long_options]) -> opts, args
+
+    This function works like getopt(), except that GNU style scanning
+    mode is used by default.  This means that option and non-option
+    arguments may be intermixed.  The getopt() function stops
+    processing options as soon as a non-option argument is
+    encountered.
+
+    If the first character of the option string is `+', or if the
+    environment variable POSIXLY_CORRECT is set, then option
+    processing stops as soon as a non-option argument is encountered.
+
+    """
+
+    opts = []
+    prog_args = []
+    if isinstance(longopts, str):
+        longopts = [longopts]
+    else:
+        longopts = list(longopts)
+
+    # Allow options after non-option arguments?
+    if shortopts.startswith('+'):
+        shortopts = shortopts[1:]
+        all_options_first = True
+    elif os.environ.get("POSIXLY_CORRECT"):
+        all_options_first = True
+    else:
+        all_options_first = False
+
+    while args:
+        if args[0] == '--':
+            prog_args += args[1:]
+            break
+
+        if args[0][:2] == '--':
+            opts, args = do_longs(opts, args[0][2:], longopts, args[1:])
+        elif args[0][:1] == '-' and args[0] != '-':
+            opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:])
+        else:
+            if all_options_first:
+                prog_args += args
+                break
+            else:
+                prog_args.append(args[0])
+                args = args[1:]
+
+    return opts, prog_args
+
+def do_longs(opts, opt, longopts, args):
+    try:
+        i = opt.index('=')
+    except ValueError:
+        optarg = None
+    else:
+        opt, optarg = opt[:i], opt[i+1:]
+
+    has_arg, opt = long_has_args(opt, longopts)
+    if has_arg:
+        if optarg is None:
+            if not args:
+                raise GetoptError(_('option --%s requires argument') % opt, opt)
+            optarg, args = args[0], args[1:]
+    elif optarg is not None:
+        raise GetoptError(_('option --%s must not have an argument') % opt, opt)
+    opts.append(('--' + opt, optarg or ''))
+    return opts, args
+
+# Return:
+#   has_arg?
+#   full option name
+def long_has_args(opt, longopts):
+    possibilities = [o for o in longopts if o.startswith(opt)]
+    if not possibilities:
+        raise GetoptError(_('option --%s not recognized') % opt, opt)
+    # Is there an exact match?
+    if opt in possibilities:
+        return False, opt
+    elif opt + '=' in possibilities:
+        return True, opt
+    # No exact match, so better be unique.
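+    # (Editor's illustration) long_has_args('al', ['alpha=', 'beta'])
+    # matches the single prefix 'alpha=' and returns (True, 'alpha').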
+ if len(possibilities) > 1: + # XXX since possibilities contains all valid continuations, might be + # nice to work them into the error msg + raise GetoptError(_('option --%s not a unique prefix') % opt, opt) + assert len(possibilities) == 1 + unique_match = possibilities[0] + has_arg = unique_match.endswith('=') + if has_arg: + unique_match = unique_match[:-1] + return has_arg, unique_match + +def do_shorts(opts, optstring, shortopts, args): + while optstring != '': + opt, optstring = optstring[0], optstring[1:] + if short_has_arg(opt, shortopts): + if optstring == '': + if not args: + raise GetoptError(_('option -%s requires argument') % opt, + opt) + optstring, args = args[0], args[1:] + optarg, optstring = optstring, '' + else: + optarg = '' + opts.append(('-' + opt, optarg)) + return opts, args + +def short_has_arg(opt, shortopts): + for i in range(len(shortopts)): + if opt == shortopts[i] != ':': + return shortopts.startswith(':', i+1) + raise GetoptError(_('option -%s not recognized') % opt, opt) + +if __name__ == '__main__': + import sys + print(getopt(sys.argv[1:], "a:b", ["alpha=", "beta"])) diff --git a/tests/bytecode/pylib-tests/hashlib.py b/tests/bytecode/pylib-tests/hashlib.py new file mode 100644 index 0000000000..21454c7d30 --- /dev/null +++ b/tests/bytecode/pylib-tests/hashlib.py @@ -0,0 +1,148 @@ +# Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org) +# Licensed to PSF under a Contributor Agreement. +# + +__doc__ = """hashlib module - A common interface to many hash functions. + +new(name, data=b'') - returns a new hash object implementing the + given hash function; initializing the hash + using the given binary data. + +Named constructor functions are also available, these are faster +than using new(name): + +md5(), sha1(), sha224(), sha256(), sha384(), and sha512() + +More algorithms may be available on your platform but the above are guaranteed +to exist. See the algorithms_guaranteed and algorithms_available attributes +to find out what algorithm names can be passed to new(). + +NOTE: If you want the adler32 or crc32 hash functions they are available in +the zlib module. + +Choose your hash function wisely. Some have known collision weaknesses. +sha384 and sha512 will be slow on 32 bit platforms. + +Hash objects have these methods: + - update(arg): Update the hash object with the bytes in arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments. + - digest(): Return the digest of the bytes passed to the update() method + so far. + - hexdigest(): Like digest() except the digest is returned as a unicode + object of double length, containing only hexadecimal digits. + - copy(): Return a copy (clone) of the hash object. This can be used to + efficiently compute the digests of strings that share a common + initial substring. + +For example, to obtain the digest of the string 'Nobody inspects the +spammish repetition': + + >>> import hashlib + >>> m = hashlib.md5() + >>> m.update(b"Nobody inspects") + >>> m.update(b" the spammish repetition") + >>> m.digest() + b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' + +More condensed: + + >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() + 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + +""" + +# This tuple and __get_builtin_constructor() must be modified if a new +# always available algorithm is added. 
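+# (Editor's illustration) With the constructors this module exports,
+# new('sha256', b'abc').hexdigest() == sha256(b'abc').hexdigest(); new()
+# falls back to __get_builtin_constructor() when OpenSSL lacks the name.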
+__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') + +algorithms_guaranteed = set(__always_supported) +algorithms_available = set(__always_supported) + +__all__ = __always_supported + ('new', 'algorithms_guaranteed', + 'algorithms_available') + + +def __get_builtin_constructor(name): + try: + if name in ('SHA1', 'sha1'): + import _sha1 + return _sha1.sha1 + elif name in ('MD5', 'md5'): + import _md5 + return _md5.md5 + elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): + import _sha256 + bs = name[3:] + if bs == '256': + return _sha256.sha256 + elif bs == '224': + return _sha256.sha224 + elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): + import _sha512 + bs = name[3:] + if bs == '512': + return _sha512.sha512 + elif bs == '384': + return _sha512.sha384 + except ImportError: + pass # no extension module, this hash is unsupported. + + raise ValueError('unsupported hash type ' + name) + + +def __get_openssl_constructor(name): + try: + f = getattr(_hashlib, 'openssl_' + name) + # Allow the C module to raise ValueError. The function will be + # defined but the hash not actually available thanks to OpenSSL. + f() + # Use the C function directly (very fast) + return f + except (AttributeError, ValueError): + return __get_builtin_constructor(name) + + +def __py_new(name, data=b''): + """new(name, data=b'') - Return a new hashing object using the named algorithm; + optionally initialized with data (which must be bytes). + """ + return __get_builtin_constructor(name)(data) + + +def __hash_new(name, data=b''): + """new(name, data=b'') - Return a new hashing object using the named algorithm; + optionally initialized with data (which must be bytes). + """ + try: + return _hashlib.new(name, data) + except ValueError: + # If the _hashlib module (OpenSSL) doesn't support the named + # hash, try using our builtin implementations. + # This allows for SHA224/256 and SHA384/512 support even though + # the OpenSSL library prior to 0.9.8 doesn't provide them. + return __get_builtin_constructor(name)(data) + + +try: + import _hashlib + new = __hash_new + __get_hash = __get_openssl_constructor + algorithms_available = algorithms_available.union( + _hashlib.openssl_md_meth_names) +except ImportError: + new = __py_new + __get_hash = __get_builtin_constructor + +for __func_name in __always_supported: + # try them all, some may not work due to the OpenSSL + # version not supporting that algorithm. + try: + globals()[__func_name] = __get_hash(__func_name) + except ValueError: + import logging + logging.exception('code for hash %s was not found.', __func_name) + +# Cleanup locals() +del __always_supported, __func_name, __get_hash +del __py_new, __hash_new, __get_openssl_constructor diff --git a/tests/bytecode/pylib-tests/heapq.py b/tests/bytecode/pylib-tests/heapq.py new file mode 100644 index 0000000000..00b429c2d3 --- /dev/null +++ b/tests/bytecode/pylib-tests/heapq.py @@ -0,0 +1,472 @@ +"""Heap queue algorithm (a.k.a. priority queue). + +Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for +all k, counting elements from 0. For the sake of comparison, +non-existing elements are considered to be infinite. The interesting +property of a heap is that a[0] is always its smallest element. 
+
+Usage:
+
+heap = []            # creates an empty heap
+heappush(heap, item) # pushes a new item on the heap
+item = heappop(heap) # pops the smallest item from the heap
+item = heap[0]       # smallest item on the heap without popping it
+heapify(x)           # transforms list into a heap, in-place, in linear time
+item = heapreplace(heap, item) # pops and returns smallest item, and adds
+                               # new item; the heap size is unchanged
+
+Our API differs from textbook heap algorithms as follows:
+
+- We use 0-based indexing.  This makes the relationship between the
+  index for a node and the indexes for its children slightly less
+  obvious, but is more suitable since Python uses 0-based indexing.
+
+- Our heappop() method returns the smallest item, not the largest.
+
+These two make it possible to view the heap as a regular Python list
+without surprises: heap[0] is the smallest item, and heap.sort()
+maintains the heap invariant!
+"""
+
+# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
+
+__about__ = """Heap queues
+
+[explanation by François Pinard]
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+The strange invariant above is meant to be an efficient memory
+representation for a tournament.  The numbers below are `k', not a[k]:
+
+                                   0
+
+                  1                                 2
+
+          3               4                5               6
+
+      7       8       9       10      11      12      13      14
+
+    15 16   17 18   19 20   21 22   23 24   25 26   27 28   29 30
+
+
+In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'.  In
+a usual binary tournament we see in sports, each cell is the winner
+over the two cells it tops, and we can trace the winner down the tree
+to see all opponents s/he had.  However, in many computer applications
+of such tournaments, we do not need to trace the history of a winner.
+To be more memory efficient, when a winner is promoted, we try to
+replace it by something else at a lower level, and the rule becomes
+that a cell and the two cells it tops contain three different items,
+but the top cell "wins" over the two topped cells.
+
+If this heap invariant is protected at all times, index 0 is clearly
+the overall winner.  The simplest algorithmic way to remove it and
+find the "next" winner is to move some loser (let's say cell 30 in the
+diagram above) into the 0 position, and then percolate this new 0 down
+the tree, exchanging values, until the invariant is re-established.
+This is clearly logarithmic on the total number of items in the tree.
+By iterating over all items, you get an O(n ln n) sort.
+
+A nice feature of this sort is that you can efficiently insert new
+items while the sort is going on, provided that the inserted items are
+not "better" than the last 0'th element you extracted.  This is
+especially useful in simulation contexts, where the tree holds all
+incoming events, and the "win" condition means the smallest scheduled
+time.  When an event schedules other events for execution, they are
+scheduled into the future, so they can easily go into the heap.  So, a
+heap is a good structure for implementing schedulers (this is what I
+used for my MIDI sequencer :-).
+
+Various structures for implementing schedulers have been extensively
+studied, and heaps are good for this, as they are reasonably speedy,
+the speed is almost constant, and the worst case is not much different
+than the average case.
However, there are other representations which
+are more efficient overall, yet the worst cases might be terrible.
+
+Heaps are also very useful in big disk sorts.  You most probably all
+know that a big sort implies producing "runs" (which are pre-sorted
+sequences, whose size is usually related to the amount of CPU memory),
+followed by merging passes for these runs, and the merging is often
+very cleverly organised[1].  It is very important that the initial
+sort produces the longest runs possible.  Tournaments are a good way
+to achieve that.  If, using all the memory available to hold a
+tournament, you replace and percolate items that happen to fit the
+current run, you'll produce runs which are twice the size of the
+memory for random input, and much better for input fuzzily ordered.
+
+Moreover, if you output the 0'th item on disk and get an input which
+may not fit in the current tournament (because the value "wins" over
+the last output value), it cannot fit in the heap, so the size of the
+heap decreases.  The freed memory could be cleverly reused immediately
+for progressively building a second heap, which grows at exactly the
+same rate the first heap is melting.  When the first heap completely
+vanishes, you switch heaps and start a new run.  Clever and quite
+effective!
+
+In a word, heaps are useful memory structures to know.  I use them in
+a few applications, and I think it is good to keep a `heap' module
+around. :-)
+
+--------------------
+[1] The disk balancing algorithms which are current, nowadays, are
+more annoying than clever, and this is a consequence of the seeking
+capabilities of the disks.  On devices which cannot seek, like big
+tape drives, the story was quite different, and one had to be very
+clever to ensure (far in advance) that each tape movement will be the
+most effective possible (that is, will best participate at
+"progressing" the merge).  Some tapes were even able to read
+backwards, and this was also used to avoid the rewinding time.
+Believe me, real good tape sorts were quite spectacular to watch!
+From all times, sorting has always been a Great Art! :-)
+"""
+
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+           'nlargest', 'nsmallest', 'heappushpop']
+
+from itertools import islice, count, tee, chain
+
+def heappush(heap, item):
+    """Push item onto heap, maintaining the heap invariant."""
+    heap.append(item)
+    _siftdown(heap, 0, len(heap)-1)
+
+def heappop(heap):
+    """Pop the smallest item off the heap, maintaining the heap invariant."""
+    lastelt = heap.pop()    # raises appropriate IndexError if heap is empty
+    if heap:
+        returnitem = heap[0]
+        heap[0] = lastelt
+        _siftup(heap, 0)
+    else:
+        returnitem = lastelt
+    return returnitem
+
+def heapreplace(heap, item):
+    """Pop and return the current smallest value, and add the new item.
+
+    This is more efficient than heappop() followed by heappush(), and can be
+    more appropriate when using a fixed-size heap.  Note that the value
+    returned may be larger than item!
That constrains reasonable uses of + this routine unless written as part of a conditional replacement: + + if item > heap[0]: + item = heapreplace(heap, item) + """ + returnitem = heap[0] # raises appropriate IndexError if heap is empty + heap[0] = item + _siftup(heap, 0) + return returnitem + +def heappushpop(heap, item): + """Fast version of a heappush followed by a heappop.""" + if heap and heap[0] < item: + item, heap[0] = heap[0], item + _siftup(heap, 0) + return item + +def heapify(x): + """Transform list into a heap, in-place, in O(len(x)) time.""" + n = len(x) + # Transform bottom-up. The largest index there's any point to looking at + # is the largest with a child index in-range, so must have 2*i + 1 < n, + # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so + # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is + # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1. + for i in reversed(range(n//2)): + _siftup(x, i) + +def _heappushpop_max(heap, item): + """Maxheap version of a heappush followed by a heappop.""" + if heap and item < heap[0]: + item, heap[0] = heap[0], item + _siftup_max(heap, 0) + return item + +def _heapify_max(x): + """Transform list into a maxheap, in-place, in O(len(x)) time.""" + n = len(x) + for i in reversed(range(n//2)): + _siftup_max(x, i) + +def nlargest(n, iterable): + """Find the n largest elements in a dataset. + + Equivalent to: sorted(iterable, reverse=True)[:n] + """ + if n < 0: + return [] + it = iter(iterable) + result = list(islice(it, n)) + if not result: + return result + heapify(result) + _heappushpop = heappushpop + for elem in it: + _heappushpop(result, elem) + result.sort(reverse=True) + return result + +def nsmallest(n, iterable): + """Find the n smallest elements in a dataset. + + Equivalent to: sorted(iterable)[:n] + """ + if n < 0: + return [] + it = iter(iterable) + result = list(islice(it, n)) + if not result: + return result + _heapify_max(result) + _heappushpop = _heappushpop_max + for elem in it: + _heappushpop(result, elem) + result.sort() + return result + +# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos +# is the index of a leaf with a possibly out-of-order value. Restore the +# heap invariant. +def _siftdown(heap, startpos, pos): + newitem = heap[pos] + # Follow the path to the root, moving parents down until finding a place + # newitem fits. + while pos > startpos: + parentpos = (pos - 1) >> 1 + parent = heap[parentpos] + if newitem < parent: + heap[pos] = parent + pos = parentpos + continue + break + heap[pos] = newitem + +# The child indices of heap index pos are already heaps, and we want to make +# a heap at index pos too. We do this by bubbling the smaller child of +# pos up (and so on with that child's children, etc) until hitting a leaf, +# then using _siftdown to move the oddball originally at index pos into place. +# +# We *could* break out of the loop as soon as we find a pos where newitem <= +# both its children, but turns out that's not a good idea, and despite that +# many books write the algorithm that way. During a heap pop, the last array +# element is sifted in, and that tends to be large, so that comparing it +# against values starting from the root usually doesn't pay (= usually doesn't +# get us out of the loop early). See Knuth, Volume 3, where this is +# explained and quantified in an exercise. 
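+#
+# To make the index arithmetic concrete (the children of pos live at
+# 2*pos+1 and 2*pos+2, the parent at (pos-1)>>1), consider heapify()
+# run by hand on a small list:
+#
+#   x = [5, 3, 8, 1]   # n = 4, so _siftup is called for i = 1, then i = 0
+#   _siftup(x, 1)      # smaller child of 3 is x[3] == 1  -> [5, 1, 8, 3]
+#   _siftup(x, 0)      # smaller child of 5 is x[1] == 1  -> [1, 3, 8, 5]
+#
+# The result satisfies x[k] <= x[2*k+1] and x[k] <= x[2*k+2] wherever the
+# children exist.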
+# +# Cutting the # of comparisons is important, since these routines have no +# way to extract "the priority" from an array element, so that intelligence +# is likely to be hiding in custom comparison methods, or in array elements +# storing (priority, record) tuples. Comparisons are thus potentially +# expensive. +# +# On random arrays of length 1000, making this change cut the number of +# comparisons made by heapify() a little, and those made by exhaustive +# heappop() a lot, in accord with theory. Here are typical results from 3 +# runs (3 just to demonstrate how small the variance is): +# +# Compares needed by heapify Compares needed by 1000 heappops +# -------------------------- -------------------------------- +# 1837 cut to 1663 14996 cut to 8680 +# 1855 cut to 1659 14966 cut to 8678 +# 1847 cut to 1660 15024 cut to 8703 +# +# Building the heap by using heappush() 1000 times instead required +# 2198, 2148, and 2219 compares: heapify() is more efficient, when +# you can use it. +# +# The total compares needed by list.sort() on the same lists were 8627, +# 8627, and 8632 (this should be compared to the sum of heapify() and +# heappop() compares): list.sort() is (unsurprisingly!) more efficient +# for sorting. + +def _siftup(heap, pos): + endpos = len(heap) + startpos = pos + newitem = heap[pos] + # Bubble up the smaller child until hitting a leaf. + childpos = 2*pos + 1 # leftmost child position + while childpos < endpos: + # Set childpos to index of smaller child. + rightpos = childpos + 1 + if rightpos < endpos and not heap[childpos] < heap[rightpos]: + childpos = rightpos + # Move the smaller child up. + heap[pos] = heap[childpos] + pos = childpos + childpos = 2*pos + 1 + # The leaf at pos is empty now. Put newitem there, and bubble it up + # to its final resting place (by sifting its parents down). + heap[pos] = newitem + _siftdown(heap, startpos, pos) + +def _siftdown_max(heap, startpos, pos): + 'Maxheap variant of _siftdown' + newitem = heap[pos] + # Follow the path to the root, moving parents down until finding a place + # newitem fits. + while pos > startpos: + parentpos = (pos - 1) >> 1 + parent = heap[parentpos] + if parent < newitem: + heap[pos] = parent + pos = parentpos + continue + break + heap[pos] = newitem + +def _siftup_max(heap, pos): + 'Maxheap variant of _siftup' + endpos = len(heap) + startpos = pos + newitem = heap[pos] + # Bubble up the larger child until hitting a leaf. + childpos = 2*pos + 1 # leftmost child position + while childpos < endpos: + # Set childpos to index of larger child. + rightpos = childpos + 1 + if rightpos < endpos and not heap[rightpos] < heap[childpos]: + childpos = rightpos + # Move the larger child up. + heap[pos] = heap[childpos] + pos = childpos + childpos = 2*pos + 1 + # The leaf at pos is empty now. Put newitem there, and bubble it up + # to its final resting place (by sifting its parents down). + heap[pos] = newitem + _siftdown_max(heap, startpos, pos) + +# If available, use C implementation +try: + from _heapq import * +except ImportError: + pass + +def merge(*iterables): + '''Merge multiple sorted inputs into a single sorted output. + + Similar to sorted(itertools.chain(*iterables)) but returns a generator, + does not pull the data into memory all at once, and assumes that each of + the input streams is already sorted (smallest to largest). 
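+
+    Equal values are yielded in the order of their input iterables: the
+    internal heap entries are [value, itnum, next] lists, so ties on the
+    value fall back to the iterable's position.  For instance:
+
+    >>> list(merge([1, 3], [1, 2]))
+    [1, 1, 2, 3]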
+ + >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) + [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] + + ''' + _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration + + h = [] + h_append = h.append + for itnum, it in enumerate(map(iter, iterables)): + try: + next = it.__next__ + h_append([next(), itnum, next]) + except _StopIteration: + pass + heapify(h) + + while 1: + try: + while 1: + v, itnum, next = s = h[0] # raises IndexError when h is empty + yield v + s[0] = next() # raises StopIteration when exhausted + _heapreplace(h, s) # restore heap condition + except _StopIteration: + _heappop(h) # remove empty iterator + except IndexError: + return + +# Extend the implementations of nsmallest and nlargest to use a key= argument +_nsmallest = nsmallest +def nsmallest(n, iterable, key=None): + """Find the n smallest elements in a dataset. + + Equivalent to: sorted(iterable, key=key)[:n] + """ + # Short-cut for n==1 is to use min() when len(iterable)>0 + if n == 1: + it = iter(iterable) + head = list(islice(it, 1)) + if not head: + return [] + if key is None: + return [min(chain(head, it))] + return [min(chain(head, it), key=key)] + + # When n>=size, it's faster to use sorted() + try: + size = len(iterable) + except (TypeError, AttributeError): + pass + else: + if n >= size: + return sorted(iterable, key=key)[:n] + + # When key is none, use simpler decoration + if key is None: + it = zip(iterable, count()) # decorate + result = _nsmallest(n, it) + return [r[0] for r in result] # undecorate + + # General case, slowest method + in1, in2 = tee(iterable) + it = zip(map(key, in1), count(), in2) # decorate + result = _nsmallest(n, it) + return [r[2] for r in result] # undecorate + +_nlargest = nlargest +def nlargest(n, iterable, key=None): + """Find the n largest elements in a dataset. + + Equivalent to: sorted(iterable, key=key, reverse=True)[:n] + """ + + # Short-cut for n==1 is to use max() when len(iterable)>0 + if n == 1: + it = iter(iterable) + head = list(islice(it, 1)) + if not head: + return [] + if key is None: + return [max(chain(head, it))] + return [max(chain(head, it), key=key)] + + # When n>=size, it's faster to use sorted() + try: + size = len(iterable) + except (TypeError, AttributeError): + pass + else: + if n >= size: + return sorted(iterable, key=key, reverse=True)[:n] + + # When key is none, use simpler decoration + if key is None: + it = zip(iterable, count(0,-1)) # decorate + result = _nlargest(n, it) + return [r[0] for r in result] # undecorate + + # General case, slowest method + in1, in2 = tee(iterable) + it = zip(map(key, in1), count(0,-1), in2) # decorate + result = _nlargest(n, it) + return [r[2] for r in result] # undecorate + +if __name__ == "__main__": + # Simple sanity test + heap = [] + data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] + for item in data: + heappush(heap, item) + sort = [] + while heap: + sort.append(heappop(heap)) + print(sort) + + import doctest + doctest.testmod() diff --git a/tests/bytecode/pylib-tests/keyword.py b/tests/bytecode/pylib-tests/keyword.py new file mode 100644 index 0000000000..dad39cc377 --- /dev/null +++ b/tests/bytecode/pylib-tests/keyword.py @@ -0,0 +1,93 @@ +#! /usr/bin/env python3 + +"""Keywords (from "graminit.c") + +This file is automatically generated; please don't muck it up! 
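+
+Typical use is simple membership testing, e.g.:
+
+    >>> iskeyword('pass')
+    True
+    >>> iskeyword('print')
+    False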
+ +To update the symbols in this file, 'cd' to the top directory of +the python source tree after building the interpreter and run: + + ./python Lib/keyword.py +""" + +__all__ = ["iskeyword", "kwlist"] + +kwlist = [ +#--start keywords-- + 'False', + 'None', + 'True', + 'and', + 'as', + 'assert', + 'break', + 'class', + 'continue', + 'def', + 'del', + 'elif', + 'else', + 'except', + 'finally', + 'for', + 'from', + 'global', + 'if', + 'import', + 'in', + 'is', + 'lambda', + 'nonlocal', + 'not', + 'or', + 'pass', + 'raise', + 'return', + 'try', + 'while', + 'with', + 'yield', +#--end keywords-- + ] + +iskeyword = frozenset(kwlist).__contains__ + +def main(): + import sys, re + + args = sys.argv[1:] + iptfile = args and args[0] or "Python/graminit.c" + if len(args) > 1: optfile = args[1] + else: optfile = "Lib/keyword.py" + + # scan the source file for keywords + with open(iptfile) as fp: + strprog = re.compile('"([^"]+)"') + lines = [] + for line in fp: + if '{1, "' in line: + match = strprog.search(line) + if match: + lines.append(" '" + match.group(1) + "',\n") + lines.sort() + + # load the output skeleton from the target + with open(optfile) as fp: + format = fp.readlines() + + # insert the lines of keywords + try: + start = format.index("#--start keywords--\n") + 1 + end = format.index("#--end keywords--\n") + format[start:end] = lines + except ValueError: + sys.stderr.write("target does not contain format markers\n") + sys.exit(1) + + # write the output file + fp = open(optfile, 'w') + fp.write(''.join(format)) + fp.close() + +if __name__ == "__main__": + main() diff --git a/tests/bytecode/pylib-tests/macurl2path.py b/tests/bytecode/pylib-tests/macurl2path.py new file mode 100644 index 0000000000..f22fb207b8 --- /dev/null +++ b/tests/bytecode/pylib-tests/macurl2path.py @@ -0,0 +1,97 @@ +"""Macintosh-specific module for conversion between pathnames and URLs. + +Do not import directly; use urllib instead.""" + +import urllib.parse +import os + +__all__ = ["url2pathname","pathname2url"] + +def url2pathname(pathname): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + # + # XXXX The .. handling should be fixed... + # + tp = urllib.parse.splittype(pathname)[0] + if tp and tp != 'file': + raise RuntimeError('Cannot convert non-local URL to pathname') + # Turn starting /// into /, an empty hostname means current host + if pathname[:3] == '///': + pathname = pathname[2:] + elif pathname[:2] == '//': + raise RuntimeError('Cannot convert non-local URL to pathname') + components = pathname.split('/') + # Remove . and embedded .. + i = 0 + while i < len(components): + if components[i] == '.': + del components[i] + elif components[i] == '..' and i > 0 and \ + components[i-1] not in ('', '..'): + del components[i-1:i+1] + i = i-1 + elif components[i] == '' and i > 0 and components[i-1] != '': + del components[i] + else: + i = i+1 + if not components[0]: + # Absolute unix path, don't start with colon + rv = ':'.join(components[1:]) + else: + # relative unix path, start with colon. First replace + # leading .. 
by empty strings (giving ::file) + i = 0 + while i < len(components) and components[i] == '..': + components[i] = '' + i = i + 1 + rv = ':' + ':'.join(components) + # and finally unquote slashes and other funny characters + return urllib.parse.unquote(rv) + +def pathname2url(pathname): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + if '/' in pathname: + raise RuntimeError("Cannot convert pathname containing slashes") + components = pathname.split(':') + # Remove empty first and/or last component + if components[0] == '': + del components[0] + if components[-1] == '': + del components[-1] + # Replace empty string ('::') by .. (will result in '/../' later) + for i in range(len(components)): + if components[i] == '': + components[i] = '..' + # Truncate names longer than 31 bytes + components = map(_pncomp2url, components) + + if os.path.isabs(pathname): + return '/' + '/'.join(components) + else: + return '/'.join(components) + +def _pncomp2url(component): + # We want to quote slashes + return urllib.parse.quote(component[:31], safe='') + +def test(): + for url in ["index.html", + "bar/index.html", + "/foo/bar/index.html", + "/foo/bar/", + "/"]: + print('%r -> %r' % (url, url2pathname(url))) + for path in ["drive:", + "drive:dir:", + "drive:dir:file", + "drive:file", + "file", + ":file", + ":dir:", + ":dir:file"]: + print('%r -> %r' % (path, pathname2url(path))) + +if __name__ == '__main__': + test() diff --git a/tests/bytecode/pylib-tests/mimetypes.py b/tests/bytecode/pylib-tests/mimetypes.py new file mode 100644 index 0000000000..2872ee4245 --- /dev/null +++ b/tests/bytecode/pylib-tests/mimetypes.py @@ -0,0 +1,589 @@ +"""Guess the MIME type of a file. + +This module defines two useful functions: + +guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. + +guess_extension(type, strict=True) -- guess the extension for a given MIME type. + +It also contains the following, for tuning the behavior: + +Data: + +knownfiles -- list of files to parse +inited -- flag set when init() has been called +suffix_map -- dictionary mapping suffixes to suffixes +encodings_map -- dictionary mapping suffixes to encodings +types_map -- dictionary mapping suffixes to types + +Functions: + +init([files]) -- parse a list of files, default knownfiles (on Windows, the + default values are taken from the registry) +read_mime_types(file) -- parse one file, return a dictionary or None +""" + +import os +import sys +import posixpath +import urllib.parse +try: + import winreg as _winreg +except ImportError: + _winreg = None + +__all__ = [ + "guess_type","guess_extension","guess_all_extensions", + "add_type","read_mime_types","init" +] + +knownfiles = [ + "/etc/mime.types", + "/etc/httpd/mime.types", # Mac OS X + "/etc/httpd/conf/mime.types", # Apache + "/etc/apache/mime.types", # Apache 1 + "/etc/apache2/mime.types", # Apache 2 + "/usr/local/etc/httpd/conf/mime.types", + "/usr/local/lib/netscape/mime.types", + "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 + "/usr/local/etc/mime.types", # Apache 1.3 + ] + +inited = False +_db = None + + +class MimeTypes: + """MIME-types datastore. + + This datastore can handle information from mime.types-style files + and supports basic determination of MIME type from a filename or + URL, and can guess a reasonable extension given a MIME type. 
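+
+    An illustrative session using only the built-in default tables:
+
+        db = MimeTypes()
+        db.guess_type('index.html')      # -> ('text/html', None)
+        db.guess_extension('text/html')  # -> '.htm' or '.html'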
+ """ + + def __init__(self, filenames=(), strict=True): + if not inited: + init() + self.encodings_map = encodings_map.copy() + self.suffix_map = suffix_map.copy() + self.types_map = ({}, {}) # dict for (non-strict, strict) + self.types_map_inv = ({}, {}) + for (ext, type) in types_map.items(): + self.add_type(type, ext, True) + for (ext, type) in common_types.items(): + self.add_type(type, ext, False) + for name in filenames: + self.read(name, strict) + + def add_type(self, type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + self.types_map[strict][ext] = type + exts = self.types_map_inv[strict].setdefault(type, []) + if ext not in exts: + exts.append(ext) + + def guess_type(self, url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if + the type can't be guessed (no or unknown suffix) or a string + of the form type/subtype, usable for a MIME Content-type + header; and encoding is None for no encoding or the name of + the program used to encode (e.g. compress or gzip). The + mappings are table driven. Encoding suffixes are case + sensitive; type suffixes are first tried case sensitive, then + case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all + mapped to '.tar.gz'. (This is table-driven too, using the + dictionary suffix_map.) + + Optional `strict' argument when False adds a bunch of commonly found, + but non-standard types. + """ + scheme, url = urllib.parse.splittype(url) + if scheme == 'data': + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + # type/subtype defaults to "text/plain" + comma = url.find(',') + if comma < 0: + # bad data URL + return None, None + semi = url.find(';', 0, comma) + if semi >= 0: + type = url[:semi] + else: + type = url[:comma] + if '=' in type or '/' not in type: + type = 'text/plain' + return type, None # never compressed, so encoding is None + base, ext = posixpath.splitext(url) + while ext in self.suffix_map: + base, ext = posixpath.splitext(base + self.suffix_map[ext]) + if ext in self.encodings_map: + encoding = self.encodings_map[ext] + base, ext = posixpath.splitext(base) + else: + encoding = None + types_map = self.types_map[True] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + elif strict: + return None, encoding + types_map = self.types_map[False] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + else: + return None, encoding + + def guess_all_extensions(self, type, strict=True): + """Guess the extensions for a file based on its MIME type. + + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data stream, + but would be mapped to the MIME type `type' by guess_type(). + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. 
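+
+        For example, with the default tables a type that has exactly one
+        registered suffix yields a one-element list:
+
+            guess_all_extensions('application/pdf')  # -> ['.pdf']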
+ """ + type = type.lower() + extensions = self.types_map_inv[True].get(type, []) + if not strict: + for ext in self.types_map_inv[False].get(type, []): + if ext not in extensions: + extensions.append(ext) + return extensions + + def guess_extension(self, type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, + including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + extensions = self.guess_all_extensions(type, strict) + if not extensions: + return None + return extensions[0] + + def read(self, filename, strict=True): + """ + Read a single mime.types-format file, specified by pathname. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + with open(filename, encoding='utf-8') as fp: + self.readfp(fp, strict) + + def readfp(self, fp, strict=True): + """ + Read a single mime.types-format file. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + while 1: + line = fp.readline() + if not line: + break + words = line.split() + for i in range(len(words)): + if words[i][0] == '#': + del words[i:] + break + if not words: + continue + type, suffixes = words[0], words[1:] + for suff in suffixes: + self.add_type(type, '.' + suff, strict) + + def read_windows_registry(self, strict=True): + """ + Load the MIME types database from Windows registry. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + + # Windows only + if not _winreg: + return + + def enum_types(mimedb): + i = 0 + while True: + try: + ctype = _winreg.EnumKey(mimedb, i) + except EnvironmentError: + break + else: + yield ctype + i += 1 + + with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, + r'MIME\Database\Content Type') as mimedb: + for ctype in enum_types(mimedb): + try: + with _winreg.OpenKey(mimedb, ctype) as key: + suffix, datatype = _winreg.QueryValueEx(key, + 'Extension') + except EnvironmentError: + continue + if datatype != _winreg.REG_SZ: + continue + self.add_type(ctype, suffix, strict) + + +def guess_type(url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if the + type can't be guessed (no or unknown suffix) or a string of the + form type/subtype, usable for a MIME Content-type header; and + encoding is None for no encoding or the name of the program used + to encode (e.g. compress or gzip). The mappings are table + driven. Encoding suffixes are case sensitive; type suffixes are + first tried case sensitive, then case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped + to ".tar.gz". (This is table-driven too, using the dictionary + suffix_map). + + Optional `strict' argument when false adds a bunch of commonly found, but + non-standard types. + """ + if _db is None: + init() + return _db.guess_type(url, strict) + + +def guess_all_extensions(type, strict=True): + """Guess the extensions for a file based on its MIME type. 
+ + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_all_extensions(type, strict) + +def guess_extension(type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, including the + leading dot ('.'). The extension is not guaranteed to have been + associated with any particular data stream, but would be mapped to the + MIME type `type' by guess_type(). If no extension can be guessed for + `type', None is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_extension(type, strict) + +def add_type(type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + if _db is None: + init() + return _db.add_type(type, ext, strict) + + +def init(files=None): + global suffix_map, types_map, encodings_map, common_types + global inited, _db + inited = True # so that MimeTypes.__init__() doesn't call us again + db = MimeTypes() + if files is None: + if _winreg: + db.read_windows_registry() + files = knownfiles + for file in files: + if os.path.isfile(file): + db.read(file) + encodings_map = db.encodings_map + suffix_map = db.suffix_map + types_map = db.types_map[True] + common_types = db.types_map[False] + # Make the DB a global variable now that it is fully initialized + _db = db + + +def read_mime_types(file): + try: + f = open(file) + except IOError: + return None + db = MimeTypes() + db.readfp(f, True) + return db.types_map[True] + + +def _default_mime_types(): + global suffix_map + global encodings_map + global types_map + global common_types + + suffix_map = { + '.svgz': '.svg.gz', + '.tgz': '.tar.gz', + '.taz': '.tar.gz', + '.tz': '.tar.gz', + '.tbz2': '.tar.bz2', + '.txz': '.tar.xz', + } + + encodings_map = { + '.gz': 'gzip', + '.Z': 'compress', + '.bz2': 'bzip2', + '.xz': 'xz', + } + + # Before adding new types, make sure they are either registered with IANA, + # at http://www.iana.org/assignments/media-types + # or extensions, i.e. using the x- prefix + + # If you add to these, please keep them sorted! 
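+    #
+    # To see how suffix_map, encodings_map and types_map cooperate, trace
+    # guess_type() on 'foo.tgz':
+    #   1. suffix_map rewrites '.tgz' to '.tar.gz'   -> 'foo.tar.gz'
+    #   2. encodings_map consumes '.gz'              -> encoding 'gzip'
+    #   3. types_map looks up the remaining '.tar'   -> 'application/x-tar'
+    # so guess_type('foo.tgz') == ('application/x-tar', 'gzip').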
+ types_map = { + '.a' : 'application/octet-stream', + '.ai' : 'application/postscript', + '.aif' : 'audio/x-aiff', + '.aifc' : 'audio/x-aiff', + '.aiff' : 'audio/x-aiff', + '.au' : 'audio/basic', + '.avi' : 'video/x-msvideo', + '.bat' : 'text/plain', + '.bcpio' : 'application/x-bcpio', + '.bin' : 'application/octet-stream', + '.bmp' : 'image/x-ms-bmp', + '.c' : 'text/plain', + # Duplicates :( + '.cdf' : 'application/x-cdf', + '.cdf' : 'application/x-netcdf', + '.cpio' : 'application/x-cpio', + '.csh' : 'application/x-csh', + '.css' : 'text/css', + '.dll' : 'application/octet-stream', + '.doc' : 'application/msword', + '.dot' : 'application/msword', + '.dvi' : 'application/x-dvi', + '.eml' : 'message/rfc822', + '.eps' : 'application/postscript', + '.etx' : 'text/x-setext', + '.exe' : 'application/octet-stream', + '.gif' : 'image/gif', + '.gtar' : 'application/x-gtar', + '.h' : 'text/plain', + '.hdf' : 'application/x-hdf', + '.htm' : 'text/html', + '.html' : 'text/html', + '.ico' : 'image/vnd.microsoft.icon', + '.ief' : 'image/ief', + '.jpe' : 'image/jpeg', + '.jpeg' : 'image/jpeg', + '.jpg' : 'image/jpeg', + '.js' : 'application/javascript', + '.ksh' : 'text/plain', + '.latex' : 'application/x-latex', + '.m1v' : 'video/mpeg', + '.m3u' : 'application/vnd.apple.mpegurl', + '.m3u8' : 'application/vnd.apple.mpegurl', + '.man' : 'application/x-troff-man', + '.me' : 'application/x-troff-me', + '.mht' : 'message/rfc822', + '.mhtml' : 'message/rfc822', + '.mif' : 'application/x-mif', + '.mov' : 'video/quicktime', + '.movie' : 'video/x-sgi-movie', + '.mp2' : 'audio/mpeg', + '.mp3' : 'audio/mpeg', + '.mp4' : 'video/mp4', + '.mpa' : 'video/mpeg', + '.mpe' : 'video/mpeg', + '.mpeg' : 'video/mpeg', + '.mpg' : 'video/mpeg', + '.ms' : 'application/x-troff-ms', + '.nc' : 'application/x-netcdf', + '.nws' : 'message/rfc822', + '.o' : 'application/octet-stream', + '.obj' : 'application/octet-stream', + '.oda' : 'application/oda', + '.p12' : 'application/x-pkcs12', + '.p7c' : 'application/pkcs7-mime', + '.pbm' : 'image/x-portable-bitmap', + '.pdf' : 'application/pdf', + '.pfx' : 'application/x-pkcs12', + '.pgm' : 'image/x-portable-graymap', + '.pl' : 'text/plain', + '.png' : 'image/png', + '.pnm' : 'image/x-portable-anymap', + '.pot' : 'application/vnd.ms-powerpoint', + '.ppa' : 'application/vnd.ms-powerpoint', + '.ppm' : 'image/x-portable-pixmap', + '.pps' : 'application/vnd.ms-powerpoint', + '.ppt' : 'application/vnd.ms-powerpoint', + '.ps' : 'application/postscript', + '.pwz' : 'application/vnd.ms-powerpoint', + '.py' : 'text/x-python', + '.pyc' : 'application/x-python-code', + '.pyo' : 'application/x-python-code', + '.qt' : 'video/quicktime', + '.ra' : 'audio/x-pn-realaudio', + '.ram' : 'application/x-pn-realaudio', + '.ras' : 'image/x-cmu-raster', + '.rdf' : 'application/xml', + '.rgb' : 'image/x-rgb', + '.roff' : 'application/x-troff', + '.rtx' : 'text/richtext', + '.sgm' : 'text/x-sgml', + '.sgml' : 'text/x-sgml', + '.sh' : 'application/x-sh', + '.shar' : 'application/x-shar', + '.snd' : 'audio/basic', + '.so' : 'application/octet-stream', + '.src' : 'application/x-wais-source', + '.sv4cpio': 'application/x-sv4cpio', + '.sv4crc' : 'application/x-sv4crc', + '.svg' : 'image/svg+xml', + '.swf' : 'application/x-shockwave-flash', + '.t' : 'application/x-troff', + '.tar' : 'application/x-tar', + '.tcl' : 'application/x-tcl', + '.tex' : 'application/x-tex', + '.texi' : 'application/x-texinfo', + '.texinfo': 'application/x-texinfo', + '.tif' : 'image/tiff', + '.tiff' : 'image/tiff', + '.tr' : 
'application/x-troff',
+        '.tsv'    : 'text/tab-separated-values',
+        '.txt'    : 'text/plain',
+        '.ustar'  : 'application/x-ustar',
+        '.vcf'    : 'text/x-vcard',
+        '.wav'    : 'audio/x-wav',
+        '.wiz'    : 'application/msword',
+        '.wsdl'   : 'application/xml',
+        '.xbm'    : 'image/x-xbitmap',
+        '.xlb'    : 'application/vnd.ms-excel',
+        # Duplicates :(
+        '.xls'    : 'application/excel',
+        '.xls'    : 'application/vnd.ms-excel',
+        '.xml'    : 'text/xml',
+        '.xpdl'   : 'application/xml',
+        '.xpm'    : 'image/x-xpixmap',
+        '.xsl'    : 'application/xml',
+        '.xwd'    : 'image/x-xwindowdump',
+        '.zip'    : 'application/zip',
+        }
+
+    # These are non-standard types, commonly found in the wild.  They will
+    # only match if the strict=0 flag is given to the API methods.
+
+    # Please sort these too
+    common_types = {
+        '.jpg' : 'image/jpg',
+        '.mid' : 'audio/midi',
+        '.midi': 'audio/midi',
+        '.pct' : 'image/pict',
+        '.pic' : 'image/pict',
+        '.pict': 'image/pict',
+        '.rtf' : 'application/rtf',
+        '.xul' : 'text/xul'
+        }
+
+
+_default_mime_types()
+
+
+if __name__ == '__main__':
+    import getopt
+
+    USAGE = """\
+Usage: mimetypes.py [options] type
+
+Options:
+    --help / -h       -- print this message and exit
+    --lenient / -l    -- additionally search some common, but non-standard
+                         types.
+    --extension / -e  -- guess extension instead of type
+
+More than one type argument may be given.
+"""
+
+    def usage(code, msg=''):
+        print(USAGE)
+        if msg: print(msg)
+        sys.exit(code)
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], 'hle',
+                                   ['help', 'lenient', 'extension'])
+    except getopt.error as msg:
+        usage(1, msg)
+
+    strict = 1
+    extension = 0
+    for opt, arg in opts:
+        if opt in ('-h', '--help'):
+            usage(0)
+        elif opt in ('-l', '--lenient'):
+            strict = 0
+        elif opt in ('-e', '--extension'):
+            extension = 1
+    for gtype in args:
+        if extension:
+            guess = guess_extension(gtype, strict)
+            if not guess: print("I don't know anything about type", gtype)
+            else: print(guess)
+        else:
+            guess, encoding = guess_type(gtype, strict)
+            if not guess: print("I don't know anything about type", gtype)
+            else: print('type:', guess, 'encoding:', encoding)
diff --git a/tests/bytecode/pylib-tests/modulefinder.py b/tests/bytecode/pylib-tests/modulefinder.py
new file mode 100644
index 0000000000..f90a4327e6
--- /dev/null
+++ b/tests/bytecode/pylib-tests/modulefinder.py
@@ -0,0 +1,663 @@
+"""Find modules used by a script, using introspection."""
+
+import dis
+import imp
+import importlib.machinery
+import marshal
+import os
+import sys
+import types
+import struct
+
+# XXX Clean up once str8's cstor matches bytes.
+LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')])
+IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')])
+STORE_NAME = bytes([dis.opname.index('STORE_NAME')])
+STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')])
+STORE_OPS = [STORE_NAME, STORE_GLOBAL]
+HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
+
+# Modulefinder does a good job of simulating Python's import machinery,
+# but it cannot handle __path__ modifications that packages make at
+# runtime.  Therefore there is a mechanism whereby you can register extra
+# paths in this map for a package, and it will be honored.
+
+# Note: this is a mapping to lists of paths.
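+#
+# A hypothetical example (the package name and path are illustrative
+# only): for a package 'plugins' that appends '/opt/app/plugins' to its
+# own __path__ at import time, call
+#     AddPackagePath('plugins', '/opt/app/plugins')
+# before running the finder so that submodules on that path are found.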
+packagePathMap = {} + +# A Public interface +def AddPackagePath(packagename, path): + packagePathMap.setdefault(packagename, []).append(path) + +replacePackageMap = {} + +# This ReplacePackage mechanism allows modulefinder to work around +# situations in which a package injects itself under the name +# of another package into sys.modules at runtime by calling +# ReplacePackage("real_package_name", "faked_package_name") +# before running ModuleFinder. + +def ReplacePackage(oldname, newname): + replacePackageMap[oldname] = newname + + +class Module: + + def __init__(self, name, file=None, path=None): + self.__name__ = name + self.__file__ = file + self.__path__ = path + self.__code__ = None + # The set of global names that are assigned to in the module. + # This includes those names imported through starimports of + # Python modules. + self.globalnames = {} + # The set of starimports this module did that could not be + # resolved, ie. a starimport from a non-Python module. + self.starimports = {} + + def __repr__(self): + s = "Module(%r" % (self.__name__,) + if self.__file__ is not None: + s = s + ", %r" % (self.__file__,) + if self.__path__ is not None: + s = s + ", %r" % (self.__path__,) + s = s + ")" + return s + +class ModuleFinder: + + def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): + if path is None: + path = sys.path + self.path = path + self.modules = {} + self.badmodules = {} + self.debug = debug + self.indent = 0 + self.excludes = excludes + self.replace_paths = replace_paths + self.processed_paths = [] # Used in debugging only + + def msg(self, level, str, *args): + if level <= self.debug: + for i in range(self.indent): + print(" ", end=' ') + print(str, end=' ') + for arg in args: + print(repr(arg), end=' ') + print() + + def msgin(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent + 1 + self.msg(*args) + + def msgout(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent - 1 + self.msg(*args) + + def run_script(self, pathname): + self.msg(2, "run_script", pathname) + with open(pathname) as fp: + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) + + def load_file(self, pathname): + dir, name = os.path.split(pathname) + name, ext = os.path.splitext(name) + with open(pathname) as fp: + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) + + def import_hook(self, name, caller=None, fromlist=None, level=-1): + self.msg(3, "import_hook", name, caller, fromlist, level) + parent = self.determine_parent(caller, level=level) + q, tail = self.find_head_package(parent, name) + m = self.load_tail(q, tail) + if not fromlist: + return q + if m.__path__: + self.ensure_fromlist(m, fromlist) + return None + + def determine_parent(self, caller, level=-1): + self.msgin(4, "determine_parent", caller, level) + if not caller or level == 0: + self.msgout(4, "determine_parent -> None") + return None + pname = caller.__name__ + if level >= 1: # relative import + if caller.__path__: + level -= 1 + if level == 0: + parent = self.modules[pname] + assert parent is caller + self.msgout(4, "determine_parent ->", parent) + return parent + if pname.count(".") < level: + raise ImportError("relative importpath too deep") + pname = ".".join(pname.split(".")[:-level]) + parent = self.modules[pname] + self.msgout(4, "determine_parent ->", parent) + return parent + if caller.__path__: + parent = self.modules[pname] + assert caller is parent + self.msgout(4, 
"determine_parent ->", parent) + return parent + if '.' in pname: + i = pname.rfind('.') + pname = pname[:i] + parent = self.modules[pname] + assert parent.__name__ == pname + self.msgout(4, "determine_parent ->", parent) + return parent + self.msgout(4, "determine_parent -> None") + return None + + def find_head_package(self, parent, name): + self.msgin(4, "find_head_package", parent, name) + if '.' in name: + i = name.find('.') + head = name[:i] + tail = name[i+1:] + else: + head = name + tail = "" + if parent: + qname = "%s.%s" % (parent.__name__, head) + else: + qname = head + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + if parent: + qname = head + parent = None + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + self.msgout(4, "raise ImportError: No module named", qname) + raise ImportError("No module named " + qname) + + def load_tail(self, q, tail): + self.msgin(4, "load_tail", q, tail) + m = q + while tail: + i = tail.find('.') + if i < 0: i = len(tail) + head, tail = tail[:i], tail[i+1:] + mname = "%s.%s" % (m.__name__, head) + m = self.import_module(head, mname, m) + if not m: + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError("No module named " + mname) + self.msgout(4, "load_tail ->", m) + return m + + def ensure_fromlist(self, m, fromlist, recursive=0): + self.msg(4, "ensure_fromlist", m, fromlist, recursive) + for sub in fromlist: + if sub == "*": + if not recursive: + all = self.find_all_submodules(m) + if all: + self.ensure_fromlist(m, all, 1) + elif not hasattr(m, sub): + subname = "%s.%s" % (m.__name__, sub) + submod = self.import_module(sub, subname, m) + if not submod: + raise ImportError("No module named " + subname) + + def find_all_submodules(self, m): + if not m.__path__: + return + modules = {} + # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. + # But we must also collect Python extension modules - although + # we cannot separate normal dlls from Python extensions. 
+ suffixes = [] + suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] + suffixes += importlib.machinery.SOURCE_SUFFIXES[:] + suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] + for dir in m.__path__: + try: + names = os.listdir(dir) + except os.error: + self.msg(2, "can't list directory", dir) + continue + for name in names: + mod = None + for suff in suffixes: + n = len(suff) + if name[-n:] == suff: + mod = name[:-n] + break + if mod and mod != "__init__": + modules[mod] = mod + return modules.keys() + + def import_module(self, partname, fqname, parent): + self.msgin(3, "import_module", partname, fqname, parent) + try: + m = self.modules[fqname] + except KeyError: + pass + else: + self.msgout(3, "import_module ->", m) + return m + if fqname in self.badmodules: + self.msgout(3, "import_module -> None") + return None + if parent and parent.__path__ is None: + self.msgout(3, "import_module -> None") + return None + try: + fp, pathname, stuff = self.find_module(partname, + parent and parent.__path__, parent) + except ImportError: + self.msgout(3, "import_module ->", None) + return None + try: + m = self.load_module(fqname, fp, pathname, stuff) + finally: + if fp: + fp.close() + if parent: + setattr(parent, partname, m) + self.msgout(3, "import_module ->", m) + return m + + def load_module(self, fqname, fp, pathname, file_info): + suffix, mode, type = file_info + self.msgin(2, "load_module", fqname, fp and "fp", pathname) + if type == imp.PKG_DIRECTORY: + m = self.load_package(fqname, pathname) + self.msgout(2, "load_module ->", m) + return m + if type == imp.PY_SOURCE: + co = compile(fp.read()+'\n', pathname, 'exec') + elif type == imp.PY_COMPILED: + if fp.read(4) != imp.get_magic(): + self.msgout(2, "raise ImportError: Bad magic number", pathname) + raise ImportError("Bad magic number in %s" % pathname) + fp.read(4) + co = marshal.load(fp) + else: + co = None + m = self.add_module(fqname) + m.__file__ = pathname + if co: + if self.replace_paths: + co = self.replace_paths_in_code(co) + m.__code__ = co + self.scan_code(co, m) + self.msgout(2, "load_module ->", m) + return m + + def _add_badmodule(self, name, caller): + if name not in self.badmodules: + self.badmodules[name] = {} + if caller: + self.badmodules[name][caller.__name__] = 1 + else: + self.badmodules[name]["-"] = 1 + + def _safe_import_hook(self, name, caller, fromlist, level=-1): + # wrapper for self.import_hook() that won't raise ImportError + if name in self.badmodules: + self._add_badmodule(name, caller) + return + try: + self.import_hook(name, caller, level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(name, caller) + else: + if fromlist: + for sub in fromlist: + if sub in self.badmodules: + self._add_badmodule(sub, caller) + continue + try: + self.import_hook(name, caller, [sub], level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + fullname = name + "." 
+ sub + self._add_badmodule(fullname, caller) + + def scan_opcodes(self, co, + unpack = struct.unpack): + # Scan the code, and yield 'interesting' opcode combinations + # Version for Python 2.4 and older + code = co.co_code + names = co.co_names + consts = co.co_consts + while code: + c = code[0] + if c in STORE_OPS: + oparg, = unpack('<H', code[1:3]) + yield "store", (names[oparg],) + code = code[3:] + continue + if c == LOAD_CONST and code[3] == IMPORT_NAME: + oparg_1, oparg_2 = unpack('<xHxH', code[:6]) + yield "import", (consts[oparg_1], names[oparg_2]) + code = code[6:] + continue + if c >= HAVE_ARGUMENT: + code = code[3:] + else: + code = code[1:] + + def scan_opcodes_25(self, co, + unpack = struct.unpack): + # Scan the code, and yield 'interesting' opcode combinations + # Python 2.5 version (has absolute and relative imports) + code = co.co_code + names = co.co_names + consts = co.co_consts + LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME + while code: + c = bytes([code[0]]) + if c in STORE_OPS: + oparg, = unpack('<H', code[1:3]) + yield "store", (names[oparg],) + code = code[3:] + continue + if code[:9:3] == LOAD_LOAD_AND_IMPORT: + oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) + level = consts[oparg_1] + if level == 0: # absolute import + yield "absolute_import", (consts[oparg_2], names[oparg_3]) + else: # relative import + yield "relative_import", (level, consts[oparg_2], names[oparg_3]) + code = code[9:] + continue + if c >= HAVE_ARGUMENT: + code = code[3:] + else: + code = code[1:] + + def scan_code(self, co, m): + code = co.co_code + if sys.version_info >= (2, 5): + scanner = self.scan_opcodes_25 + else: + scanner = self.scan_opcodes + for what, args in scanner(co): + if what == "store": + name, = args + m.globalnames[name] = 1 + elif what == "absolute_import": + fromlist, name = args + have_star = 0 + if fromlist is not None: + if "*" in fromlist: + have_star = 1 + fromlist = [f for f in fromlist if f != "*"] + self._safe_import_hook(name, m, fromlist, level=0) + if have_star: + # We've encountered an "import *". If it is a Python module, + # the code has already been parsed and we can suck out the + # global names. + mm = None + if m.__path__: + # At this point we don't know whether 'name' is a + # submodule of 'm' or a global module. Let's just try + # the full name first. + mm = self.modules.get(m.__name__ + "." + name) + if mm is None: + mm = self.modules.get(name) + if mm is not None: + m.globalnames.update(mm.globalnames) + m.starimports.update(mm.starimports) + if mm.__code__ is None: + m.starimports[name] = 1 + else: + m.starimports[name] = 1 + elif what == "relative_import": + level, fromlist, name = args + if name: + self._safe_import_hook(name, m, fromlist, level=level) + else: + parent = self.determine_parent(m, level=level) + self._safe_import_hook(parent.__name__, None, fromlist, level=0) + else: + # We don't expect anything else from the generator. + raise RuntimeError(what) + + for c in co.co_consts: + if isinstance(c, type(co)): + self.scan_code(c, m) + + def load_package(self, fqname, pathname): + self.msgin(2, "load_package", fqname, pathname) + newname = replacePackageMap.get(fqname) + if newname: + fqname = newname + m = self.add_module(fqname) + m.__file__ = pathname + m.__path__ = [pathname] + + # As per comment at top of file, simulate runtime __path__ additions. 
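+        # e.g. with AddPackagePath('plugins', '/opt/app/plugins') registered
+        # (hypothetical names), a package found at /src/plugins ends up with
+        # __path__ == ['/src/plugins', '/opt/app/plugins'].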
+ m.__path__ = m.__path__ + packagePathMap.get(fqname, []) + + fp, buf, stuff = self.find_module("__init__", m.__path__) + try: + self.load_module(fqname, fp, buf, stuff) + self.msgout(2, "load_package ->", m) + return m + finally: + if fp: + fp.close() + + def add_module(self, fqname): + if fqname in self.modules: + return self.modules[fqname] + self.modules[fqname] = m = Module(fqname) + return m + + def find_module(self, name, path, parent=None): + if parent is not None: + # assert path is not None + fullname = parent.__name__+'.'+name + else: + fullname = name + if fullname in self.excludes: + self.msgout(3, "find_module -> Excluded", fullname) + raise ImportError(name) + + if path is None: + if name in sys.builtin_module_names: + return (None, None, ("", "", imp.C_BUILTIN)) + + path = self.path + return imp.find_module(name, path) + + def report(self): + """Print a report to stdout, listing the found modules with their + paths, as well as modules that are missing, or seem to be missing. + """ + print() + print(" %-25s %s" % ("Name", "File")) + print(" %-25s %s" % ("----", "----")) + # Print modules found + keys = sorted(self.modules.keys()) + for key in keys: + m = self.modules[key] + if m.__path__: + print("P", end=' ') + else: + print("m", end=' ') + print("%-25s" % key, m.__file__ or "") + + # Print missing modules + missing, maybe = self.any_missing_maybe() + if missing: + print() + print("Missing modules:") + for name in missing: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + # Print modules that may be missing, but then again, maybe not... + if maybe: + print() + print("Submodules thay appear to be missing, but could also be", end=' ') + print("global names in the parent package:") + for name in maybe: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + + def any_missing(self): + """Return a list of modules that appear to be missing. Use + any_missing_maybe() if you want to know which modules are + certain to be missing, and which *may* be missing. + """ + missing, maybe = self.any_missing_maybe() + return missing + maybe + + def any_missing_maybe(self): + """Return two lists, one with modules that are certainly missing + and one with modules that *may* be missing. The latter names could + either be submodules *or* just global names in the package. + + The reason it can't always be determined is that it's impossible to + tell which names are imported when "from module import *" is done + with an extension module, short of actually importing it. + """ + missing = [] + maybe = [] + for name in self.badmodules: + if name in self.excludes: + continue + i = name.rfind(".") + if i < 0: + missing.append(name) + continue + subname = name[i+1:] + pkgname = name[:i] + pkg = self.modules.get(pkgname) + if pkg is not None: + if pkgname in self.badmodules[name]: + # The package tried to import this module itself and + # failed. It's definitely missing. + missing.append(name) + elif subname in pkg.globalnames: + # It's a global in the package: definitely not missing. + pass + elif pkg.starimports: + # It could be missing, but the package did an "import *" + # from a non-Python module, so we simply can't be sure. + maybe.append(name) + else: + # It's not a global in the package, the package didn't + # do funny star imports, it's very likely to be missing. + # The symbol could be inserted into the package from the + # outside, but since that's not good style we simply list + # it missing. 
+ missing.append(name) + else: + missing.append(name) + missing.sort() + maybe.sort() + return missing, maybe + + def replace_paths_in_code(self, co): + new_filename = original_filename = os.path.normpath(co.co_filename) + for f, r in self.replace_paths: + if original_filename.startswith(f): + new_filename = r + original_filename[len(f):] + break + + if self.debug and original_filename not in self.processed_paths: + if new_filename != original_filename: + self.msgout(2, "co_filename %r changed to %r" \ + % (original_filename,new_filename,)) + else: + self.msgout(2, "co_filename %r remains unchanged" \ + % (original_filename,)) + self.processed_paths.append(original_filename) + + consts = list(co.co_consts) + for i in range(len(consts)): + if isinstance(consts[i], type(co)): + consts[i] = self.replace_paths_in_code(consts[i]) + + return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, + co.co_flags, co.co_code, tuple(consts), co.co_names, + co.co_varnames, new_filename, co.co_name, + co.co_firstlineno, co.co_lnotab, + co.co_freevars, co.co_cellvars) + + +def test(): + # Parse command line + import getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") + except getopt.error as msg: + print(msg) + return + + # Process options + debug = 1 + domods = 0 + addpath = [] + exclude = [] + for o, a in opts: + if o == '-d': + debug = debug + 1 + if o == '-m': + domods = 1 + if o == '-p': + addpath = addpath + a.split(os.pathsep) + if o == '-q': + debug = 0 + if o == '-x': + exclude.append(a) + + # Provide default arguments + if not args: + script = "hello.py" + else: + script = args[0] + + # Set the path based on sys.path and the script directory + path = sys.path[:] + path[0] = os.path.dirname(script) + path = addpath + path + if debug > 1: + print("path:") + for item in path: + print(" ", repr(item)) + + # Create the module finder and turn its crank + mf = ModuleFinder(path, debug, exclude) + for arg in args[1:]: + if arg == '-m': + domods = 1 + continue + if domods: + if arg[-2:] == '.*': + mf.import_hook(arg[:-2], None, ["*"]) + else: + mf.import_hook(arg) + else: + mf.load_file(arg) + mf.run_script(script) + mf.report() + return mf # for -i debugging + + +if __name__ == '__main__': + try: + mf = test() + except KeyboardInterrupt: + print("\n[interrupted]") diff --git a/tests/bytecode/pylib-tests/nturl2path.py b/tests/bytecode/pylib-tests/nturl2path.py new file mode 100644 index 0000000000..e0c2f23527 --- /dev/null +++ b/tests/bytecode/pylib-tests/nturl2path.py @@ -0,0 +1,66 @@ +"""Convert a NT pathname to a file URL and vice versa.""" + +def url2pathname(url): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + # e.g. + # ///C|/foo/bar/spam.foo + # becomes + # C:\foo\bar\spam.foo + import string, urllib.parse + # Windows itself uses ":" even in URLs. 
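+    # Walk-through: '///C|/foo/bar/spam.foo' (or the ':' spelling, thanks
+    # to the replace below) splits into drive 'C' and components
+    # ['', 'foo', 'bar', 'spam.foo'], yielding r'C:\foo\bar\spam.foo'.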
+ url = url.replace(':', '|') + if '|' not in url: + # No drive specifier, just convert slashes + if url[:4] == '////': + # path is something like ////host/path/on/remote/host + # convert this to \\host\path\on\remote\host + # (notice halving of slashes at the start of the path) + url = url[2:] + components = url.split('/') + # make sure not to convert quoted slashes :-) + return urllib.parse.unquote('\\'.join(components)) + comp = url.split('|') + if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: + error = 'Bad URL: ' + url + raise IOError(error) + drive = comp[0][-1].upper() + components = comp[1].split('/') + path = drive + ':' + for comp in components: + if comp: + path = path + '\\' + urllib.parse.unquote(comp) + # Issue #11474 - handing url such as |c/| + if path.endswith(':') and url.endswith('/'): + path += '\\' + return path + +def pathname2url(p): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + # e.g. + # C:\foo\bar\spam.foo + # becomes + # ///C|/foo/bar/spam.foo + import urllib.parse + if ':' not in p: + # No drive specifier, just convert slashes and quote the name + if p[:2] == '\\\\': + # path is something like \\host\path\on\remote\host + # convert this to ////host/path/on/remote/host + # (notice doubling of slashes at the start of the path) + p = '\\\\' + p + components = p.split('\\') + return urllib.parse.quote('/'.join(components)) + comp = p.split(':') + if len(comp) != 2 or len(comp[0]) > 1: + error = 'Bad path: ' + p + raise IOError(error) + + drive = urllib.parse.quote(comp[0].upper()) + components = comp[1].split('\\') + path = '///' + drive + ':' + for comp in components: + if comp: + path = path + '/' + urllib.parse.quote(comp) + return path diff --git a/tests/bytecode/pylib-tests/opcode.py b/tests/bytecode/pylib-tests/opcode.py new file mode 100644 index 0000000000..d81b6bc3c9 --- /dev/null +++ b/tests/bytecode/pylib-tests/opcode.py @@ -0,0 +1,185 @@ + +""" +opcode module - potentially shared between dis and other modules which +operate on bytecodes (e.g. peephole optimizers). 
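+
+The tables are self-describing; for example, once the definitions below
+have run, opmap['LOAD_CONST'] == 100, opname[100] == 'LOAD_CONST', and
+100 is listed in hasconst.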
+""" + +__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", + "haslocal", "hascompare", "hasfree", "opname", "opmap", + "HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"] + +#cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', +# 'is not', 'exception match', 'BAD') + +hasconst = [] +hasname = [] +hasjrel = [] +hasjabs = [] +haslocal = [] +hascompare = [] +hasfree = [] +hasnargs = [] + +opmap = {} +opname = [''] * 256 +for op in range(256): opname[op] = '<%r>' % (op,) +del op + +def def_op(name, op): + opname[op] = name + opmap[name] = op + +def name_op(name, op): + def_op(name, op) + hasname.append(op) + +def jrel_op(name, op): + def_op(name, op) + hasjrel.append(op) + +def jabs_op(name, op): + def_op(name, op) + hasjabs.append(op) + +# Instruction opcodes for compiled code +# Blank lines correspond to available opcodes + +def_op('POP_TOP', 1) +def_op('ROT_TWO', 2) +def_op('ROT_THREE', 3) +def_op('DUP_TOP', 4) +def_op('DUP_TOP_TWO', 5) + +def_op('NOP', 9) +def_op('UNARY_POSITIVE', 10) +def_op('UNARY_NEGATIVE', 11) +def_op('UNARY_NOT', 12) + +def_op('UNARY_INVERT', 15) + +def_op('BINARY_POWER', 19) +def_op('BINARY_MULTIPLY', 20) + +def_op('BINARY_MODULO', 22) +def_op('BINARY_ADD', 23) +def_op('BINARY_SUBTRACT', 24) +def_op('BINARY_SUBSCR', 25) +def_op('BINARY_FLOOR_DIVIDE', 26) +def_op('BINARY_TRUE_DIVIDE', 27) +def_op('INPLACE_FLOOR_DIVIDE', 28) +def_op('INPLACE_TRUE_DIVIDE', 29) + +def_op('STORE_MAP', 54) +def_op('INPLACE_ADD', 55) +def_op('INPLACE_SUBTRACT', 56) +def_op('INPLACE_MULTIPLY', 57) + +def_op('INPLACE_MODULO', 59) +def_op('STORE_SUBSCR', 60) +def_op('DELETE_SUBSCR', 61) +def_op('BINARY_LSHIFT', 62) +def_op('BINARY_RSHIFT', 63) +def_op('BINARY_AND', 64) +def_op('BINARY_XOR', 65) +def_op('BINARY_OR', 66) +def_op('INPLACE_POWER', 67) +def_op('GET_ITER', 68) +def_op('STORE_LOCALS', 69) + +def_op('PRINT_EXPR', 70) +def_op('LOAD_BUILD_CLASS', 71) +def_op('YIELD_FROM', 72) + +def_op('INPLACE_LSHIFT', 75) +def_op('INPLACE_RSHIFT', 76) +def_op('INPLACE_AND', 77) +def_op('INPLACE_XOR', 78) +def_op('INPLACE_OR', 79) +def_op('BREAK_LOOP', 80) +def_op('WITH_CLEANUP', 81) + +def_op('RETURN_VALUE', 83) +def_op('IMPORT_STAR', 84) + +def_op('YIELD_VALUE', 86) +def_op('POP_BLOCK', 87) +def_op('END_FINALLY', 88) +def_op('POP_EXCEPT', 89) + +HAVE_ARGUMENT = 90 # Opcodes from here have an argument: + +name_op('STORE_NAME', 90) # Index in name list +name_op('DELETE_NAME', 91) # "" +def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +jrel_op('FOR_ITER', 93) +def_op('UNPACK_EX', 94) +name_op('STORE_ATTR', 95) # Index in name list +name_op('DELETE_ATTR', 96) # "" +name_op('STORE_GLOBAL', 97) # "" +name_op('DELETE_GLOBAL', 98) # "" +def_op('LOAD_CONST', 100) # Index in const list +hasconst.append(100) +name_op('LOAD_NAME', 101) # Index in name list +def_op('BUILD_TUPLE', 102) # Number of tuple items +def_op('BUILD_LIST', 103) # Number of list items +def_op('BUILD_SET', 104) # Number of set items +def_op('BUILD_MAP', 105) # Number of dict entries (upto 255) +name_op('LOAD_ATTR', 106) # Index in name list +def_op('COMPARE_OP', 107) # Comparison operator +hascompare.append(107) +name_op('IMPORT_NAME', 108) # Index in name list +name_op('IMPORT_FROM', 109) # Index in name list + +jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip +jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code +jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" +jabs_op('JUMP_ABSOLUTE', 113) # "" +jabs_op('POP_JUMP_IF_FALSE', 114) # "" +jabs_op('POP_JUMP_IF_TRUE', 115) # "" + 
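+
+# Note the two jump encodings above: a jrel_op argument is a byte offset
+# relative to the instruction that follows the jump, while a jabs_op
+# argument is an absolute byte offset from the start of the code object.
+# dis uses the hasjrel/hasjabs lists to render jump targets accordingly.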
+name_op('LOAD_GLOBAL', 116) # Index in name list + +jabs_op('CONTINUE_LOOP', 119) # Target address +jrel_op('SETUP_LOOP', 120) # Distance to target address +jrel_op('SETUP_EXCEPT', 121) # "" +jrel_op('SETUP_FINALLY', 122) # "" + +def_op('LOAD_FAST', 124) # Local variable number +haslocal.append(124) +def_op('STORE_FAST', 125) # Local variable number +haslocal.append(125) +def_op('DELETE_FAST', 126) # Local variable number +haslocal.append(126) + +def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) +def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) +hasnargs.append(131) +def_op('MAKE_FUNCTION', 132) # Number of args with default values +def_op('BUILD_SLICE', 133) # Number of items +def_op('MAKE_CLOSURE', 134) +def_op('LOAD_CLOSURE', 135) +hasfree.append(135) +def_op('LOAD_DEREF', 136) +hasfree.append(136) +def_op('STORE_DEREF', 137) +hasfree.append(137) +def_op('DELETE_DEREF', 138) +hasfree.append(138) + +def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) +hasnargs.append(140) +def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) +hasnargs.append(141) +def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) +hasnargs.append(142) + +jrel_op('SETUP_WITH', 143) + +def_op('LIST_APPEND', 145) +def_op('SET_ADD', 146) +def_op('MAP_ADD', 147) + +def_op('EXTENDED_ARG', 144) +EXTENDED_ARG = 144 + +del def_op, name_op, jrel_op, jabs_op diff --git a/tests/bytecode/pylib-tests/pipes.py b/tests/bytecode/pylib-tests/pipes.py new file mode 100644 index 0000000000..f1a16f63de --- /dev/null +++ b/tests/bytecode/pylib-tests/pipes.py @@ -0,0 +1,247 @@ +"""Conversion pipeline templates. + +The problem: +------------ + +Suppose you have some data that you want to convert to another format, +such as from GIF image format to PPM image format. Maybe the +conversion involves several steps (e.g. piping it through compress or +uuencode). Some of the conversion steps may require that their input +is a disk file, others may be able to read standard input; similar for +their output. The input to the entire conversion may also be read +from a disk file or from an open file, and similar for its output. + +The module lets you construct a pipeline template by sticking one or +more conversion steps together. It will take care of creating and +removing temporary files if they are necessary to hold intermediate +data. You can then use the template to do conversions from many +different sources to many different destinations. The temporary +file names used are different each time the template is used. + +The templates are objects so you can create templates for many +different conversion steps and store them in a dictionary, for +instance. + + +Directions: +----------- + +To create a template: + t = Template() + +To add a conversion step to a template: + t.append(command, kind) +where kind is a string of two characters: the first is '-' if the +command reads its standard input or 'f' if it requires a file; the +second likewise for the output. The command must be valid /bin/sh +syntax. If input or output files are required, they are passed as +$IN and $OUT; otherwise, it must be possible to use the command in +a pipeline. + +To add a conversion step at the beginning: + t.prepend(command, kind) + +To convert a file to another file using a template: + sts = t.copy(infile, outfile) +If infile or outfile are the empty string, standard input is read or +standard output is written, respectively. The return value is the +exit status of the conversion pipeline. 
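For illustration, a minimal use of the Template class described above might look as follows. This sketch is editorial, not part of the diff: it assumes a POSIX /bin/sh with tr(1) available, and 'input.txt'/'output.txt' are placeholder file names.

    import pipes

    t = pipes.Template()
    t.append('tr a-z A-Z', '--')             # '--': reads stdin, writes stdout
    sts = t.copy('input.txt', 'output.txt')  # run the pipeline; returns its exit status
    f = t.open('input.txt', 'r')             # or read through the pipeline as a file
    print(f.read())
    f.close()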
+ +To open a file for reading or writing through a conversion pipeline: + fp = t.open(file, mode) +where mode is 'r' to read the file, or 'w' to write it -- just like +for the built-in function open() or for os.popen(). + +To create a new template object initialized to a given one: + t2 = t.clone() +""" # ' + + +import re +import os +import tempfile +# we import the quote function rather than the module for backward compat +# (quote used to be an undocumented but used function in pipes) +from shlex import quote + +__all__ = ["Template"] + +# Conversion step kinds + +FILEIN_FILEOUT = 'ff' # Must read & write real files +STDIN_FILEOUT = '-f' # Must write a real file +FILEIN_STDOUT = 'f-' # Must read a real file +STDIN_STDOUT = '--' # Normal pipeline element +SOURCE = '.-' # Must be first, writes stdout +SINK = '-.' # Must be last, reads stdin + +stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ + SOURCE, SINK] + + +class Template: + """Class representing a pipeline template.""" + + def __init__(self): + """Template() returns a fresh pipeline template.""" + self.debugging = 0 + self.reset() + + def __repr__(self): + """t.__repr__() implements repr(t).""" + return '<Template instance, steps=%r>' % (self.steps,) + + def reset(self): + """t.reset() restores a pipeline template to its initial state.""" + self.steps = [] + + def clone(self): + """t.clone() returns a new pipeline template with identical + initial state as the current one.""" + t = Template() + t.steps = self.steps[:] + t.debugging = self.debugging + return t + + def debug(self, flag): + """t.debug(flag) turns debugging on or off.""" + self.debugging = flag + + def append(self, cmd, kind): + """t.append(cmd, kind) adds a new step at the end.""" + if type(cmd) is not type(''): + raise TypeError('Template.append: cmd must be a string') + if kind not in stepkinds: + raise ValueError('Template.append: bad kind %r' % (kind,)) + if kind == SOURCE: + raise ValueError('Template.append: SOURCE can only be prepended') + if self.steps and self.steps[-1][1] == SINK: + raise ValueError('Template.append: already ends with SINK') + if kind[0] == 'f' and not re.search(r'\$IN\b', cmd): + raise ValueError('Template.append: missing $IN in cmd') + if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd): + raise ValueError('Template.append: missing $OUT in cmd') + self.steps.append((cmd, kind)) + + def prepend(self, cmd, kind): + """t.prepend(cmd, kind) adds a new step at the front.""" + if type(cmd) is not type(''): + raise TypeError('Template.prepend: cmd must be a string') + if kind not in stepkinds: + raise ValueError('Template.prepend: bad kind %r' % (kind,)) + if kind == SINK: + raise ValueError('Template.prepend: SINK can only be appended') + if self.steps and self.steps[0][1] == SOURCE: + raise ValueError('Template.prepend: already begins with SOURCE') + if kind[0] == 'f' and not re.search(r'\$IN\b', cmd): + raise ValueError('Template.prepend: missing $IN in cmd') + if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd): + raise ValueError('Template.prepend: missing $OUT in cmd') + self.steps.insert(0, (cmd, kind)) + + def open(self, file, rw): + """t.open(file, rw) returns a pipe or file object open for + reading or writing; the file is the other end of the pipeline.""" + if rw == 'r': + return self.open_r(file) + if rw == 'w': + return self.open_w(file) + raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r' + % (rw,)) + + def open_r(self, file): + """t.open_r(file) and t.open_w(file) implement + t.open(file, 
'r') and t.open(file, 'w') respectively."""
+        if not self.steps:
+            return open(file, 'r')
+        if self.steps[-1][1] == SINK:
+            raise ValueError('Template.open_r: pipeline ends with SINK')
+        cmd = self.makepipeline(file, '')
+        return os.popen(cmd, 'r')
+
+    def open_w(self, file):
+        if not self.steps:
+            return open(file, 'w')
+        if self.steps[0][1] == SOURCE:
+            raise ValueError('Template.open_w: pipeline begins with SOURCE')
+        cmd = self.makepipeline('', file)
+        return os.popen(cmd, 'w')
+
+    def copy(self, infile, outfile):
+        return os.system(self.makepipeline(infile, outfile))
+
+    def makepipeline(self, infile, outfile):
+        cmd = makepipeline(infile, self.steps, outfile)
+        if self.debugging:
+            print(cmd)
+            cmd = 'set -x; ' + cmd
+        return cmd
+
+
+def makepipeline(infile, steps, outfile):
+    # Build a list with for each command:
+    #   [input filename or '', command string, kind, output filename or '']
+
+    list = []
+    for cmd, kind in steps:
+        list.append(['', cmd, kind, ''])
+    #
+    # Make sure there is at least one step
+    #
+    if not list:
+        list.append(['', 'cat', '--', ''])
+    #
+    # Take care of the input and output ends
+    #
+    [cmd, kind] = list[0][1:3]
+    if kind[0] == 'f' and not infile:
+        list.insert(0, ['', 'cat', '--', ''])
+    list[0][0] = infile
+    #
+    [cmd, kind] = list[-1][1:3]
+    if kind[1] == 'f' and not outfile:
+        list.append(['', 'cat', '--', ''])
+    list[-1][-1] = outfile
+    #
+    # Invent temporary files to connect stages that need files
+    #
+    garbage = []
+    for i in range(1, len(list)):
+        lkind = list[i-1][2]
+        rkind = list[i][2]
+        if lkind[1] == 'f' or rkind[0] == 'f':
+            (fd, temp) = tempfile.mkstemp()
+            os.close(fd)
+            garbage.append(temp)
+            list[i-1][-1] = list[i][0] = temp
+    #
+    for item in list:
+        [inf, cmd, kind, outf] = item
+        if kind[1] == 'f':
+            cmd = 'OUT=' + quote(outf) + '; ' + cmd
+        if kind[0] == 'f':
+            cmd = 'IN=' + quote(inf) + '; ' + cmd
+        if kind[0] == '-' and inf:
+            cmd = cmd + ' <' + quote(inf)
+        if kind[1] == '-' and outf:
+            cmd = cmd + ' >' + quote(outf)
+        item[1] = cmd
+    #
+    cmdlist = list[0][1]
+    for item in list[1:]:
+        [cmd, kind] = item[1:3]
+        if item[0] == '':
+            if 'f' in kind:
+                cmd = '{ ' + cmd + '; }'
+            cmdlist = cmdlist + ' |\n' + cmd
+        else:
+            cmdlist = cmdlist + '\n' + cmd
+    #
+    if garbage:
+        rmcmd = 'rm -f'
+        for file in garbage:
+            rmcmd = rmcmd + ' ' + quote(file)
+        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
+        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
+    #
+    return cmdlist
diff --git a/tests/bytecode/pylib-tests/poplib.py b/tests/bytecode/pylib-tests/poplib.py
new file mode 100644
index 0000000000..d42d9dd320
--- /dev/null
+++ b/tests/bytecode/pylib-tests/poplib.py
@@ -0,0 +1,374 @@
+"""A POP3 client class.
+
+Based on the J. Myers POP3 draft, Jan. 96
+"""
+
+# Author: David Ascher <david_ascher@brown.edu>
+#         [heavily stealing from nntplib.py]
+# Updated: Piers Lauder <piers@cs.su.oz.au> [Jul '97]
+# String method conversion and test jig improvements by ESR, February 2001.
+# Added the POP3_SSL class. Methods loosely based on IMAP_SSL.
Hector Urtubia <urtubia@mrbook.org> Aug 2003 + +# Example (see the test function at the end of this file) + +# Imports + +import re, socket + +__all__ = ["POP3","error_proto"] + +# Exception raised when an error or invalid response is received: + +class error_proto(Exception): pass + +# Standard Port +POP3_PORT = 110 + +# POP SSL PORT +POP3_SSL_PORT = 995 + +# Line terminators (we always output CRLF, but accept any of CRLF, LFCR, LF) +CR = b'\r' +LF = b'\n' +CRLF = CR+LF + + +class POP3: + + """This class supports both the minimal and optional command sets. + Arguments can be strings or integers (where appropriate) + (e.g.: retr(1) and retr('1') both work equally well. + + Minimal Command Set: + USER name user(name) + PASS string pass_(string) + STAT stat() + LIST [msg] list(msg = None) + RETR msg retr(msg) + DELE msg dele(msg) + NOOP noop() + RSET rset() + QUIT quit() + + Optional Commands (some servers support these): + RPOP name rpop(name) + APOP name digest apop(name, digest) + TOP msg n top(msg, n) + UIDL [msg] uidl(msg = None) + + Raises one exception: 'error_proto'. + + Instantiate with: + POP3(hostname, port=110) + + NB: the POP protocol locks the mailbox from user + authorization until QUIT, so be sure to get in, suck + the messages, and quit, each time you access the + mailbox. + + POP is a line-based protocol, which means large mail + messages consume lots of python cycles reading them + line-by-line. + + If it's available on your mail server, use IMAP4 + instead, it doesn't suffer from the two problems + above. + """ + + encoding = 'UTF-8' + + def __init__(self, host, port=POP3_PORT, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + self.host = host + self.port = port + self.sock = self._create_socket(timeout) + self.file = self.sock.makefile('rb') + self._debugging = 0 + self.welcome = self._getresp() + + def _create_socket(self, timeout): + return socket.create_connection((self.host, self.port), timeout) + + def _putline(self, line): + if self._debugging > 1: print('*put*', repr(line)) + self.sock.sendall(line + CRLF) + + + # Internal: send one command to the server (through _putline()) + + def _putcmd(self, line): + if self._debugging: print('*cmd*', repr(line)) + line = bytes(line, self.encoding) + self._putline(line) + + + # Internal: return one line from the server, stripping CRLF. + # This is where all the CPU time of this module is consumed. + # Raise error_proto('-ERR EOF') if the connection is closed. + + def _getline(self): + line = self.file.readline() + if self._debugging > 1: print('*get*', repr(line)) + if not line: raise error_proto('-ERR EOF') + octets = len(line) + # server can send any combination of CR & LF + # however, 'readline()' returns lines ending in LF + # so only possibilities are ...LF, ...CRLF, CR...LF + if line[-2:] == CRLF: + return line[:-2], octets + if line[0] == CR: + return line[1:-1], octets + return line[:-1], octets + + + # Internal: get a response from the server. + # Raise 'error_proto' if the response doesn't start with '+'. + + def _getresp(self): + resp, o = self._getline() + if self._debugging > 1: print('*resp*', repr(resp)) + if not resp.startswith(b'+'): + raise error_proto(resp) + return resp + + + # Internal: get a response plus following text from the server. 
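For illustration, a minimal session using the command set documented in the POP3 class above might look as follows. This sketch is editorial, not part of the diff: the host name and credentials are placeholders.

    import poplib

    pop = poplib.POP3('pop.example.org')      # connect; the greeting is read here
    pop.user('alice')                         # USER
    pop.pass_('secret')                       # PASS -- mailbox stays locked until quit()
    count, size = pop.stat()                  # STAT -> (message count, mailbox size)
    for num in range(1, count + 1):
        resp, lines, octets = pop.retr(num)   # RETR -> (response, [line, ...], octets)
    pop.quit()                                # QUIT: commit deletions, unlock, close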
+ + def _getlongresp(self): + resp = self._getresp() + list = []; octets = 0 + line, o = self._getline() + while line != b'.': + if line.startswith(b'..'): + o = o-1 + line = line[1:] + octets = octets + o + list.append(line) + line, o = self._getline() + return resp, list, octets + + + # Internal: send a command and get the response + + def _shortcmd(self, line): + self._putcmd(line) + return self._getresp() + + + # Internal: send a command and get the response plus following text + + def _longcmd(self, line): + self._putcmd(line) + return self._getlongresp() + + + # These can be useful: + + def getwelcome(self): + return self.welcome + + + def set_debuglevel(self, level): + self._debugging = level + + + # Here are all the POP commands: + + def user(self, user): + """Send user name, return response + + (should indicate password required). + """ + return self._shortcmd('USER %s' % user) + + + def pass_(self, pswd): + """Send password, return response + + (response includes message count, mailbox size). + + NB: mailbox is locked by server from here to 'quit()' + """ + return self._shortcmd('PASS %s' % pswd) + + + def stat(self): + """Get mailbox status. + + Result is tuple of 2 ints (message count, mailbox size) + """ + retval = self._shortcmd('STAT') + rets = retval.split() + if self._debugging: print('*stat*', repr(rets)) + numMessages = int(rets[1]) + sizeMessages = int(rets[2]) + return (numMessages, sizeMessages) + + + def list(self, which=None): + """Request listing, return result. + + Result without a message number argument is in form + ['response', ['mesg_num octets', ...], octets]. + + Result when a message number argument is given is a + single response: the "scan listing" for that message. + """ + if which is not None: + return self._shortcmd('LIST %s' % which) + return self._longcmd('LIST') + + + def retr(self, which): + """Retrieve whole message number 'which'. + + Result is in form ['response', ['line', ...], octets]. + """ + return self._longcmd('RETR %s' % which) + + + def dele(self, which): + """Delete message number 'which'. + + Result is 'response'. + """ + return self._shortcmd('DELE %s' % which) + + + def noop(self): + """Does nothing. + + One supposes the response indicates the server is alive. + """ + return self._shortcmd('NOOP') + + + def rset(self): + """Unmark all messages marked for deletion.""" + return self._shortcmd('RSET') + + + def quit(self): + """Signoff: commit changes on server, unlock mailbox, close connection.""" + resp = self._shortcmd('QUIT') + self.close() + return resp + + def close(self): + """Close the connection without assuming anything about it.""" + if self.file is not None: + self.file.close() + if self.sock is not None: + self.sock.close() + self.file = self.sock = None + + #__del__ = quit + + + # optional commands: + + def rpop(self, user): + """Not sure what this does.""" + return self._shortcmd('RPOP %s' % user) + + + timestamp = re.compile(br'\+OK.*(<[^>]+>)') + + def apop(self, user, password): + """Authorisation + + - only possible if server has supplied a timestamp in initial greeting. + + Args: + user - mailbox user; + password - mailbox password. 
+
+        NB: mailbox is locked by server from here to 'quit()'
+        """
+        secret = bytes(password, self.encoding)
+        m = self.timestamp.match(self.welcome)
+        if not m:
+            raise error_proto('-ERR APOP not supported by server')
+        import hashlib
+        digest = m.group(1)+secret
+        digest = hashlib.md5(digest).hexdigest()
+        return self._shortcmd('APOP %s %s' % (user, digest))
+
+
+    def top(self, which, howmuch):
+        """Retrieve message header of message number 'which'
+        and first 'howmuch' lines of message body.
+
+        Result is in form ['response', ['line', ...], octets].
+        """
+        return self._longcmd('TOP %s %s' % (which, howmuch))
+
+
+    def uidl(self, which=None):
+        """Return message digest (unique id) list.
+
+        If 'which', result contains unique id for that message
+        in the form 'response mesgnum uid', otherwise result is
+        the list ['response', ['mesgnum uid', ...], octets]
+        """
+        if which is not None:
+            return self._shortcmd('UIDL %s' % which)
+        return self._longcmd('UIDL')
+
+try:
+    import ssl
+except ImportError:
+    pass
+else:
+
+    class POP3_SSL(POP3):
+        """POP3 client class over SSL connection
+
+        Instantiate with: POP3_SSL(hostname, port=995, keyfile=None, certfile=None)
+
+               hostname - the hostname of the pop3 over ssl server
+               port - port number
+               keyfile - PEM formatted file that contains your private key
+               certfile - PEM formatted certificate chain file
+
+        See the methods of the parent class POP3 for more documentation.
+        """
+
+        def __init__(self, host, port=POP3_SSL_PORT, keyfile=None, certfile=None,
+                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT, context=None):
+            if context is not None and keyfile is not None:
+                raise ValueError("context and keyfile arguments are mutually "
+                                 "exclusive")
+            if context is not None and certfile is not None:
+                raise ValueError("context and certfile arguments are mutually "
+                                 "exclusive")
+            self.keyfile = keyfile
+            self.certfile = certfile
+            self.context = context
+            POP3.__init__(self, host, port, timeout)
+
+        def _create_socket(self, timeout):
+            sock = POP3._create_socket(self, timeout)
+            if self.context is not None:
+                sock = self.context.wrap_socket(sock)
+            else:
+                sock = ssl.wrap_socket(sock, self.keyfile, self.certfile)
+            return sock
+
+    __all__.append("POP3_SSL")
+
+if __name__ == "__main__":
+    import sys
+    a = POP3(sys.argv[1])
+    print(a.getwelcome())
+    a.user(sys.argv[2])
+    a.pass_(sys.argv[3])
+    a.list()
+    (numMsgs, totalSize) = a.stat()
+    for i in range(1, numMsgs + 1):
+        (header, msg, octets) = a.retr(i)
+        print("Message %d:" % i)
+        for line in msg:
+            print('   ' + line)
+        print('-----------------------')
+    a.quit()
diff --git a/tests/bytecode/pylib-tests/pty.py b/tests/bytecode/pylib-tests/pty.py
new file mode 100644
index 0000000000..3ccf619896
--- /dev/null
+++ b/tests/bytecode/pylib-tests/pty.py
@@ -0,0 +1,180 @@
+"""Pseudo terminal utilities."""
+
+# Bugs: No signal handling.  Doesn't set slave termios and window size.
+#       Only tested on Linux.
+# See:  W. Richard Stevens. 1992.  Advanced Programming in the
+#       UNIX Environment.  Chapter 19.
+# Author: Steen Lumholt -- with additions by Guido.
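For illustration, typical use of the module defined below might look as follows. This sketch is editorial, not part of the diff: it is Unix-only, and the spawned command is a placeholder.

    import pty

    # Run a command with a pseudo-terminal attached; spawn() copies data
    # between the pty and the controlling terminal until the child exits.
    pty.spawn(['/bin/ls', '-l'])

    # Or allocate a master/slave pair directly:
    master_fd, slave_fd = pty.openpty()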
+ +from select import select +import os +import tty + +__all__ = ["openpty","fork","spawn"] + +STDIN_FILENO = 0 +STDOUT_FILENO = 1 +STDERR_FILENO = 2 + +CHILD = 0 + +def openpty(): + """openpty() -> (master_fd, slave_fd) + Open a pty master/slave pair, using os.openpty() if possible.""" + + try: + return os.openpty() + except (AttributeError, OSError): + pass + master_fd, slave_name = _open_terminal() + slave_fd = slave_open(slave_name) + return master_fd, slave_fd + +def master_open(): + """master_open() -> (master_fd, slave_name) + Open a pty master and return the fd, and the filename of the slave end. + Deprecated, use openpty() instead.""" + + try: + master_fd, slave_fd = os.openpty() + except (AttributeError, OSError): + pass + else: + slave_name = os.ttyname(slave_fd) + os.close(slave_fd) + return master_fd, slave_name + + return _open_terminal() + +def _open_terminal(): + """Open pty master and return (master_fd, tty_name). + SGI and generic BSD version, for when openpty() fails.""" + try: + import sgi + except ImportError: + pass + else: + try: + tty_name, master_fd = sgi._getpty(os.O_RDWR, 0o666, 0) + except IOError as msg: + raise os.error(msg) + return master_fd, tty_name + for x in 'pqrstuvwxyzPQRST': + for y in '0123456789abcdef': + pty_name = '/dev/pty' + x + y + try: + fd = os.open(pty_name, os.O_RDWR) + except os.error: + continue + return (fd, '/dev/tty' + x + y) + raise os.error('out of pty devices') + +def slave_open(tty_name): + """slave_open(tty_name) -> slave_fd + Open the pty slave and acquire the controlling terminal, returning + opened filedescriptor. + Deprecated, use openpty() instead.""" + + result = os.open(tty_name, os.O_RDWR) + try: + from fcntl import ioctl, I_PUSH + except ImportError: + return result + try: + ioctl(result, I_PUSH, "ptem") + ioctl(result, I_PUSH, "ldterm") + except IOError: + pass + return result + +def fork(): + """fork() -> (pid, master_fd) + Fork and make the child a session leader with a controlling terminal.""" + + try: + pid, fd = os.forkpty() + except (AttributeError, OSError): + pass + else: + if pid == CHILD: + try: + os.setsid() + except OSError: + # os.forkpty() already set us session leader + pass + return pid, fd + + master_fd, slave_fd = openpty() + pid = os.fork() + if pid == CHILD: + # Establish a new session. + os.setsid() + os.close(master_fd) + + # Slave becomes stdin/stdout/stderr of child. + os.dup2(slave_fd, STDIN_FILENO) + os.dup2(slave_fd, STDOUT_FILENO) + os.dup2(slave_fd, STDERR_FILENO) + if (slave_fd > STDERR_FILENO): + os.close (slave_fd) + + # Explicitly open the tty to make it become a controlling tty. + tmp_fd = os.open(os.ttyname(STDOUT_FILENO), os.O_RDWR) + os.close(tmp_fd) + else: + os.close(slave_fd) + + # Parent and child process. + return pid, master_fd + +def _writen(fd, data): + """Write all the data to a descriptor.""" + while data: + n = os.write(fd, data) + data = data[n:] + +def _read(fd): + """Default read function.""" + return os.read(fd, 1024) + +def _copy(master_fd, master_read=_read, stdin_read=_read): + """Parent copy loop. + Copies + pty master -> standard output (master_read) + standard input -> pty master (stdin_read)""" + fds = [master_fd, STDIN_FILENO] + while True: + rfds, wfds, xfds = select(fds, [], []) + if master_fd in rfds: + data = master_read(master_fd) + if not data: # Reached EOF. 
+ fds.remove(master_fd) + else: + os.write(STDOUT_FILENO, data) + if STDIN_FILENO in rfds: + data = stdin_read(STDIN_FILENO) + if not data: + fds.remove(STDIN_FILENO) + else: + _writen(master_fd, data) + +def spawn(argv, master_read=_read, stdin_read=_read): + """Create a spawned process.""" + if type(argv) == type(''): + argv = (argv,) + pid, master_fd = fork() + if pid == CHILD: + os.execlp(argv[0], *argv) + try: + mode = tty.tcgetattr(STDIN_FILENO) + tty.setraw(STDIN_FILENO) + restore = 1 + except tty.error: # This is the same as termios.error + restore = 0 + try: + _copy(master_fd, master_read, stdin_read) + except (IOError, OSError): + if restore: + tty.tcsetattr(STDIN_FILENO, tty.TCSAFLUSH, mode) + + os.close(master_fd) diff --git a/tests/bytecode/pylib-tests/reprlib.py b/tests/bytecode/pylib-tests/reprlib.py new file mode 100644 index 0000000000..f8033604da --- /dev/null +++ b/tests/bytecode/pylib-tests/reprlib.py @@ -0,0 +1,157 @@ +"""Redo the builtin repr() (representation) but with limits on most sizes.""" + +__all__ = ["Repr", "repr", "recursive_repr"] + +import builtins +from itertools import islice +try: + from _thread import get_ident +except ImportError: + from _dummy_thread import get_ident + +def recursive_repr(fillvalue='...'): + 'Decorator to make a repr function return fillvalue for a recursive call' + + def decorating_function(user_function): + repr_running = set() + + def wrapper(self): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + result = user_function(self) + finally: + repr_running.discard(key) + return result + + # Can't use functools.wraps() here because of bootstrap issues + wrapper.__module__ = getattr(user_function, '__module__') + wrapper.__doc__ = getattr(user_function, '__doc__') + wrapper.__name__ = getattr(user_function, '__name__') + wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) + return wrapper + + return decorating_function + +class Repr: + + def __init__(self): + self.maxlevel = 6 + self.maxtuple = 6 + self.maxlist = 6 + self.maxarray = 5 + self.maxdict = 4 + self.maxset = 6 + self.maxfrozenset = 6 + self.maxdeque = 6 + self.maxstring = 30 + self.maxlong = 40 + self.maxother = 30 + + def repr(self, x): + return self.repr1(x, self.maxlevel) + + def repr1(self, x, level): + typename = type(x).__name__ + if ' ' in typename: + parts = typename.split() + typename = '_'.join(parts) + if hasattr(self, 'repr_' + typename): + return getattr(self, 'repr_' + typename)(x, level) + else: + return self.repr_instance(x, level) + + def _repr_iterable(self, x, level, left, right, maxiter, trail=''): + n = len(x) + if level <= 0 and n: + s = '...' 
+ else: + newlevel = level - 1 + repr1 = self.repr1 + pieces = [repr1(elem, newlevel) for elem in islice(x, maxiter)] + if n > maxiter: pieces.append('...') + s = ', '.join(pieces) + if n == 1 and trail: right = trail + right + return '%s%s%s' % (left, s, right) + + def repr_tuple(self, x, level): + return self._repr_iterable(x, level, '(', ')', self.maxtuple, ',') + + def repr_list(self, x, level): + return self._repr_iterable(x, level, '[', ']', self.maxlist) + + def repr_array(self, x, level): + header = "array('%s', [" % x.typecode + return self._repr_iterable(x, level, header, '])', self.maxarray) + + def repr_set(self, x, level): + x = _possibly_sorted(x) + return self._repr_iterable(x, level, 'set([', '])', self.maxset) + + def repr_frozenset(self, x, level): + x = _possibly_sorted(x) + return self._repr_iterable(x, level, 'frozenset([', '])', + self.maxfrozenset) + + def repr_deque(self, x, level): + return self._repr_iterable(x, level, 'deque([', '])', self.maxdeque) + + def repr_dict(self, x, level): + n = len(x) + if n == 0: return '{}' + if level <= 0: return '{...}' + newlevel = level - 1 + repr1 = self.repr1 + pieces = [] + for key in islice(_possibly_sorted(x), self.maxdict): + keyrepr = repr1(key, newlevel) + valrepr = repr1(x[key], newlevel) + pieces.append('%s: %s' % (keyrepr, valrepr)) + if n > self.maxdict: pieces.append('...') + s = ', '.join(pieces) + return '{%s}' % (s,) + + def repr_str(self, x, level): + s = builtins.repr(x[:self.maxstring]) + if len(s) > self.maxstring: + i = max(0, (self.maxstring-3)//2) + j = max(0, self.maxstring-3-i) + s = builtins.repr(x[:i] + x[len(x)-j:]) + s = s[:i] + '...' + s[len(s)-j:] + return s + + def repr_int(self, x, level): + s = builtins.repr(x) # XXX Hope this isn't too slow... + if len(s) > self.maxlong: + i = max(0, (self.maxlong-3)//2) + j = max(0, self.maxlong-3-i) + s = s[:i] + '...' + s[len(s)-j:] + return s + + def repr_instance(self, x, level): + try: + s = builtins.repr(x) + # Bugs in x.__repr__() can cause arbitrary + # exceptions -- then make up something + except Exception: + return '<%s instance at %x>' % (x.__class__.__name__, id(x)) + if len(s) > self.maxother: + i = max(0, (self.maxother-3)//2) + j = max(0, self.maxother-3-i) + s = s[:i] + '...' + s[len(s)-j:] + return s + + +def _possibly_sorted(x): + # Since not all sequences of items can be sorted and comparison + # functions may raise arbitrary exceptions, return an unsorted + # sequence in that case. + try: + return sorted(x) + except Exception: + return list(x) + +aRepr = Repr() +repr = aRepr.repr diff --git a/tests/bytecode/pylib-tests/rlcompleter.py b/tests/bytecode/pylib-tests/rlcompleter.py new file mode 100644 index 0000000000..d3a443737a --- /dev/null +++ b/tests/bytecode/pylib-tests/rlcompleter.py @@ -0,0 +1,160 @@ +"""Word completion for GNU readline. + +The completer completes keywords, built-ins and globals in a selectable +namespace (which defaults to __main__); when completing NAME.NAME..., it +evaluates (!) the expression up to the last dot and completes its attributes. + +It's very cool to do "import sys" type "sys.", hit the completion key (twice), +and see the list of names defined by the sys module! + +Tip: to use the tab key as the completion key, call + + readline.parse_and_bind("tab: complete") + +Notes: + +- Exceptions raised by the completer function are *ignored* (and generally cause + the completion to fail). 
This is a feature -- since readline sets the tty + device in raw (or cbreak) mode, printing a traceback wouldn't work well + without some complicated hoopla to save, reset and restore the tty state. + +- The evaluation of the NAME.NAME... form may cause arbitrary application + defined code to be executed if an object with a __getattr__ hook is found. + Since it is the responsibility of the application (or the user) to enable this + feature, I consider this an acceptable risk. More complicated expressions + (e.g. function calls or indexing operations) are *not* evaluated. + +- When the original stdin is not a tty device, GNU readline is never + used, and this module (and the readline module) are silently inactive. + +""" + +import builtins +import __main__ + +__all__ = ["Completer"] + +class Completer: + def __init__(self, namespace = None): + """Create a new completer for the command line. + + Completer([namespace]) -> completer instance. + + If unspecified, the default namespace where completions are performed + is __main__ (technically, __main__.__dict__). Namespaces should be + given as dictionaries. + + Completer instances should be used as the completion mechanism of + readline via the set_completer() call: + + readline.set_completer(Completer(my_namespace).complete) + """ + + if namespace and not isinstance(namespace, dict): + raise TypeError('namespace must be a dictionary') + + # Don't bind to namespace quite yet, but flag whether the user wants a + # specific namespace or to use __main__.__dict__. This will allow us + # to bind to __main__.__dict__ at completion time, not now. + if namespace is None: + self.use_main_ns = 1 + else: + self.use_main_ns = 0 + self.namespace = namespace + + def complete(self, text, state): + """Return the next possible completion for 'text'. + + This is called successively with state == 0, 1, 2, ... until it + returns None. The completion should begin with 'text'. + + """ + if self.use_main_ns: + self.namespace = __main__.__dict__ + + if state == 0: + if "." in text: + self.matches = self.attr_matches(text) + else: + self.matches = self.global_matches(text) + try: + return self.matches[state] + except IndexError: + return None + + def _callable_postfix(self, val, word): + if callable(val): + word = word + "(" + return word + + def global_matches(self, text): + """Compute matches when text is a simple name. + + Return a list of all keywords, built-in functions and names currently + defined in self.namespace that match. + + """ + import keyword + matches = [] + n = len(text) + for word in keyword.kwlist: + if word[:n] == text: + matches.append(word) + for nspace in [builtins.__dict__, self.namespace]: + for word, val in nspace.items(): + if word[:n] == text and word != "__builtins__": + matches.append(self._callable_postfix(val, word)) + return matches + + def attr_matches(self, text): + """Compute matches when text contains a dot. + + Assuming the text is of the form NAME.NAME....[NAME], and is + evaluatable in self.namespace, it will be evaluated and its attributes + (as revealed by dir()) are used as possible completions. (For class + instances, class members are also considered.) + + WARNING: this can still invoke arbitrary C code, if an object + with a __getattr__ hook is evaluated. 
+ + """ + import re + m = re.match(r"(\w+(\.\w+)*)\.(\w*)", text) + if not m: + return [] + expr, attr = m.group(1, 3) + try: + thisobject = eval(expr, self.namespace) + except Exception: + return [] + + # get the content of the object, except __builtins__ + words = dir(thisobject) + if "__builtins__" in words: + words.remove("__builtins__") + + if hasattr(thisobject, '__class__'): + words.append('__class__') + words.extend(get_class_members(thisobject.__class__)) + matches = [] + n = len(attr) + for word in words: + if word[:n] == attr and hasattr(thisobject, word): + val = getattr(thisobject, word) + word = self._callable_postfix(val, "%s.%s" % (expr, word)) + matches.append(word) + return matches + +def get_class_members(klass): + ret = dir(klass) + if hasattr(klass,'__bases__'): + for base in klass.__bases__: + ret = ret + get_class_members(base) + return ret + +try: + import readline +except ImportError: + pass +else: + readline.set_completer(Completer().complete) diff --git a/tests/bytecode/pylib-tests/runpy.py b/tests/bytecode/pylib-tests/runpy.py new file mode 100644 index 0000000000..39c0e9f7dd --- /dev/null +++ b/tests/bytecode/pylib-tests/runpy.py @@ -0,0 +1,262 @@ +"""runpy.py - locating and running Python code using the module namespace + +Provides support for locating and running Python scripts using the Python +module namespace instead of the native filesystem. + +This allows Python code to play nicely with non-filesystem based PEP 302 +importers when locating support scripts as well as when importing modules. +""" +# Written by Nick Coghlan <ncoghlan at gmail.com> +# to implement PEP 338 (Executing Modules as Scripts) + + +import os +import sys +import importlib.machinery # importlib first so we can test #15386 via -m +import imp +from pkgutil import read_code, get_loader, get_importer + +__all__ = [ + "run_module", "run_path", +] + +class _TempModule(object): + """Temporarily replace a module in sys.modules with an empty namespace""" + def __init__(self, mod_name): + self.mod_name = mod_name + self.module = imp.new_module(mod_name) + self._saved_module = [] + + def __enter__(self): + mod_name = self.mod_name + try: + self._saved_module.append(sys.modules[mod_name]) + except KeyError: + pass + sys.modules[mod_name] = self.module + return self + + def __exit__(self, *args): + if self._saved_module: + sys.modules[self.mod_name] = self._saved_module[0] + else: + del sys.modules[self.mod_name] + self._saved_module = [] + +class _ModifiedArgv0(object): + def __init__(self, value): + self.value = value + self._saved_value = self._sentinel = object() + + def __enter__(self): + if self._saved_value is not self._sentinel: + raise RuntimeError("Already preserving saved value") + self._saved_value = sys.argv[0] + sys.argv[0] = self.value + + def __exit__(self, *args): + self.value = self._sentinel + sys.argv[0] = self._saved_value + +def _run_code(code, run_globals, init_globals=None, + mod_name=None, mod_fname=None, + mod_loader=None, pkg_name=None): + """Helper to run code in nominated namespace""" + if init_globals is not None: + run_globals.update(init_globals) + run_globals.update(__name__ = mod_name, + __file__ = mod_fname, + __cached__ = None, + __doc__ = None, + __loader__ = mod_loader, + __package__ = pkg_name) + exec(code, run_globals) + return run_globals + +def _run_module_code(code, init_globals=None, + mod_name=None, mod_fname=None, + mod_loader=None, pkg_name=None): + """Helper to run code in new namespace with sys modified""" + with _TempModule(mod_name) as 
temp_module, _ModifiedArgv0(mod_fname): + mod_globals = temp_module.module.__dict__ + _run_code(code, mod_globals, init_globals, + mod_name, mod_fname, mod_loader, pkg_name) + # Copy the globals of the temporary module, as they + # may be cleared when the temporary module goes away + return mod_globals.copy() + + +# This helper is needed due to a missing component in the PEP 302 +# loader protocol (specifically, "get_filename" is non-standard) +# Since we can't introduce new features in maintenance releases, +# support was added to zipimporter under the name '_get_filename' +def _get_filename(loader, mod_name): + for attr in ("get_filename", "_get_filename"): + meth = getattr(loader, attr, None) + if meth is not None: + return os.path.abspath(meth(mod_name)) + return None + +# Helper to get the loader, code and filename for a module +def _get_module_details(mod_name): + loader = get_loader(mod_name) + if loader is None: + raise ImportError("No module named %s" % mod_name) + if loader.is_package(mod_name): + if mod_name == "__main__" or mod_name.endswith(".__main__"): + raise ImportError("Cannot use package as __main__ module") + try: + pkg_main_name = mod_name + ".__main__" + return _get_module_details(pkg_main_name) + except ImportError as e: + raise ImportError(("%s; %r is a package and cannot " + + "be directly executed") %(e, mod_name)) + code = loader.get_code(mod_name) + if code is None: + raise ImportError("No code object available for %s" % mod_name) + filename = _get_filename(loader, mod_name) + return mod_name, loader, code, filename + +# XXX ncoghlan: Should this be documented and made public? +# (Current thoughts: don't repeat the mistake that lead to its +# creation when run_module() no longer met the needs of +# mainmodule.c, but couldn't be changed because it was public) +def _run_module_as_main(mod_name, alter_argv=True): + """Runs the designated module in the __main__ namespace + + Note that the executed module will have full access to the + __main__ namespace. If this is not desirable, the run_module() + function should be used to run the module code in a fresh namespace. + + At the very least, these variables in __main__ will be overwritten: + __name__ + __file__ + __cached__ + __loader__ + __package__ + """ + try: + if alter_argv or mod_name != "__main__": # i.e. -m switch + mod_name, loader, code, fname = _get_module_details(mod_name) + else: # i.e. 
directory or zipfile execution + mod_name, loader, code, fname = _get_main_module_details() + except ImportError as exc: + # Try to provide a good error message + # for directories, zip files and the -m switch + if alter_argv: + # For -m switch, just display the exception + info = str(exc) + else: + # For directories/zipfiles, let the user + # know what the code was looking for + info = "can't find '__main__' module in %r" % sys.argv[0] + msg = "%s: %s" % (sys.executable, info) + sys.exit(msg) + pkg_name = mod_name.rpartition('.')[0] + main_globals = sys.modules["__main__"].__dict__ + if alter_argv: + sys.argv[0] = fname + return _run_code(code, main_globals, None, + "__main__", fname, loader, pkg_name) + +def run_module(mod_name, init_globals=None, + run_name=None, alter_sys=False): + """Execute a module's code without importing it + + Returns the resulting top level namespace dictionary + """ + mod_name, loader, code, fname = _get_module_details(mod_name) + if run_name is None: + run_name = mod_name + pkg_name = mod_name.rpartition('.')[0] + if alter_sys: + return _run_module_code(code, init_globals, run_name, + fname, loader, pkg_name) + else: + # Leave the sys module alone + return _run_code(code, {}, init_globals, run_name, + fname, loader, pkg_name) + +def _get_main_module_details(): + # Helper that gives a nicer error message when attempting to + # execute a zipfile or directory by invoking __main__.py + # Also moves the standard __main__ out of the way so that the + # preexisting __loader__ entry doesn't cause issues + main_name = "__main__" + saved_main = sys.modules[main_name] + del sys.modules[main_name] + try: + return _get_module_details(main_name) + except ImportError as exc: + if main_name in str(exc): + raise ImportError("can't find %r module in %r" % + (main_name, sys.path[0])) from exc + raise + finally: + sys.modules[main_name] = saved_main + + +def _get_code_from_file(run_name, fname): + # Check for a compiled file first + with open(fname, "rb") as f: + code = read_code(f) + if code is None: + # That didn't work, so try it as normal source code + with open(fname, "rb") as f: + code = compile(f.read(), fname, 'exec') + loader = importlib.machinery.SourceFileLoader(run_name, fname) + else: + loader = importlib.machinery.SourcelessFileLoader(run_name, fname) + return code, loader + +def run_path(path_name, init_globals=None, run_name=None): + """Execute code located at the specified filesystem location + + Returns the resulting top level namespace dictionary + + The file path may refer directly to a Python script (i.e. + one that could be directly executed with execfile) or else + it may refer to a zipfile or directory containing a top + level __main__.py script. + """ + if run_name is None: + run_name = "<run_path>" + pkg_name = run_name.rpartition(".")[0] + importer = get_importer(path_name) + if isinstance(importer, (type(None), imp.NullImporter)): + # Not a valid sys.path entry, so run the code directly + # execfile() doesn't help as we want to allow compiled files + code, mod_loader = _get_code_from_file(run_name, path_name) + return _run_module_code(code, init_globals, run_name, path_name, + mod_loader, pkg_name) + else: + # Importer is defined for path, so add it to + # the start of sys.path + sys.path.insert(0, path_name) + try: + # Here's where things are a little different from the run_module + # case. There, we only had to replace the module in sys while the + # code was running and doing so was somewhat optional. 
Here, we
+            # have no choice and we have to remove it even while we read the
+            # code. If we don't do this, a __loader__ attribute in the
+            # existing __main__ module may prevent location of the new module.
+            mod_name, loader, code, fname = _get_main_module_details()
+            with _TempModule(run_name) as temp_module, \
+                 _ModifiedArgv0(path_name):
+                mod_globals = temp_module.module.__dict__
+                return _run_code(code, mod_globals, init_globals,
+                                 run_name, fname, loader, pkg_name).copy()
+        finally:
+            try:
+                sys.path.remove(path_name)
+            except ValueError:
+                pass
+
+
+if __name__ == "__main__":
+    # Run the module specified as the next command line argument
+    if len(sys.argv) < 2:
+        print("No module specified for execution", file=sys.stderr)
+    else:
+        del sys.argv[0]  # Make the requested module sys.argv[0]
+        _run_module_as_main(sys.argv[0])
diff --git a/tests/bytecode/pylib-tests/sched.py b/tests/bytecode/pylib-tests/sched.py
new file mode 100644
index 0000000000..ccf8ce9074
--- /dev/null
+++ b/tests/bytecode/pylib-tests/sched.py
@@ -0,0 +1,168 @@
+"""A generally useful event scheduler class.
+
+Each instance of this class manages its own queue.
+No multi-threading is implied; you are supposed to hack that
+yourself, or use a single instance per application.
+
+Each instance is parametrized with two functions, one that is
+supposed to return the current time, one that is supposed to
+implement a delay.  You can implement real-time scheduling by
+substituting time and sleep from built-in module time, or you can
+implement simulated time by writing your own functions.  This can
+also be used to integrate scheduling with STDWIN events; the delay
+function is allowed to modify the queue.  Time can be expressed as
+integers or floating point numbers, as long as it is consistent.
+
+Events are specified by tuples (time, priority, action, argument, kwargs).
+As in UNIX, lower priority numbers mean higher priority; in this
+way the queue can be maintained as a priority queue.  Execution of the
+event means calling the action function, passing it the argument
+sequence in "argument" (remember that in Python, multiple function
+arguments are packed in a sequence) and keyword parameters in "kwargs".
+The action function may be an instance method so it
+has another way to reference private data (besides global variables).
+"""
+
+# XXX The timefunc and delayfunc should have been defined as methods
+# XXX so you can define new kinds of schedulers using subclassing
+# XXX instead of having to define a module or class just to hold
+# XXX the global state of your particular time and delay functions.
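For illustration, real-time scheduling as described in the docstring above might look as follows. This sketch is editorial, not part of the diff; it uses the time module for both the time and delay functions.

    import sched
    import time

    s = sched.scheduler(time.time, time.sleep)
    s.enter(1.0, 1, print, argument=('one second later',))    # relative delay
    s.enterabs(time.time() + 2.0, 1, print,
               argument=('absolute deadline',))                # absolute time
    s.run()                                                    # blocks until the queue is empty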
+ +import time +import heapq +from collections import namedtuple +try: + import threading +except ImportError: + import dummy_threading as threading +try: + from time import monotonic as _time +except ImportError: + from time import time as _time + +__all__ = ["scheduler"] + +class Event(namedtuple('Event', 'time, priority, action, argument, kwargs')): + def __eq__(s, o): return (s.time, s.priority) == (o.time, o.priority) + def __ne__(s, o): return (s.time, s.priority) != (o.time, o.priority) + def __lt__(s, o): return (s.time, s.priority) < (o.time, o.priority) + def __le__(s, o): return (s.time, s.priority) <= (o.time, o.priority) + def __gt__(s, o): return (s.time, s.priority) > (o.time, o.priority) + def __ge__(s, o): return (s.time, s.priority) >= (o.time, o.priority) + +_sentinel = object() + +class scheduler: + + def __init__(self, timefunc=_time, delayfunc=time.sleep): + """Initialize a new instance, passing the time and delay + functions""" + self._queue = [] + self._lock = threading.RLock() + self.timefunc = timefunc + self.delayfunc = delayfunc + + def enterabs(self, time, priority, action, argument=(), kwargs=_sentinel): + """Enter a new event in the queue at an absolute time. + + Returns an ID for the event which can be used to remove it, + if necessary. + + """ + if kwargs is _sentinel: + kwargs = {} + with self._lock: + event = Event(time, priority, action, argument, kwargs) + heapq.heappush(self._queue, event) + return event # The ID + + def enter(self, delay, priority, action, argument=(), kwargs=_sentinel): + """A variant that specifies the time as a relative time. + + This is actually the more commonly used interface. + + """ + with self._lock: + time = self.timefunc() + delay + return self.enterabs(time, priority, action, argument, kwargs) + + def cancel(self, event): + """Remove an event from the queue. + + This must be presented the ID as returned by enter(). + If the event is not in the queue, this raises ValueError. + + """ + with self._lock: + self._queue.remove(event) + heapq.heapify(self._queue) + + def empty(self): + """Check whether the queue is empty.""" + with self._lock: + return not self._queue + + def run(self, blocking=True): + """Execute events until the queue is empty. + If blocking is False executes the scheduled events due to + expire soonest (if any) and then return the deadline of the + next scheduled call in the scheduler. + + When there is a positive delay until the first event, the + delay function is called and the event is left in the queue; + otherwise, the event is removed from the queue and executed + (its action function is called, passing it the argument). If + the delay function returns prematurely, it is simply + restarted. + + It is legal for both the delay function and the action + function to modify the queue or to raise an exception; + exceptions are not caught but the scheduler's state remains + well-defined so run() may be called again. + + A questionable hack is added to allow other threads to run: + just after an event is executed, a delay of 0 is executed, to + avoid monopolizing the CPU when other threads are also + runnable. 
+ + """ + # localize variable access to minimize overhead + # and to improve thread safety + lock = self._lock + q = self._queue + delayfunc = self.delayfunc + timefunc = self.timefunc + pop = heapq.heappop + while True: + with lock: + if not q: + break + time, priority, action, argument, kwargs = q[0] + now = timefunc() + if time > now: + delay = True + else: + delay = False + pop(q) + if delay: + if not blocking: + return time - now + delayfunc(time - now) + else: + action(*argument, **kwargs) + delayfunc(0) # Let other threads run + + @property + def queue(self): + """An ordered list of upcoming events. + + Events are named tuples with fields for: + time, priority, action, arguments, kwargs + + """ + # Use heapq to sort the queue rather than using 'sorted(self._queue)'. + # With heapq, two events scheduled at the same time will show in + # the actual order they would be retrieved. + with self._lock: + events = self._queue[:] + return map(heapq.heappop, [events]*len(events)) diff --git a/tests/bytecode/pylib-tests/shelve.py b/tests/bytecode/pylib-tests/shelve.py new file mode 100644 index 0000000000..1b9bae1c13 --- /dev/null +++ b/tests/bytecode/pylib-tests/shelve.py @@ -0,0 +1,232 @@ +"""Manage shelves of pickled objects. + +A "shelf" is a persistent, dictionary-like object. The difference +with dbm databases is that the values (not the keys!) in a shelf can +be essentially arbitrary Python objects -- anything that the "pickle" +module can handle. This includes most class instances, recursive data +types, and objects containing lots of shared sub-objects. The keys +are ordinary strings. + +To summarize the interface (key is a string, data is an arbitrary +object): + + import shelve + d = shelve.open(filename) # open, with (g)dbm filename -- no suffix + + d[key] = data # store data at key (overwrites old data if + # using an existing key) + data = d[key] # retrieve a COPY of the data at key (raise + # KeyError if no such key) -- NOTE that this + # access returns a *copy* of the entry! + del d[key] # delete data stored at key (raises KeyError + # if no such key) + flag = key in d # true if the key exists + list = d.keys() # a list of all existing keys (slow!) + + d.close() # close it + +Dependent on the implementation, closing a persistent dictionary may +or may not be necessary to flush changes to disk. + +Normally, d[key] returns a COPY of the entry. This needs care when +mutable entries are mutated: for example, if d[key] is a list, + d[key].append(anitem) +does NOT modify the entry d[key] itself, as stored in the persistent +mapping -- it only modifies the copy, which is then immediately +discarded, so that the append has NO effect whatsoever. To append an +item to d[key] in a way that will affect the persistent mapping, use: + data = d[key] + data.append(anitem) + d[key] = data + +To avoid the problem with mutable entries, you may pass the keyword +argument writeback=True in the call to shelve.open. When you use: + d = shelve.open(filename, writeback=True) +then d keeps a cache of all entries you access, and writes them all back +to the persistent mapping when you call d.close(). This ensures that +such usage as d[key].append(anitem) works as intended. 
+ +However, using keyword argument writeback=True may consume vast amount +of memory for the cache, and it may make d.close() very slow, if you +access many of d's entries after opening it in this way: d has no way to +check which of the entries you access are mutable and/or which ones you +actually mutate, so it must cache, and write back at close, all of the +entries that you access. You can call d.sync() to write back all the +entries in the cache, and empty the cache (d.sync() also synchronizes +the persistent dictionary on disk, if feasible). +""" + +from pickle import Pickler, Unpickler +from io import BytesIO + +import collections + +__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"] + +class _ClosedDict(collections.MutableMapping): + """Marker for a closed dict. Access attempts raise a ValueError.""" + + def closed(self, *args): + raise ValueError('invalid operation on closed shelf') + __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed + + def __repr__(self): + return '<Closed Dictionary>' + + +class Shelf(collections.MutableMapping): + """Base class for shelf implementations. + + This is initialized with a dictionary-like object. + See the module's __doc__ string for an overview of the interface. + """ + + def __init__(self, dict, protocol=None, writeback=False, + keyencoding="utf-8"): + self.dict = dict + if protocol is None: + protocol = 3 + self._protocol = protocol + self.writeback = writeback + self.cache = {} + self.keyencoding = keyencoding + + def __iter__(self): + for k in self.dict.keys(): + yield k.decode(self.keyencoding) + + def __len__(self): + return len(self.dict) + + def __contains__(self, key): + return key.encode(self.keyencoding) in self.dict + + def get(self, key, default=None): + if key.encode(self.keyencoding) in self.dict: + return self[key] + return default + + def __getitem__(self, key): + try: + value = self.cache[key] + except KeyError: + f = BytesIO(self.dict[key.encode(self.keyencoding)]) + value = Unpickler(f).load() + if self.writeback: + self.cache[key] = value + return value + + def __setitem__(self, key, value): + if self.writeback: + self.cache[key] = value + f = BytesIO() + p = Pickler(f, self._protocol) + p.dump(value) + self.dict[key.encode(self.keyencoding)] = f.getvalue() + + def __delitem__(self, key): + del self.dict[key.encode(self.keyencoding)] + try: + del self.cache[key] + except KeyError: + pass + + def close(self): + self.sync() + try: + self.dict.close() + except AttributeError: + pass + # Catch errors that may happen when close is called from __del__ + # because CPython is in interpreter shutdown. + try: + self.dict = _ClosedDict() + except (NameError, TypeError): + self.dict = None + + def __del__(self): + if not hasattr(self, 'writeback'): + # __init__ didn't succeed, so don't bother closing + return + self.close() + + def sync(self): + if self.writeback and self.cache: + self.writeback = False + for key, entry in self.cache.items(): + self[key] = entry + self.writeback = True + self.cache = {} + if hasattr(self.dict, 'sync'): + self.dict.sync() + + +class BsdDbShelf(Shelf): + """Shelf implementation using the "BSD" db interface. + + This adds methods first(), next(), previous(), last() and + set_location() that have no counterpart in [g]dbm databases. + + The actual database must be opened using one of the "bsddb" + modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or + bsddb.rnopen) and passed to the constructor. + + See the module's __doc__ string for an overview of the interface. 
+ """ + + def __init__(self, dict, protocol=None, writeback=False, + keyencoding="utf-8"): + Shelf.__init__(self, dict, protocol, writeback, keyencoding) + + def set_location(self, key): + (key, value) = self.dict.set_location(key) + f = BytesIO(value) + return (key.decode(self.keyencoding), Unpickler(f).load()) + + def next(self): + (key, value) = next(self.dict) + f = BytesIO(value) + return (key.decode(self.keyencoding), Unpickler(f).load()) + + def previous(self): + (key, value) = self.dict.previous() + f = BytesIO(value) + return (key.decode(self.keyencoding), Unpickler(f).load()) + + def first(self): + (key, value) = self.dict.first() + f = BytesIO(value) + return (key.decode(self.keyencoding), Unpickler(f).load()) + + def last(self): + (key, value) = self.dict.last() + f = BytesIO(value) + return (key.decode(self.keyencoding), Unpickler(f).load()) + + +class DbfilenameShelf(Shelf): + """Shelf implementation using the "dbm" generic dbm interface. + + This is initialized with the filename for the dbm database. + See the module's __doc__ string for an overview of the interface. + """ + + def __init__(self, filename, flag='c', protocol=None, writeback=False): + import dbm + Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback) + + +def open(filename, flag='c', protocol=None, writeback=False): + """Open a persistent dictionary for reading and writing. + + The filename parameter is the base filename for the underlying + database. As a side-effect, an extension may be added to the + filename and more than one file may be created. The optional flag + parameter has the same interpretation as the flag parameter of + dbm.open(). The optional protocol parameter specifies the + version of the pickle protocol (0, 1, or 2). + + See the module's __doc__ string for an overview of the interface. + """ + + return DbfilenameShelf(filename, flag, protocol, writeback) diff --git a/tests/bytecode/pylib-tests/socket.py b/tests/bytecode/pylib-tests/socket.py new file mode 100644 index 0000000000..39ed325410 --- /dev/null +++ b/tests/bytecode/pylib-tests/socket.py @@ -0,0 +1,437 @@ +# Wrapper module for _socket, providing some additional facilities +# implemented in Python. + +"""\ +This module provides socket operations and some related functions. +On Unix, it supports IP (Internet Protocol) and Unix domain sockets. +On other systems, it only supports IP. Functions specific for a +socket are available as methods of the socket object. + +Functions: + +socket() -- create a new socket object +socketpair() -- create a pair of new socket objects [*] +fromfd() -- create a socket object from an open file descriptor [*] +fromshare() -- create a socket object from data received from socket.share() [*] +gethostname() -- return the current hostname +gethostbyname() -- map a hostname to its IP number +gethostbyaddr() -- map an IP number or hostname to DNS info +getservbyname() -- map a service name and a protocol name to a port number +getprotobyname() -- map a protocol name (e.g. 
'tcp') to a number +ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order +htons(), htonl() -- convert 16, 32 bit int from host to network byte order +inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format +inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) +socket.getdefaulttimeout() -- get the default timeout value +socket.setdefaulttimeout() -- set the default timeout value +create_connection() -- connects to an address, with an optional timeout and + optional source address. + + [*] not available on all platforms! + +Special objects: + +SocketType -- type object for socket objects +error -- exception raised for I/O errors +has_ipv6 -- boolean value indicating if IPv6 is supported + +Integer constants: + +AF_INET, AF_UNIX -- socket domains (first argument to socket() call) +SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) + +Many other constants may be defined; these may be used in calls to +the setsockopt() and getsockopt() methods. +""" + +import _socket +from _socket import * + +import os, sys, io + +try: + import errno +except ImportError: + errno = None +EBADF = getattr(errno, 'EBADF', 9) +EAGAIN = getattr(errno, 'EAGAIN', 11) +EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11) + +__all__ = ["getfqdn", "create_connection"] +__all__.extend(os._get_exports_list(_socket)) + + +_realsocket = socket + +# WSA error codes +if sys.platform.lower().startswith("win"): + errorTab = {} + errorTab[10004] = "The operation was interrupted." + errorTab[10009] = "A bad file handle was passed." + errorTab[10013] = "Permission denied." + errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT + errorTab[10022] = "An invalid operation was attempted." + errorTab[10035] = "The socket operation would block" + errorTab[10036] = "A blocking operation is already in progress." + errorTab[10048] = "The network address is in use." + errorTab[10054] = "The connection has been reset." + errorTab[10058] = "The network has been shut down." + errorTab[10060] = "The operation timed out." + errorTab[10061] = "Connection refused." + errorTab[10063] = "The name is too long." + errorTab[10064] = "The host is down." + errorTab[10065] = "The host is unreachable." + __all__.append("errorTab") + + +class socket(_socket.socket): + + """A subclass of _socket.socket adding the makefile() method.""" + + __slots__ = ["__weakref__", "_io_refs", "_closed"] + + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None): + _socket.socket.__init__(self, family, type, proto, fileno) + self._io_refs = 0 + self._closed = False + + def __enter__(self): + return self + + def __exit__(self, *args): + if not self._closed: + self.close() + + def __repr__(self): + """Wrap __repr__() to reveal the real class name.""" + s = _socket.socket.__repr__(self) + if s.startswith("<socket object"): + s = "<%s.%s%s%s" % (self.__class__.__module__, + self.__class__.__name__, + getattr(self, '_closed', False) and " [closed] " or "", + s[7:]) + return s + + def __getstate__(self): + raise TypeError("Cannot serialize socket object") + + def dup(self): + """dup() -> socket object + + Return a new socket object connected to the same system resource. + """ + fd = dup(self.fileno()) + sock = self.__class__(self.family, self.type, self.proto, fileno=fd) + sock.settimeout(self.gettimeout()) + return sock + + def accept(self): + """accept() -> (socket object, address info) + + Wait for an incoming connection. 
Return a new socket
+        representing the connection, and the address of the client.
+        For IP sockets, the address info is a pair (hostaddr, port).
+        """
+        fd, addr = self._accept()
+        sock = socket(self.family, self.type, self.proto, fileno=fd)
+        # Issue #7995: if no default timeout is set and the listening
+        # socket had a (non-zero) timeout, force the new socket in blocking
+        # mode to override platform-specific socket flags inheritance.
+        if getdefaulttimeout() is None and self.gettimeout():
+            sock.setblocking(True)
+        return sock, addr
+
+    def makefile(self, mode="r", buffering=None, *,
+                 encoding=None, errors=None, newline=None):
+        """makefile(...) -> an I/O stream connected to the socket
+
+        The arguments are as for io.open() after the filename,
+        except the only mode characters supported are 'r', 'w' and 'b'.
+        The semantics are similar too.  (XXX refactor to share code?)
+        """
+        for c in mode:
+            if c not in {"r", "w", "b"}:
+                raise ValueError("invalid mode %r (only r, w, b allowed)" % mode)
+        writing = "w" in mode
+        reading = "r" in mode or not writing
+        assert reading or writing
+        binary = "b" in mode
+        rawmode = ""
+        if reading:
+            rawmode += "r"
+        if writing:
+            rawmode += "w"
+        raw = SocketIO(self, rawmode)
+        self._io_refs += 1
+        if buffering is None:
+            buffering = -1
+        if buffering < 0:
+            buffering = io.DEFAULT_BUFFER_SIZE
+        if buffering == 0:
+            if not binary:
+                raise ValueError("unbuffered streams must be binary")
+            return raw
+        if reading and writing:
+            buffer = io.BufferedRWPair(raw, raw, buffering)
+        elif reading:
+            buffer = io.BufferedReader(raw, buffering)
+        else:
+            assert writing
+            buffer = io.BufferedWriter(raw, buffering)
+        if binary:
+            return buffer
+        text = io.TextIOWrapper(buffer, encoding, errors, newline)
+        text.mode = mode
+        return text
+
+    def _decref_socketios(self):
+        if self._io_refs > 0:
+            self._io_refs -= 1
+        if self._closed:
+            self.close()
+
+    def _real_close(self, _ss=_socket.socket):
+        # This function should not reference any globals. See issue #808164.
+        _ss.close(self)
+
+    def close(self):
+        # This function should not reference any globals. See issue #808164.
+        self._closed = True
+        if self._io_refs <= 0:
+            self._real_close()
+
+    def detach(self):
+        """detach() -> file descriptor
+
+        Close the socket object without closing the underlying file descriptor.
+        The object cannot be used after this call, but the file descriptor
+        can be reused for other purposes.  The file descriptor is returned.
+        """
+        self._closed = True
+        return super().detach()
+
+def fromfd(fd, family, type, proto=0):
+    """ fromfd(fd, family, type[, proto]) -> socket object
+
+    Create a socket object from a duplicate of the given file
+    descriptor.  The remaining arguments are the same as for socket().
+    """
+    nfd = dup(fd)
+    return socket(family, type, proto, nfd)
+
+if hasattr(_socket.socket, "share"):
+    def fromshare(info):
+        """ fromshare(info) -> socket object
+
+        Create a socket object from the bytes object returned by
+        socket.share(pid).
+        """
+        return socket(0, 0, 0, info)
+
+if hasattr(_socket, "socketpair"):
+
+    def socketpair(family=None, type=SOCK_STREAM, proto=0):
+        """socketpair([family[, type[, proto]]]) -> (socket object, socket object)
+
+        Create a pair of socket objects from the sockets returned by the platform
+        socketpair() function.
+        The arguments are the same as for socket() except the default family is
+        AF_UNIX if defined on the platform; otherwise, the default is AF_INET.
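+
+        For example (an illustrative sketch):
+
+            a, b = socketpair()
+            a.sendall(b'ping')
+            assert b.recv(4) == b'ping'
+            a.close(); b.close()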
+ """ + if family is None: + try: + family = AF_UNIX + except NameError: + family = AF_INET + a, b = _socket.socketpair(family, type, proto) + a = socket(family, type, proto, a.detach()) + b = socket(family, type, proto, b.detach()) + return a, b + + +_blocking_errnos = { EAGAIN, EWOULDBLOCK } + +class SocketIO(io.RawIOBase): + + """Raw I/O implementation for stream sockets. + + This class supports the makefile() method on sockets. It provides + the raw I/O interface on top of a socket object. + """ + + # One might wonder why not let FileIO do the job instead. There are two + # main reasons why FileIO is not adapted: + # - it wouldn't work under Windows (where you can't used read() and + # write() on a socket handle) + # - it wouldn't work with socket timeouts (FileIO would ignore the + # timeout and consider the socket non-blocking) + + # XXX More docs + + def __init__(self, sock, mode): + if mode not in ("r", "w", "rw", "rb", "wb", "rwb"): + raise ValueError("invalid mode: %r" % mode) + io.RawIOBase.__init__(self) + self._sock = sock + if "b" not in mode: + mode += "b" + self._mode = mode + self._reading = "r" in mode + self._writing = "w" in mode + self._timeout_occurred = False + + def readinto(self, b): + """Read up to len(b) bytes into the writable buffer *b* and return + the number of bytes read. If the socket is non-blocking and no bytes + are available, None is returned. + + If *b* is non-empty, a 0 return value indicates that the connection + was shutdown at the other end. + """ + self._checkClosed() + self._checkReadable() + if self._timeout_occurred: + raise IOError("cannot read from timed out object") + while True: + try: + return self._sock.recv_into(b) + except timeout: + self._timeout_occurred = True + raise + except InterruptedError: + continue + except error as e: + if e.args[0] in _blocking_errnos: + return None + raise + + def write(self, b): + """Write the given bytes or bytearray object *b* to the socket + and return the number of bytes written. This can be less than + len(b) if not all data could be written. If the socket is + non-blocking and no bytes could be written None is returned. + """ + self._checkClosed() + self._checkWritable() + try: + return self._sock.send(b) + except error as e: + # XXX what about EINTR? + if e.args[0] in _blocking_errnos: + return None + raise + + def readable(self): + """True if the SocketIO is open for reading. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return self._reading + + def writable(self): + """True if the SocketIO is open for writing. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return self._writing + + def seekable(self): + """True if the SocketIO is open for seeking. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return super().seekable() + + def fileno(self): + """Return the file descriptor of the underlying socket. + """ + self._checkClosed() + return self._sock.fileno() + + @property + def name(self): + if not self.closed: + return self.fileno() + else: + return -1 + + @property + def mode(self): + return self._mode + + def close(self): + """Close the SocketIO object. This doesn't close the underlying + socket, except if all references to it have disappeared. + """ + if self.closed: + return + io.RawIOBase.close(self) + self._sock._decref_socketios() + self._sock = None + + +def getfqdn(name=''): + """Get fully qualified domain name from name. + + An empty argument is interpreted as meaning the local host. 
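+
+    For example (illustrative; the result depends on the local resolver):
+
+        getfqdn('127.0.0.1')    # may return e.g. 'localhost.localdomain'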
+ + First the hostname returned by gethostbyaddr() is checked, then + possibly existing aliases. In case no FQDN is available, hostname + from gethostname() is returned. + """ + name = name.strip() + if not name or name == '0.0.0.0': + name = gethostname() + try: + hostname, aliases, ipaddrs = gethostbyaddr(name) + except error: + pass + else: + aliases.insert(0, hostname) + for name in aliases: + if '.' in name: + break + else: + name = hostname + return name + + +_GLOBAL_DEFAULT_TIMEOUT = object() + +def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in getaddrinfo(host, port, 0, SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise error("getaddrinfo returns an empty list") diff --git a/tests/bytecode/pylib-tests/socketserver.py b/tests/bytecode/pylib-tests/socketserver.py new file mode 100644 index 0000000000..8332fdf66d --- /dev/null +++ b/tests/bytecode/pylib-tests/socketserver.py @@ -0,0 +1,745 @@ +"""Generic socket server classes. + +This module tries to capture the various aspects of defining a server: + +For socket-based servers: + +- address family: + - AF_INET{,6}: IP (Internet Protocol) sockets (default) + - AF_UNIX: Unix domain sockets + - others, e.g. AF_DECNET are conceivable (see <socket.h> +- socket type: + - SOCK_STREAM (reliable stream, e.g. TCP) + - SOCK_DGRAM (datagrams, e.g. UDP) + +For request-based servers (including socket-based): + +- client address verification before further looking at the request + (This is actually a hook for any processing that needs to look + at the request before anything else, e.g. logging) +- how to handle multiple requests: + - synchronous (one request is handled at a time) + - forking (each request is handled by a new process) + - threading (each request is handled by a new thread) + +The classes in this module favor the server type that is simplest to +write: a synchronous TCP/IP server. This is bad class design, but +save some typing. (There's also the issue that a deep class hierarchy +slows down method lookups.) 
+ +There are five classes in an inheritance diagram, four of which represent +synchronous servers of four types: + + +------------+ + | BaseServer | + +------------+ + | + v + +-----------+ +------------------+ + | TCPServer |------->| UnixStreamServer | + +-----------+ +------------------+ + | + v + +-----------+ +--------------------+ + | UDPServer |------->| UnixDatagramServer | + +-----------+ +--------------------+ + +Note that UnixDatagramServer derives from UDPServer, not from +UnixStreamServer -- the only difference between an IP and a Unix +stream server is the address family, which is simply repeated in both +unix server classes. + +Forking and threading versions of each type of server can be created +using the ForkingMixIn and ThreadingMixIn mix-in classes. For +instance, a threading UDP server class is created as follows: + + class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass + +The Mix-in class must come first, since it overrides a method defined +in UDPServer! Setting the various member variables also changes +the behavior of the underlying server mechanism. + +To implement a service, you must derive a class from +BaseRequestHandler and redefine its handle() method. You can then run +various versions of the service by combining one of the server classes +with your request handler class. + +The request handler class must be different for datagram or stream +services. This can be hidden by using the request handler +subclasses StreamRequestHandler or DatagramRequestHandler. + +Of course, you still have to use your head! + +For instance, it makes no sense to use a forking server if the service +contains state in memory that can be modified by requests (since the +modifications in the child process would never reach the initial state +kept in the parent process and passed to each child). In this case, +you can use a threading server, but you will probably have to use +locks to avoid two requests that come in nearly simultaneous to apply +conflicting changes to the server state. + +On the other hand, if you are building e.g. an HTTP server, where all +data is stored externally (e.g. in the file system), a synchronous +class will essentially render the service "deaf" while one request is +being handled -- which may be for a very long time if a client is slow +to read all the data it has requested. Here a threading or forking +server is appropriate. + +In some cases, it may be appropriate to process part of a request +synchronously, but to finish processing in a forked child depending on +the request data. This can be implemented by using a synchronous +server and doing an explicit fork in the request handler class +handle() method. + +Another approach to handling multiple simultaneous requests in an +environment that supports neither threads nor fork (or where these are +too expensive or inappropriate for the service) is to maintain an +explicit table of partially finished requests and to use select() to +decide which request to work on next (or whether to handle a new +incoming request). This is particularly important for stream services +where each client can potentially be connected for a long time (if +threads or subprocesses cannot be used). + +Future work: +- Standard classes for Sun RPC (which uses either UDP or TCP) +- Standard mix-in classes to implement various authentication + and encryption schemes +- Standard framework for select-based multiplexing + +XXX Open problems: +- What to do with out-of-band data? 
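+
+A minimal sketch of the handler/server pattern described above (the
+handler name and port number are illustrative only):
+
+    class EchoHandler(StreamRequestHandler):
+        def handle(self):
+            self.wfile.write(self.rfile.readline())
+
+    server = ThreadingTCPServer(('localhost', 8000), EchoHandler)
+    server.serve_forever()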
+ +BaseServer: +- split generic "request" functionality out into BaseServer class. + Copyright (C) 2000 Luke Kenneth Casson Leighton <lkcl@samba.org> + + example: read entries from a SQL database (requires overriding + get_request() to return a table entry from the database). + entry is processed by a RequestHandlerClass. + +""" + +# Author of the BaseServer patch: Luke Kenneth Casson Leighton + +# XXX Warning! +# There is a test suite for this module, but it cannot be run by the +# standard regression test. +# To run it manually, run Lib/test/test_socketserver.py. + +__version__ = "0.4" + + +import socket +import select +import sys +import os +import errno +try: + import threading +except ImportError: + import dummy_threading as threading + +__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer", + "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler", + "StreamRequestHandler","DatagramRequestHandler", + "ThreadingMixIn", "ForkingMixIn"] +if hasattr(socket, "AF_UNIX"): + __all__.extend(["UnixStreamServer","UnixDatagramServer", + "ThreadingUnixStreamServer", + "ThreadingUnixDatagramServer"]) + +def _eintr_retry(func, *args): + """restart a system call interrupted by EINTR""" + while True: + try: + return func(*args) + except OSError as e: + if e.errno != errno.EINTR: + raise + +class BaseServer: + + """Base class for server classes. + + Methods for the caller: + + - __init__(server_address, RequestHandlerClass) + - serve_forever(poll_interval=0.5) + - shutdown() + - handle_request() # if you do not use serve_forever() + - fileno() -> int # for select() + + Methods that may be overridden: + + - server_bind() + - server_activate() + - get_request() -> request, client_address + - handle_timeout() + - verify_request(request, client_address) + - server_close() + - process_request(request, client_address) + - shutdown_request(request) + - close_request(request) + - service_actions() + - handle_error() + + Methods for derived classes: + + - finish_request(request, client_address) + + Class variables that may be overridden by derived classes or + instances: + + - timeout + - address_family + - socket_type + - allow_reuse_address + + Instance variables: + + - RequestHandlerClass + - socket + + """ + + timeout = None + + def __init__(self, server_address, RequestHandlerClass): + """Constructor. May be extended, do not override.""" + self.server_address = server_address + self.RequestHandlerClass = RequestHandlerClass + self.__is_shut_down = threading.Event() + self.__shutdown_request = False + + def server_activate(self): + """Called by constructor to activate the server. + + May be overridden. + + """ + pass + + def serve_forever(self, poll_interval=0.5): + """Handle one request at a time until shutdown. + + Polls for shutdown every poll_interval seconds. Ignores + self.timeout. If you need to do periodic tasks, do them in + another thread. + """ + self.__is_shut_down.clear() + try: + while not self.__shutdown_request: + # XXX: Consider using another file descriptor or + # connecting to the socket to wake this up instead of + # polling. Polling reduces our responsiveness to a + # shutdown request and wastes cpu at all other times. + r, w, e = _eintr_retry(select.select, [self], [], [], + poll_interval) + if self in r: + self._handle_request_noblock() + + self.service_actions() + finally: + self.__shutdown_request = False + self.__is_shut_down.set() + + def shutdown(self): + """Stops the serve_forever loop. + + Blocks until the loop has finished. 
This must be called while + serve_forever() is running in another thread, or it will + deadlock. + """ + self.__shutdown_request = True + self.__is_shut_down.wait() + + def service_actions(self): + """Called by the serve_forever() loop. + + May be overridden by a subclass / Mixin to implement any code that + needs to be run during the loop. + """ + pass + + # The distinction between handling, getting, processing and + # finishing a request is fairly arbitrary. Remember: + # + # - handle_request() is the top-level call. It calls + # select, get_request(), verify_request() and process_request() + # - get_request() is different for stream or datagram sockets + # - process_request() is the place that may fork a new process + # or create a new thread to finish the request + # - finish_request() instantiates the request handler class; + # this constructor will handle the request all by itself + + def handle_request(self): + """Handle one request, possibly blocking. + + Respects self.timeout. + """ + # Support people who used socket.settimeout() to escape + # handle_request before self.timeout was available. + timeout = self.socket.gettimeout() + if timeout is None: + timeout = self.timeout + elif self.timeout is not None: + timeout = min(timeout, self.timeout) + fd_sets = _eintr_retry(select.select, [self], [], [], timeout) + if not fd_sets[0]: + self.handle_timeout() + return + self._handle_request_noblock() + + def _handle_request_noblock(self): + """Handle one request, without blocking. + + I assume that select.select has returned that the socket is + readable before this function was called, so there should be + no risk of blocking in get_request(). + """ + try: + request, client_address = self.get_request() + except socket.error: + return + if self.verify_request(request, client_address): + try: + self.process_request(request, client_address) + except: + self.handle_error(request, client_address) + self.shutdown_request(request) + + def handle_timeout(self): + """Called if no new request arrives within self.timeout. + + Overridden by ForkingMixIn. + """ + pass + + def verify_request(self, request, client_address): + """Verify the request. May be overridden. + + Return True if we should proceed with this request. + + """ + return True + + def process_request(self, request, client_address): + """Call finish_request. + + Overridden by ForkingMixIn and ThreadingMixIn. + + """ + self.finish_request(request, client_address) + self.shutdown_request(request) + + def server_close(self): + """Called to clean-up the server. + + May be overridden. + + """ + pass + + def finish_request(self, request, client_address): + """Finish one request by instantiating RequestHandlerClass.""" + self.RequestHandlerClass(request, client_address, self) + + def shutdown_request(self, request): + """Called to shutdown and close an individual request.""" + self.close_request(request) + + def close_request(self, request): + """Called to clean up an individual request.""" + pass + + def handle_error(self, request, client_address): + """Handle an error gracefully. May be overridden. + + The default is to print a traceback and continue. + + """ + print('-'*40) + print('Exception happened during processing of request from', end=' ') + print(client_address) + import traceback + traceback.print_exc() # XXX But this goes to stderr! + print('-'*40) + + +class TCPServer(BaseServer): + + """Base class for various socket-based server classes. + + Defaults to synchronous IP stream (i.e., TCP). 
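+
+    A minimal sketch (the address is illustrative):
+
+        srv = TCPServer(('localhost', 8000), BaseRequestHandler)
+        srv.handle_request()    # serve exactly one request
+        srv.server_close()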
+ + Methods for the caller: + + - __init__(server_address, RequestHandlerClass, bind_and_activate=True) + - serve_forever(poll_interval=0.5) + - shutdown() + - handle_request() # if you don't use serve_forever() + - fileno() -> int # for select() + + Methods that may be overridden: + + - server_bind() + - server_activate() + - get_request() -> request, client_address + - handle_timeout() + - verify_request(request, client_address) + - process_request(request, client_address) + - shutdown_request(request) + - close_request(request) + - handle_error() + + Methods for derived classes: + + - finish_request(request, client_address) + + Class variables that may be overridden by derived classes or + instances: + + - timeout + - address_family + - socket_type + - request_queue_size (only for stream sockets) + - allow_reuse_address + + Instance variables: + + - server_address + - RequestHandlerClass + - socket + + """ + + address_family = socket.AF_INET + + socket_type = socket.SOCK_STREAM + + request_queue_size = 5 + + allow_reuse_address = False + + def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True): + """Constructor. May be extended, do not override.""" + BaseServer.__init__(self, server_address, RequestHandlerClass) + self.socket = socket.socket(self.address_family, + self.socket_type) + if bind_and_activate: + self.server_bind() + self.server_activate() + + def server_bind(self): + """Called by constructor to bind the socket. + + May be overridden. + + """ + if self.allow_reuse_address: + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.socket.bind(self.server_address) + self.server_address = self.socket.getsockname() + + def server_activate(self): + """Called by constructor to activate the server. + + May be overridden. + + """ + self.socket.listen(self.request_queue_size) + + def server_close(self): + """Called to clean-up the server. + + May be overridden. + + """ + self.socket.close() + + def fileno(self): + """Return socket file number. + + Interface required by select(). + + """ + return self.socket.fileno() + + def get_request(self): + """Get the request and client address from the socket. + + May be overridden. + + """ + return self.socket.accept() + + def shutdown_request(self, request): + """Called to shutdown and close an individual request.""" + try: + #explicitly shutdown. socket.close() merely releases + #the socket and waits for GC to perform the actual close. + request.shutdown(socket.SHUT_WR) + except socket.error: + pass #some platforms may raise ENOTCONN here + self.close_request(request) + + def close_request(self, request): + """Called to clean up an individual request.""" + request.close() + + +class UDPServer(TCPServer): + + """UDP server class.""" + + allow_reuse_address = False + + socket_type = socket.SOCK_DGRAM + + max_packet_size = 8192 + + def get_request(self): + data, client_addr = self.socket.recvfrom(self.max_packet_size) + return (data, self.socket), client_addr + + def server_activate(self): + # No need to call listen() for UDP. + pass + + def shutdown_request(self, request): + # No need to shutdown anything. + self.close_request(request) + + def close_request(self, request): + # No need to close anything. 
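+        # A UDP "request" is the (data, socket) pair returned by
+        # get_request() above, so there is no per-request connection
+        # object to close; this is intentionally a no-op.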
+ pass + +class ForkingMixIn: + + """Mix-in class to handle each request in a new process.""" + + timeout = 300 + active_children = None + max_children = 40 + + def collect_children(self): + """Internal routine to wait for children that have exited.""" + if self.active_children is None: return + while len(self.active_children) >= self.max_children: + # XXX: This will wait for any child process, not just ones + # spawned by this library. This could confuse other + # libraries that expect to be able to wait for their own + # children. + try: + pid, status = os.waitpid(0, 0) + except os.error: + pid = None + if pid not in self.active_children: continue + self.active_children.remove(pid) + + # XXX: This loop runs more system calls than it ought + # to. There should be a way to put the active_children into a + # process group and then use os.waitpid(-pgid) to wait for any + # of that set, but I couldn't find a way to allocate pgids + # that couldn't collide. + for child in self.active_children: + try: + pid, status = os.waitpid(child, os.WNOHANG) + except os.error: + pid = None + if not pid: continue + try: + self.active_children.remove(pid) + except ValueError as e: + raise ValueError('%s. x=%d and list=%r' % (e.message, pid, + self.active_children)) + + def handle_timeout(self): + """Wait for zombies after self.timeout seconds of inactivity. + + May be extended, do not override. + """ + self.collect_children() + + def service_actions(self): + """Collect the zombie child processes regularly in the ForkingMixIn. + + service_actions is called in the BaseServer's serve_forver loop. + """ + self.collect_children() + + def process_request(self, request, client_address): + """Fork a new subprocess to process the request.""" + pid = os.fork() + if pid: + # Parent process + if self.active_children is None: + self.active_children = [] + self.active_children.append(pid) + self.close_request(request) + return + else: + # Child process. + # This must never return, hence os._exit()! + try: + self.finish_request(request, client_address) + self.shutdown_request(request) + os._exit(0) + except: + try: + self.handle_error(request, client_address) + self.shutdown_request(request) + finally: + os._exit(1) + + +class ThreadingMixIn: + """Mix-in class to handle each request in a new thread.""" + + # Decides how threads will act upon termination of the + # main process + daemon_threads = False + + def process_request_thread(self, request, client_address): + """Same as in BaseServer but as a thread. + + In addition, exception handling is done here. 
+ + """ + try: + self.finish_request(request, client_address) + self.shutdown_request(request) + except: + self.handle_error(request, client_address) + self.shutdown_request(request) + + def process_request(self, request, client_address): + """Start a new thread to process the request.""" + t = threading.Thread(target = self.process_request_thread, + args = (request, client_address)) + t.daemon = self.daemon_threads + t.start() + + +class ForkingUDPServer(ForkingMixIn, UDPServer): pass +class ForkingTCPServer(ForkingMixIn, TCPServer): pass + +class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass +class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass + +if hasattr(socket, 'AF_UNIX'): + + class UnixStreamServer(TCPServer): + address_family = socket.AF_UNIX + + class UnixDatagramServer(UDPServer): + address_family = socket.AF_UNIX + + class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass + + class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass + +class BaseRequestHandler: + + """Base class for request handler classes. + + This class is instantiated for each request to be handled. The + constructor sets the instance variables request, client_address + and server, and then calls the handle() method. To implement a + specific service, all you need to do is to derive a class which + defines a handle() method. + + The handle() method can find the request as self.request, the + client address as self.client_address, and the server (in case it + needs access to per-server information) as self.server. Since a + separate instance is created for each request, the handle() method + can define arbitrary other instance variariables. + + """ + + def __init__(self, request, client_address, server): + self.request = request + self.client_address = client_address + self.server = server + self.setup() + try: + self.handle() + finally: + self.finish() + + def setup(self): + pass + + def handle(self): + pass + + def finish(self): + pass + + +# The following two classes make it possible to use the same service +# class for stream or datagram servers. +# Each class sets up these instance variables: +# - rfile: a file object from which receives the request is read +# - wfile: a file object to which the reply is written +# When the handle() method returns, wfile is flushed properly + + +class StreamRequestHandler(BaseRequestHandler): + + """Define self.rfile and self.wfile for stream sockets.""" + + # Default buffer sizes for rfile, wfile. + # We default rfile to buffered because otherwise it could be + # really slow for large data (a getc() call per byte); we make + # wfile unbuffered because (a) often after a write() we want to + # read and we need to flush the line; (b) big writes to unbuffered + # files are typically optimized by stdio even when big reads + # aren't. + rbufsize = -1 + wbufsize = 0 + + # A timeout to apply to the request socket, if not None. + timeout = None + + # Disable nagle algorithm for this socket, if True. + # Use only when wbufsize != 0, to avoid small packets. 
+ disable_nagle_algorithm = False + + def setup(self): + self.connection = self.request + if self.timeout is not None: + self.connection.settimeout(self.timeout) + if self.disable_nagle_algorithm: + self.connection.setsockopt(socket.IPPROTO_TCP, + socket.TCP_NODELAY, True) + self.rfile = self.connection.makefile('rb', self.rbufsize) + self.wfile = self.connection.makefile('wb', self.wbufsize) + + def finish(self): + if not self.wfile.closed: + try: + self.wfile.flush() + except socket.error: + # An final socket error may have occurred here, such as + # the local error ECONNABORTED. + pass + self.wfile.close() + self.rfile.close() + + +class DatagramRequestHandler(BaseRequestHandler): + + # XXX Regrettably, I cannot get this working on Linux; + # s.recvfrom() doesn't return a meaningful client address. + + """Define self.rfile and self.wfile for datagram sockets.""" + + def setup(self): + from io import BytesIO + self.packet, self.socket = self.request + self.rfile = BytesIO(self.packet) + self.wfile = BytesIO() + + def finish(self): + self.socket.sendto(self.wfile.getvalue(), self.client_address) diff --git a/tests/bytecode/pylib-tests/sre_constants.py b/tests/bytecode/pylib-tests/sre_constants.py new file mode 100644 index 0000000000..5898d5411a --- /dev/null +++ b/tests/bytecode/pylib-tests/sre_constants.py @@ -0,0 +1,259 @@ +# +# Secret Labs' Regular Expression Engine +# +# various symbols used by the regular expression engine. +# run this script to update the _sre include files! +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# See the sre.py file for information on usage and redistribution. +# + +"""Internal support module for sre""" + +# update when constants are added or removed + +MAGIC = 20031017 + +from _sre import MAXREPEAT + +# SRE standard exception (access as sre.error) +# should this really be here? 
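+# An illustrative example of how this exception surfaces to user code
+# (re.error is this class):
+#
+#     import re
+#     re.compile('(')     # raises error (unbalanced parenthesis)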
+ +class error(Exception): + pass + +# operators + +FAILURE = "failure" +SUCCESS = "success" + +ANY = "any" +ANY_ALL = "any_all" +ASSERT = "assert" +ASSERT_NOT = "assert_not" +AT = "at" +BIGCHARSET = "bigcharset" +BRANCH = "branch" +CALL = "call" +CATEGORY = "category" +CHARSET = "charset" +GROUPREF = "groupref" +GROUPREF_IGNORE = "groupref_ignore" +GROUPREF_EXISTS = "groupref_exists" +IN = "in" +IN_IGNORE = "in_ignore" +INFO = "info" +JUMP = "jump" +LITERAL = "literal" +LITERAL_IGNORE = "literal_ignore" +MARK = "mark" +MAX_REPEAT = "max_repeat" +MAX_UNTIL = "max_until" +MIN_REPEAT = "min_repeat" +MIN_UNTIL = "min_until" +NEGATE = "negate" +NOT_LITERAL = "not_literal" +NOT_LITERAL_IGNORE = "not_literal_ignore" +RANGE = "range" +REPEAT = "repeat" +REPEAT_ONE = "repeat_one" +SUBPATTERN = "subpattern" +MIN_REPEAT_ONE = "min_repeat_one" + +# positions +AT_BEGINNING = "at_beginning" +AT_BEGINNING_LINE = "at_beginning_line" +AT_BEGINNING_STRING = "at_beginning_string" +AT_BOUNDARY = "at_boundary" +AT_NON_BOUNDARY = "at_non_boundary" +AT_END = "at_end" +AT_END_LINE = "at_end_line" +AT_END_STRING = "at_end_string" +AT_LOC_BOUNDARY = "at_loc_boundary" +AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" +AT_UNI_BOUNDARY = "at_uni_boundary" +AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" + +# categories +CATEGORY_DIGIT = "category_digit" +CATEGORY_NOT_DIGIT = "category_not_digit" +CATEGORY_SPACE = "category_space" +CATEGORY_NOT_SPACE = "category_not_space" +CATEGORY_WORD = "category_word" +CATEGORY_NOT_WORD = "category_not_word" +CATEGORY_LINEBREAK = "category_linebreak" +CATEGORY_NOT_LINEBREAK = "category_not_linebreak" +CATEGORY_LOC_WORD = "category_loc_word" +CATEGORY_LOC_NOT_WORD = "category_loc_not_word" +CATEGORY_UNI_DIGIT = "category_uni_digit" +CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" +CATEGORY_UNI_SPACE = "category_uni_space" +CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" +CATEGORY_UNI_WORD = "category_uni_word" +CATEGORY_UNI_NOT_WORD = "category_uni_not_word" +CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" +CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" + +OPCODES = [ + + # failure=0 success=1 (just because it looks better that way :-) + FAILURE, SUCCESS, + + ANY, ANY_ALL, + ASSERT, ASSERT_NOT, + AT, + BRANCH, + CALL, + CATEGORY, + CHARSET, BIGCHARSET, + GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, + IN, IN_IGNORE, + INFO, + JUMP, + LITERAL, LITERAL_IGNORE, + MARK, + MAX_UNTIL, + MIN_UNTIL, + NOT_LITERAL, NOT_LITERAL_IGNORE, + NEGATE, + RANGE, + REPEAT, + REPEAT_ONE, + SUBPATTERN, + MIN_REPEAT_ONE + +] + +ATCODES = [ + AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, + AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, + AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, + AT_UNI_NON_BOUNDARY +] + +CHCODES = [ + CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, + CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, + CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, + CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, + CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, + CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, + CATEGORY_UNI_NOT_LINEBREAK +] + +def makedict(list): + d = {} + i = 0 + for item in list: + d[item] = i + i = i + 1 + return d + +OPCODES = makedict(OPCODES) +ATCODES = makedict(ATCODES) +CHCODES = makedict(CHCODES) + +# replacement operations for "ignore case" mode +OP_IGNORE = { + GROUPREF: GROUPREF_IGNORE, + IN: IN_IGNORE, + LITERAL: LITERAL_IGNORE, + NOT_LITERAL: NOT_LITERAL_IGNORE +} + 
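+# When SRE_FLAG_IGNORECASE is set, the compiler substitutes the
+# case-insensitive variant via this table, e.g. OP_IGNORE[LITERAL]
+# is LITERAL_IGNORE.  The tables below do the same for the multiline,
+# locale and unicode flags.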
+AT_MULTILINE = { + AT_BEGINNING: AT_BEGINNING_LINE, + AT_END: AT_END_LINE +} + +AT_LOCALE = { + AT_BOUNDARY: AT_LOC_BOUNDARY, + AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY +} + +AT_UNICODE = { + AT_BOUNDARY: AT_UNI_BOUNDARY, + AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY +} + +CH_LOCALE = { + CATEGORY_DIGIT: CATEGORY_DIGIT, + CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, + CATEGORY_SPACE: CATEGORY_SPACE, + CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, + CATEGORY_WORD: CATEGORY_LOC_WORD, + CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, + CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, + CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK +} + +CH_UNICODE = { + CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, + CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, + CATEGORY_SPACE: CATEGORY_UNI_SPACE, + CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, + CATEGORY_WORD: CATEGORY_UNI_WORD, + CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, + CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, + CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK +} + +# flags +SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) +SRE_FLAG_IGNORECASE = 2 # case insensitive +SRE_FLAG_LOCALE = 4 # honour system locale +SRE_FLAG_MULTILINE = 8 # treat target as multiline string +SRE_FLAG_DOTALL = 16 # treat target as a single string +SRE_FLAG_UNICODE = 32 # use unicode "locale" +SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments +SRE_FLAG_DEBUG = 128 # debugging +SRE_FLAG_ASCII = 256 # use ascii "locale" + +# flags for INFO primitive +SRE_INFO_PREFIX = 1 # has prefix +SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) +SRE_INFO_CHARSET = 4 # pattern starts with character from given set + +if __name__ == "__main__": + def dump(f, d, prefix): + items = sorted(d.items(), key=lambda a: a[1]) + for k, v in items: + f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) + f = open("sre_constants.h", "w") + f.write("""\ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * NOTE: This file is generated by sre_constants.py. If you need + * to change anything in here, edit sre_constants.py and run it. + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * See the _sre.c file for information on usage and redistribution. + */ + +""") + + f.write("#define SRE_MAGIC %d\n" % MAGIC) + + dump(f, OPCODES, "SRE_OP") + dump(f, ATCODES, "SRE") + dump(f, CHCODES, "SRE") + + f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE) + f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE) + f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE) + f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE) + f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL) + f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE) + f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE) + + f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX) + f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL) + f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET) + + f.close() + print("done") diff --git a/tests/bytecode/pylib-tests/stat.py b/tests/bytecode/pylib-tests/stat.py new file mode 100644 index 0000000000..704adfe2e1 --- /dev/null +++ b/tests/bytecode/pylib-tests/stat.py @@ -0,0 +1,149 @@ +"""Constants/functions for interpreting results of os.stat() and os.lstat(). 
+ +Suggested usage: from stat import * +""" + +# Indices for stat struct members in the tuple returned by os.stat() + +ST_MODE = 0 +ST_INO = 1 +ST_DEV = 2 +ST_NLINK = 3 +ST_UID = 4 +ST_GID = 5 +ST_SIZE = 6 +ST_ATIME = 7 +ST_MTIME = 8 +ST_CTIME = 9 + +# Extract bits from the mode + +def S_IMODE(mode): + """Return the portion of the file's mode that can be set by + os.chmod(). + """ + return mode & 0o7777 + +def S_IFMT(mode): + """Return the portion of the file's mode that describes the + file type. + """ + return mode & 0o170000 + +# Constants used as S_IFMT() for various file types +# (not all are implemented on all systems) + +S_IFDIR = 0o040000 # directory +S_IFCHR = 0o020000 # character device +S_IFBLK = 0o060000 # block device +S_IFREG = 0o100000 # regular file +S_IFIFO = 0o010000 # fifo (named pipe) +S_IFLNK = 0o120000 # symbolic link +S_IFSOCK = 0o140000 # socket file + +# Functions to test for each file type + +def S_ISDIR(mode): + """Return True if mode is from a directory.""" + return S_IFMT(mode) == S_IFDIR + +def S_ISCHR(mode): + """Return True if mode is from a character special device file.""" + return S_IFMT(mode) == S_IFCHR + +def S_ISBLK(mode): + """Return True if mode is from a block special device file.""" + return S_IFMT(mode) == S_IFBLK + +def S_ISREG(mode): + """Return True if mode is from a regular file.""" + return S_IFMT(mode) == S_IFREG + +def S_ISFIFO(mode): + """Return True if mode is from a FIFO (named pipe).""" + return S_IFMT(mode) == S_IFIFO + +def S_ISLNK(mode): + """Return True if mode is from a symbolic link.""" + return S_IFMT(mode) == S_IFLNK + +def S_ISSOCK(mode): + """Return True if mode is from a socket.""" + return S_IFMT(mode) == S_IFSOCK + +# Names for permission bits + +S_ISUID = 0o4000 # set UID bit +S_ISGID = 0o2000 # set GID bit +S_ENFMT = S_ISGID # file locking enforcement +S_ISVTX = 0o1000 # sticky bit +S_IREAD = 0o0400 # Unix V7 synonym for S_IRUSR +S_IWRITE = 0o0200 # Unix V7 synonym for S_IWUSR +S_IEXEC = 0o0100 # Unix V7 synonym for S_IXUSR +S_IRWXU = 0o0700 # mask for owner permissions +S_IRUSR = 0o0400 # read by owner +S_IWUSR = 0o0200 # write by owner +S_IXUSR = 0o0100 # execute by owner +S_IRWXG = 0o0070 # mask for group permissions +S_IRGRP = 0o0040 # read by group +S_IWGRP = 0o0020 # write by group +S_IXGRP = 0o0010 # execute by group +S_IRWXO = 0o0007 # mask for others (not in group) permissions +S_IROTH = 0o0004 # read by others +S_IWOTH = 0o0002 # write by others +S_IXOTH = 0o0001 # execute by others + +# Names for file flags + +UF_NODUMP = 0x00000001 # do not dump file +UF_IMMUTABLE = 0x00000002 # file may not be changed +UF_APPEND = 0x00000004 # file may only be appended to +UF_OPAQUE = 0x00000008 # directory is opaque when viewed through a union stack +UF_NOUNLINK = 0x00000010 # file may not be renamed or deleted +UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed +UF_HIDDEN = 0x00008000 # OS X: file should not be displayed +SF_ARCHIVED = 0x00010000 # file may be archived +SF_IMMUTABLE = 0x00020000 # file may not be changed +SF_APPEND = 0x00040000 # file may only be appended to +SF_NOUNLINK = 0x00100000 # file may not be renamed or deleted +SF_SNAPSHOT = 0x00200000 # file is a snapshot file + + +_filemode_table = ( + ((S_IFLNK, "l"), + (S_IFREG, "-"), + (S_IFBLK, "b"), + (S_IFDIR, "d"), + (S_IFCHR, "c"), + (S_IFIFO, "p")), + + ((S_IRUSR, "r"),), + ((S_IWUSR, "w"),), + ((S_IXUSR|S_ISUID, "s"), + (S_ISUID, "S"), + (S_IXUSR, "x")), + + ((S_IRGRP, "r"),), + ((S_IWGRP, "w"),), + ((S_IXGRP|S_ISGID, "s"), + (S_ISGID, "S"), + 
(S_IXGRP, "x")), + + ((S_IROTH, "r"),), + ((S_IWOTH, "w"),), + ((S_IXOTH|S_ISVTX, "t"), + (S_ISVTX, "T"), + (S_IXOTH, "x")) +) + +def filemode(mode): + """Convert a file's mode to a string of the form '-rwxrwxrwx'.""" + perm = [] + for table in _filemode_table: + for bit, char in table: + if mode & bit == bit: + perm.append(char) + break + else: + perm.append("-") + return "".join(perm) diff --git a/tests/bytecode/pylib-tests/struct.py b/tests/bytecode/pylib-tests/struct.py new file mode 100644 index 0000000000..9bfc23f8d5 --- /dev/null +++ b/tests/bytecode/pylib-tests/struct.py @@ -0,0 +1,14 @@ +__all__ = [ + # Functions + 'calcsize', 'pack', 'pack_into', 'unpack', 'unpack_from', + + # Classes + 'Struct', + + # Exceptions + 'error' + ] + +from _struct import * +from _struct import _clearcache +from _struct import __doc__ diff --git a/tests/bytecode/pylib-tests/sunau.py b/tests/bytecode/pylib-tests/sunau.py new file mode 100644 index 0000000000..84180298be --- /dev/null +++ b/tests/bytecode/pylib-tests/sunau.py @@ -0,0 +1,485 @@ +"""Stuff to parse Sun and NeXT audio files. + +An audio file consists of a header followed by the data. The structure +of the header is as follows. + + +---------------+ + | magic word | + +---------------+ + | header size | + +---------------+ + | data size | + +---------------+ + | encoding | + +---------------+ + | sample rate | + +---------------+ + | # of channels | + +---------------+ + | info | + | | + +---------------+ + +The magic word consists of the 4 characters '.snd'. Apart from the +info field, all header fields are 4 bytes in size. They are all +32-bit unsigned integers encoded in big-endian byte order. + +The header size really gives the start of the data. +The data size is the physical size of the data. From the other +parameters the number of frames can be calculated. +The encoding gives the way in which audio samples are encoded. +Possible values are listed below. +The info field currently consists of an ASCII string giving a +human-readable description of the audio file. The info field is +padded with NUL bytes to the header size. + +Usage. + +Reading audio files: + f = sunau.open(file, 'r') +where file is either the name of a file or an open file pointer. +The open file pointer must have methods read(), seek(), and close(). +When the setpos() and rewind() methods are not used, the seek() +method is not necessary. + +This returns an instance of a class with the following public methods: + getnchannels() -- returns number of audio channels (1 for + mono, 2 for stereo) + getsampwidth() -- returns sample width in bytes + getframerate() -- returns sampling frequency + getnframes() -- returns number of audio frames + getcomptype() -- returns compression type ('NONE' or 'ULAW') + getcompname() -- returns human-readable version of + compression type ('not compressed' matches 'NONE') + getparams() -- returns a tuple consisting of all of the + above in the above order + getmarkers() -- returns None (for compatibility with the + aifc module) + getmark(id) -- raises an error since the mark does not + exist (for compatibility with the aifc module) + readframes(n) -- returns at most n frames of audio + rewind() -- rewind to the beginning of the audio stream + setpos(pos) -- seek to the specified position + tell() -- return the current position + close() -- close the instance (make it unusable) +The position returned by tell() and the position given to setpos() +are compatible and have nothing to do with the actual position in the +file. 
+The close() method is called automatically when the class instance +is destroyed. + +Writing audio files: + f = sunau.open(file, 'w') +where file is either the name of a file or an open file pointer. +The open file pointer must have methods write(), tell(), seek(), and +close(). + +This returns an instance of a class with the following public methods: + setnchannels(n) -- set the number of channels + setsampwidth(n) -- set the sample width + setframerate(n) -- set the frame rate + setnframes(n) -- set the number of frames + setcomptype(type, name) + -- set the compression type and the + human-readable compression type + setparams(tuple)-- set all parameters at once + tell() -- return current position in output file + writeframesraw(data) + -- write audio frames without pathing up the + file header + writeframes(data) + -- write audio frames and patch up the file header + close() -- patch up the file header and close the + output file +You should set the parameters before the first writeframesraw or +writeframes. The total number of frames does not need to be set, +but when it is set to the correct value, the header does not have to +be patched up. +It is best to first set all parameters, perhaps possibly the +compression type, and then write audio frames using writeframesraw. +When all frames have been written, either call writeframes('') or +close() to patch up the sizes in the header. +The close() method is called automatically when the class instance +is destroyed. +""" + +# from <multimedia/audio_filehdr.h> +AUDIO_FILE_MAGIC = 0x2e736e64 +AUDIO_FILE_ENCODING_MULAW_8 = 1 +AUDIO_FILE_ENCODING_LINEAR_8 = 2 +AUDIO_FILE_ENCODING_LINEAR_16 = 3 +AUDIO_FILE_ENCODING_LINEAR_24 = 4 +AUDIO_FILE_ENCODING_LINEAR_32 = 5 +AUDIO_FILE_ENCODING_FLOAT = 6 +AUDIO_FILE_ENCODING_DOUBLE = 7 +AUDIO_FILE_ENCODING_ADPCM_G721 = 23 +AUDIO_FILE_ENCODING_ADPCM_G722 = 24 +AUDIO_FILE_ENCODING_ADPCM_G723_3 = 25 +AUDIO_FILE_ENCODING_ADPCM_G723_5 = 26 +AUDIO_FILE_ENCODING_ALAW_8 = 27 + +# from <multimedia/audio_hdr.h> +AUDIO_UNKNOWN_SIZE = 0xFFFFFFFF # ((unsigned)(~0)) + +_simple_encodings = [AUDIO_FILE_ENCODING_MULAW_8, + AUDIO_FILE_ENCODING_LINEAR_8, + AUDIO_FILE_ENCODING_LINEAR_16, + AUDIO_FILE_ENCODING_LINEAR_24, + AUDIO_FILE_ENCODING_LINEAR_32, + AUDIO_FILE_ENCODING_ALAW_8] + +class Error(Exception): + pass + +def _read_u32(file): + x = 0 + for i in range(4): + byte = file.read(1) + if not byte: + raise EOFError + x = x*256 + ord(byte) + return x + +def _write_u32(file, x): + data = [] + for i in range(4): + d, m = divmod(x, 256) + data.insert(0, int(m)) + x = d + file.write(bytes(data)) + +class Au_read: + + def __init__(self, f): + if type(f) == type(''): + import builtins + f = builtins.open(f, 'rb') + self._opened = True + else: + self._opened = False + self.initfp(f) + + def __del__(self): + if self._file: + self.close() + + def initfp(self, file): + self._file = file + self._soundpos = 0 + magic = int(_read_u32(file)) + if magic != AUDIO_FILE_MAGIC: + raise Error('bad magic number') + self._hdr_size = int(_read_u32(file)) + if self._hdr_size < 24: + raise Error('header size too small') + if self._hdr_size > 100: + raise Error('header size ridiculously large') + self._data_size = _read_u32(file) + if self._data_size != AUDIO_UNKNOWN_SIZE: + self._data_size = int(self._data_size) + self._encoding = int(_read_u32(file)) + if self._encoding not in _simple_encodings: + raise Error('encoding not (yet) supported') + if self._encoding in (AUDIO_FILE_ENCODING_MULAW_8, + AUDIO_FILE_ENCODING_ALAW_8): + self._sampwidth 
= 2
+            self._framesize = 1
+        elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_8:
+            self._framesize = self._sampwidth = 1
+        elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_16:
+            self._framesize = self._sampwidth = 2
+        elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_24:
+            self._framesize = self._sampwidth = 3
+        elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_32:
+            self._framesize = self._sampwidth = 4
+        else:
+            raise Error('unknown encoding')
+        self._framerate = int(_read_u32(file))
+        self._nchannels = int(_read_u32(file))
+        self._framesize = self._framesize * self._nchannels
+        if self._hdr_size > 24:
+            self._info = file.read(self._hdr_size - 24)
+            for i in range(len(self._info)):
+                if self._info[i] == 0:
+                    self._info = self._info[:i]
+                    break
+        else:
+            self._info = ''
+
+    def getfp(self):
+        return self._file
+
+    def getnchannels(self):
+        return self._nchannels
+
+    def getsampwidth(self):
+        return self._sampwidth
+
+    def getframerate(self):
+        return self._framerate
+
+    def getnframes(self):
+        if self._data_size == AUDIO_UNKNOWN_SIZE:
+            return AUDIO_UNKNOWN_SIZE
+        if self._encoding in _simple_encodings:
+            return self._data_size // self._framesize
+        return 0                # XXX--must do some arithmetic here
+
+    def getcomptype(self):
+        if self._encoding == AUDIO_FILE_ENCODING_MULAW_8:
+            return 'ULAW'
+        elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8:
+            return 'ALAW'
+        else:
+            return 'NONE'
+
+    def getcompname(self):
+        if self._encoding == AUDIO_FILE_ENCODING_MULAW_8:
+            return 'CCITT G.711 u-law'
+        elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8:
+            return 'CCITT G.711 A-law'
+        else:
+            return 'not compressed'
+
+    def getparams(self):
+        return self.getnchannels(), self.getsampwidth(), \
+               self.getframerate(), self.getnframes(), \
+               self.getcomptype(), self.getcompname()
+
+    def getmarkers(self):
+        return None
+
+    def getmark(self, id):
+        raise Error('no marks')
+
+    def readframes(self, nframes):
+        if self._encoding in _simple_encodings:
+            if nframes == AUDIO_UNKNOWN_SIZE:
+                data = self._file.read()
+            else:
+                data = self._file.read(nframes * self._framesize)
+            if self._encoding == AUDIO_FILE_ENCODING_MULAW_8:
+                import audioop
+                data = audioop.ulaw2lin(data, self._sampwidth)
+            return data
+        return None             # XXX--not implemented yet
+
+    def rewind(self):
+        self._soundpos = 0
+        self._file.seek(self._hdr_size)
+
+    def tell(self):
+        return self._soundpos
+
+    def setpos(self, pos):
+        if pos < 0 or pos > self.getnframes():
+            raise Error('position not in range')
+        self._file.seek(pos * self._framesize + self._hdr_size)
+        self._soundpos = pos
+
+    def close(self):
+        if self._opened and self._file:
+            self._file.close()
+        self._file = None
+
+class Au_write:
+
+    def __init__(self, f):
+        if type(f) == type(''):
+            import builtins
+            f = builtins.open(f, 'wb')
+            self._opened = True
+        else:
+            self._opened = False
+        self.initfp(f)
+
+    def __del__(self):
+        if self._file:
+            self.close()
+        self._file = None
+
+    def initfp(self, file):
+        self._file = file
+        self._framerate = 0
+        self._nchannels = 0
+        self._sampwidth = 0
+        self._framesize = 0
+        self._nframes = AUDIO_UNKNOWN_SIZE
+        self._nframeswritten = 0
+        self._datawritten = 0
+        self._datalength = 0
+        self._info = b''
+        self._comptype = 'ULAW' # default is U-law
+
+    def setnchannels(self, nchannels):
+        if self._nframeswritten:
+            raise Error('cannot change parameters after starting to write')
+        if nchannels not in (1, 2, 4):
+            raise Error('only 1, 2, or 4 channels supported')
+        self._nchannels = nchannels
+
+    def getnchannels(self):
+        if not self._nchannels:
+            raise Error('number of channels not set')
+        return self._nchannels
+
+    def setsampwidth(self, sampwidth):
+        if self._nframeswritten:
+            raise Error('cannot change parameters after starting to write')
+        if sampwidth not in (1, 2, 4):
+            raise Error('bad sample width')
+        self._sampwidth = sampwidth
+
+    def getsampwidth(self):
+        if not self._sampwidth:
+            raise Error('sample width not specified')
+        return self._sampwidth
+
+    def setframerate(self, framerate):
+        if self._nframeswritten:
+            raise Error('cannot change parameters after starting to write')
+        self._framerate = framerate
+
+    def getframerate(self):
+        if not self._framerate:
+            raise Error('frame rate not set')
+        return self._framerate
+
+    def setnframes(self, nframes):
+        if self._nframeswritten:
+            raise Error('cannot change parameters after starting to write')
+        if nframes < 0:
+            raise Error('# of frames cannot be negative')
+        self._nframes = nframes
+
+    def getnframes(self):
+        return self._nframeswritten
+
+    def setcomptype(self, type, name):
+        if type in ('NONE', 'ULAW'):
+            self._comptype = type
+        else:
+            raise Error('unknown compression type')
+
+    def getcomptype(self):
+        return self._comptype
+
+    def getcompname(self):
+        if self._comptype == 'ULAW':
+            return 'CCITT G.711 u-law'
+        elif self._comptype == 'ALAW':
+            return 'CCITT G.711 A-law'
+        else:
+            return 'not compressed'
+
+    def setparams(self, params):
+        nchannels, sampwidth, framerate, nframes, comptype, compname = params
+        self.setnchannels(nchannels)
+        self.setsampwidth(sampwidth)
+        self.setframerate(framerate)
+        self.setnframes(nframes)
+        self.setcomptype(comptype, compname)
+
+    def getparams(self):
+        return self.getnchannels(), self.getsampwidth(), \
+               self.getframerate(), self.getnframes(), \
+               self.getcomptype(), self.getcompname()
+
+    def tell(self):
+        return self._nframeswritten
+
+    def writeframesraw(self, data):
+        self._ensure_header_written()
+        nframes = len(data) // self._framesize
+        if self._comptype == 'ULAW':
+            import audioop
+            data = audioop.lin2ulaw(data, self._sampwidth)
+        self._file.write(data)
+        self._nframeswritten = self._nframeswritten + nframes
+        self._datawritten = self._datawritten + len(data)
+
+    def writeframes(self, data):
+        self.writeframesraw(data)
+        if self._nframeswritten != self._nframes or \
+                  self._datalength != self._datawritten:
+            self._patchheader()
+
+    def close(self):
+        self._ensure_header_written()
+        if self._nframeswritten != self._nframes or \
+                  self._datalength != self._datawritten:
+            self._patchheader()
+        self._file.flush()
+        if self._opened and self._file:
+            self._file.close()
+        self._file = None
+
+    #
+    # private methods
+    #
+
+    def _ensure_header_written(self):
+        if not self._nframeswritten:
+            if not self._nchannels:
+                raise Error('# of channels not specified')
+            if not self._sampwidth:
+                raise Error('sample width not specified')
+            if not self._framerate:
+                raise Error('frame rate not specified')
+            self._write_header()
+
+    def _write_header(self):
+        if self._comptype == 'NONE':
+            if self._sampwidth == 1:
+                encoding = AUDIO_FILE_ENCODING_LINEAR_8
+                self._framesize = 1
+            elif self._sampwidth == 2:
+                encoding = AUDIO_FILE_ENCODING_LINEAR_16
+                self._framesize = 2
+            elif self._sampwidth == 4:
+                encoding = AUDIO_FILE_ENCODING_LINEAR_32
+                self._framesize = 4
+            else:
+                raise Error('internal error')
+        elif self._comptype == 'ULAW':
+            encoding = AUDIO_FILE_ENCODING_MULAW_8
+            self._framesize = 1
+        else:
+            raise Error('internal error')
+        self._framesize = self._framesize * self._nchannels
+        _write_u32(self._file, 
+    def _write_header(self):
+        if self._comptype == 'NONE':
+            if self._sampwidth == 1:
+                encoding = AUDIO_FILE_ENCODING_LINEAR_8
+                self._framesize = 1
+            elif self._sampwidth == 2:
+                encoding = AUDIO_FILE_ENCODING_LINEAR_16
+                self._framesize = 2
+            elif self._sampwidth == 4:
+                encoding = AUDIO_FILE_ENCODING_LINEAR_32
+                self._framesize = 4
+            else:
+                raise Error('internal error')
+        elif self._comptype == 'ULAW':
+            encoding = AUDIO_FILE_ENCODING_MULAW_8
+            self._framesize = 1
+        else:
+            raise Error('internal error')
+        self._framesize = self._framesize * self._nchannels
+        _write_u32(self._file, AUDIO_FILE_MAGIC)
+        header_size = 25 + len(self._info)
+        header_size = (header_size + 7) & ~7
+        _write_u32(self._file, header_size)
+        if self._nframes == AUDIO_UNKNOWN_SIZE:
+            length = AUDIO_UNKNOWN_SIZE
+        else:
+            length = self._nframes * self._framesize
+        _write_u32(self._file, length)
+        self._datalength = length
+        _write_u32(self._file, encoding)
+        _write_u32(self._file, self._framerate)
+        _write_u32(self._file, self._nchannels)
+        self._file.write(self._info)
+        self._file.write(b'\0'*(header_size - len(self._info) - 24))
+
+    def _patchheader(self):
+        self._file.seek(8)
+        _write_u32(self._file, self._datawritten)
+        self._datalength = self._datawritten
+        self._file.seek(0, 2)
+
+def open(f, mode=None):
+    if mode is None:
+        if hasattr(f, 'mode'):
+            mode = f.mode
+        else:
+            mode = 'rb'
+    if mode in ('r', 'rb'):
+        return Au_read(f)
+    elif mode in ('w', 'wb'):
+        return Au_write(f)
+    else:
+        raise Error("mode must be 'r', 'rb', 'w', or 'wb'")
+
+openfp = open
diff --git a/tests/bytecode/pylib-tests/symbol.py b/tests/bytecode/pylib-tests/symbol.py
new file mode 100644
index 0000000000..34143b5d8e
--- /dev/null
+++ b/tests/bytecode/pylib-tests/symbol.py
@@ -0,0 +1,111 @@
+#! /usr/bin/env python3
+
+"""Non-terminal symbols of Python grammar (from "graminit.h")."""
+
+# This file is automatically generated; please don't muck it up!
+#
+# To update the symbols in this file, 'cd' to the top directory of
+# the python source tree after building the interpreter and run:
+#
+#    ./python Lib/symbol.py
+
+#--start constants--
+single_input = 256
+file_input = 257
+eval_input = 258
+decorator = 259
+decorators = 260
+decorated = 261
+funcdef = 262
+parameters = 263
+typedargslist = 264
+tfpdef = 265
+varargslist = 266
+vfpdef = 267
+stmt = 268
+simple_stmt = 269
+small_stmt = 270
+expr_stmt = 271
+testlist_star_expr = 272
+augassign = 273
+del_stmt = 274
+pass_stmt = 275
+flow_stmt = 276
+break_stmt = 277
+continue_stmt = 278
+return_stmt = 279
+yield_stmt = 280
+raise_stmt = 281
+import_stmt = 282
+import_name = 283
+import_from = 284
+import_as_name = 285
+dotted_as_name = 286
+import_as_names = 287
+dotted_as_names = 288
+dotted_name = 289
+global_stmt = 290
+nonlocal_stmt = 291
+assert_stmt = 292
+compound_stmt = 293
+if_stmt = 294
+while_stmt = 295
+for_stmt = 296
+try_stmt = 297
+with_stmt = 298
+with_item = 299
+except_clause = 300
+suite = 301
+test = 302
+test_nocond = 303
+lambdef = 304
+lambdef_nocond = 305
+or_test = 306
+and_test = 307
+not_test = 308
+comparison = 309
+comp_op = 310
+star_expr = 311
+expr = 312
+xor_expr = 313
+and_expr = 314
+shift_expr = 315
+arith_expr = 316
+term = 317
+factor = 318
+power = 319
+atom = 320
+testlist_comp = 321
+trailer = 322
+subscriptlist = 323
+subscript = 324
+sliceop = 325
+exprlist = 326
+testlist = 327
+dictorsetmaker = 328
+classdef = 329
+arglist = 330
+argument = 331
+comp_iter = 332
+comp_for = 333
+comp_if = 334
+encoding_decl = 335
+yield_expr = 336
+yield_arg = 337
+#--end constants--
+
+sym_name = {}
+for _name, _value in list(globals().items()):
+    if type(_value) is type(0):
+        sym_name[_value] = _name
+
+
+def main():
+    import sys
+    import token
+    if len(sys.argv) == 1:
+        sys.argv = sys.argv + ["Include/graminit.h", "Lib/symbol.py"]
+    token._main()
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/bytecode/pylib-tests/tabnanny.py b/tests/bytecode/pylib-tests/tabnanny.py
new file mode 100644
index 0000000000..5b9b444c1a
--- /dev/null
+++ b/tests/bytecode/pylib-tests/tabnanny.py
@@ -0,0 +1,332 @@
+#!
/usr/bin/env python3 + +"""The Tab Nanny despises ambiguous indentation. She knows no mercy. + +tabnanny -- Detection of ambiguous indentation + +For the time being this module is intended to be called as a script. +However it is possible to import it into an IDE and use the function +check() described below. + +Warning: The API provided by this module is likely to change in future +releases; such changes may not be backward compatible. +""" + +# Released to the public domain, by Tim Peters, 15 April 1998. + +# XXX Note: this is now a standard library module. +# XXX The API needs to undergo changes however; the current code is too +# XXX script-like. This will be addressed later. + +__version__ = "6" + +import os +import sys +import getopt +import tokenize +if not hasattr(tokenize, 'NL'): + raise ValueError("tokenize.NL doesn't exist -- tokenize module too old") + +__all__ = ["check", "NannyNag", "process_tokens"] + +verbose = 0 +filename_only = 0 + +def errprint(*args): + sep = "" + for arg in args: + sys.stderr.write(sep + str(arg)) + sep = " " + sys.stderr.write("\n") + +def main(): + global verbose, filename_only + try: + opts, args = getopt.getopt(sys.argv[1:], "qv") + except getopt.error as msg: + errprint(msg) + return + for o, a in opts: + if o == '-q': + filename_only = filename_only + 1 + if o == '-v': + verbose = verbose + 1 + if not args: + errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") + return + for arg in args: + check(arg) + +class NannyNag(Exception): + """ + Raised by tokeneater() if detecting an ambiguous indent. + Captured and handled in check(). + """ + def __init__(self, lineno, msg, line): + self.lineno, self.msg, self.line = lineno, msg, line + def get_lineno(self): + return self.lineno + def get_msg(self): + return self.msg + def get_line(self): + return self.line + +def check(file): + """check(file_or_dir) + + If file_or_dir is a directory and not a symbolic link, then recursively + descend the directory tree named by file_or_dir, checking all .py files + along the way. If file_or_dir is an ordinary Python source file, it is + checked for whitespace related problems. The diagnostic messages are + written to standard output using the print statement. + """ + + if os.path.isdir(file) and not os.path.islink(file): + if verbose: + print("%r: listing directory" % (file,)) + names = os.listdir(file) + for name in names: + fullname = os.path.join(file, name) + if (os.path.isdir(fullname) and + not os.path.islink(fullname) or + os.path.normcase(name[-3:]) == ".py"): + check(fullname) + return + + try: + f = tokenize.open(file) + except IOError as msg: + errprint("%r: I/O Error: %s" % (file, msg)) + return + + if verbose > 1: + print("checking %r ..." % file) + + try: + process_tokens(tokenize.generate_tokens(f.readline)) + + except tokenize.TokenError as msg: + errprint("%r: Token Error: %s" % (file, msg)) + return + + except IndentationError as msg: + errprint("%r: Indentation Error: %s" % (file, msg)) + return + + except NannyNag as nag: + badline = nag.get_lineno() + line = nag.get_line() + if verbose: + print("%r: *** Line %d: trouble in tab city! ***" % (file, badline)) + print("offending line: %r" % (line,)) + print(nag.get_msg()) + else: + if ' ' in file: file = '"' + file + '"' + if filename_only: print(file) + else: print(file, badline, repr(line)) + return + + finally: + f.close() + + if verbose: + print("%r: Clean bill of health." 
% (file,)) + +class Whitespace: + # the characters used for space and tab + S, T = ' \t' + + # members: + # raw + # the original string + # n + # the number of leading whitespace characters in raw + # nt + # the number of tabs in raw[:n] + # norm + # the normal form as a pair (count, trailing), where: + # count + # a tuple such that raw[:n] contains count[i] + # instances of S * i + T + # trailing + # the number of trailing spaces in raw[:n] + # It's A Theorem that m.indent_level(t) == + # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. + # is_simple + # true iff raw[:n] is of the form (T*)(S*) + + def __init__(self, ws): + self.raw = ws + S, T = Whitespace.S, Whitespace.T + count = [] + b = n = nt = 0 + for ch in self.raw: + if ch == S: + n = n + 1 + b = b + 1 + elif ch == T: + n = n + 1 + nt = nt + 1 + if b >= len(count): + count = count + [0] * (b - len(count) + 1) + count[b] = count[b] + 1 + b = 0 + else: + break + self.n = n + self.nt = nt + self.norm = tuple(count), b + self.is_simple = len(count) <= 1 + + # return length of longest contiguous run of spaces (whether or not + # preceding a tab) + def longest_run_of_spaces(self): + count, trailing = self.norm + return max(len(count)-1, trailing) + + def indent_level(self, tabsize): + # count, il = self.norm + # for i in range(len(count)): + # if count[i]: + # il = il + (i//tabsize + 1)*tabsize * count[i] + # return il + + # quicker: + # il = trailing + sum (i//ts + 1)*ts*count[i] = + # trailing + ts * sum (i//ts + 1)*count[i] = + # trailing + ts * sum i//ts*count[i] + count[i] = + # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] = + # trailing + ts * [(sum i//ts*count[i]) + num_tabs] + # and note that i//ts*count[i] is 0 when i < ts + + count, trailing = self.norm + il = 0 + for i in range(tabsize, len(count)): + il = il + i//tabsize * count[i] + return trailing + tabsize * (il + self.nt) + + # return true iff self.indent_level(t) == other.indent_level(t) + # for all t >= 1 + def equal(self, other): + return self.norm == other.norm + + # return a list of tuples (ts, i1, i2) such that + # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. + # Intended to be used after not self.equal(other) is known, in which + # case it will return at least one witnessing tab size. + def not_equal_witness(self, other): + n = max(self.longest_run_of_spaces(), + other.longest_run_of_spaces()) + 1 + a = [] + for ts in range(1, n+1): + if self.indent_level(ts) != other.indent_level(ts): + a.append( (ts, + self.indent_level(ts), + other.indent_level(ts)) ) + return a + + # Return True iff self.indent_level(t) < other.indent_level(t) + # for all t >= 1. + # The algorithm is due to Vincent Broman. + # Easy to prove it's correct. + # XXXpost that. + # Trivial to prove n is sharp (consider T vs ST). + # Unknown whether there's a faster general way. I suspected so at + # first, but no longer. + # For the special (but common!) case where M and N are both of the + # form (T*)(S*), M.less(N) iff M.len() < N.len() and + # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. + # XXXwrite that up. + # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. 
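+    # A worked illustration (not part of the original module): for
+    # m = Whitespace(" \t") and n = Whitespace("\t "),
+    # m.indent_level(1) == n.indent_level(1) == 2, but
+    # m.indent_level(8) == 8 while n.indent_level(8) == 9; the two
+    # prefixes are therefore ambiguous, and m.equal(n) is False.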
+ def less(self, other): + if self.n >= other.n: + return False + if self.is_simple and other.is_simple: + return self.nt <= other.nt + n = max(self.longest_run_of_spaces(), + other.longest_run_of_spaces()) + 1 + # the self.n >= other.n test already did it for ts=1 + for ts in range(2, n+1): + if self.indent_level(ts) >= other.indent_level(ts): + return False + return True + + # return a list of tuples (ts, i1, i2) such that + # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. + # Intended to be used after not self.less(other) is known, in which + # case it will return at least one witnessing tab size. + def not_less_witness(self, other): + n = max(self.longest_run_of_spaces(), + other.longest_run_of_spaces()) + 1 + a = [] + for ts in range(1, n+1): + if self.indent_level(ts) >= other.indent_level(ts): + a.append( (ts, + self.indent_level(ts), + other.indent_level(ts)) ) + return a + +def format_witnesses(w): + firsts = (str(tup[0]) for tup in w) + prefix = "at tab size" + if len(w) > 1: + prefix = prefix + "s" + return prefix + " " + ', '.join(firsts) + +def process_tokens(tokens): + INDENT = tokenize.INDENT + DEDENT = tokenize.DEDENT + NEWLINE = tokenize.NEWLINE + JUNK = tokenize.COMMENT, tokenize.NL + indents = [Whitespace("")] + check_equal = 0 + + for (type, token, start, end, line) in tokens: + if type == NEWLINE: + # a program statement, or ENDMARKER, will eventually follow, + # after some (possibly empty) run of tokens of the form + # (NL | COMMENT)* (INDENT | DEDENT+)? + # If an INDENT appears, setting check_equal is wrong, and will + # be undone when we see the INDENT. + check_equal = 1 + + elif type == INDENT: + check_equal = 0 + thisguy = Whitespace(token) + if not indents[-1].less(thisguy): + witness = indents[-1].not_less_witness(thisguy) + msg = "indent not greater e.g. " + format_witnesses(witness) + raise NannyNag(start[0], msg, line) + indents.append(thisguy) + + elif type == DEDENT: + # there's nothing we need to check here! what's important is + # that when the run of DEDENTs ends, the indentation of the + # program statement (or ENDMARKER) that triggered the run is + # equal to what's left at the top of the indents stack + + # Ouch! This assert triggers if the last line of the source + # is indented *and* lacks a newline -- then DEDENTs pop out + # of thin air. + # assert check_equal # else no earlier NEWLINE, or an earlier INDENT + check_equal = 1 + + del indents[-1] + + elif check_equal and type not in JUNK: + # this is the first "real token" following a NEWLINE, so it + # must be the first token of the next program statement, or an + # ENDMARKER; the "line" argument exposes the leading whitespace + # for this statement; in the case of ENDMARKER, line is an empty + # string, so will properly match the empty string with which the + # "indents" stack was seeded + check_equal = 0 + thisguy = Whitespace(line) + if not indents[-1].equal(thisguy): + witness = indents[-1].not_equal_witness(thisguy) + msg = "indent not equal e.g. " + format_witnesses(witness) + raise NannyNag(start[0], msg, line) + + +if __name__ == '__main__': + main() diff --git a/tests/bytecode/pylib-tests/tempfile.py b/tests/bytecode/pylib-tests/tempfile.py new file mode 100644 index 0000000000..4aad7b5d42 --- /dev/null +++ b/tests/bytecode/pylib-tests/tempfile.py @@ -0,0 +1,709 @@ +"""Temporary files. + +This module provides generic, low- and high-level interfaces for +creating temporary files and directories. 
The interfaces listed +as "safe" just below can be used without fear of race conditions. +Those listed as "unsafe" cannot, and are provided for backward +compatibility only. + +This module also provides some data items to the user: + + TMP_MAX - maximum number of names that will be tried before + giving up. + tempdir - If this is set to a string before the first use of + any routine from this module, it will be considered as + another candidate location to store temporary files. +""" + +__all__ = [ + "NamedTemporaryFile", "TemporaryFile", # high level safe interfaces + "SpooledTemporaryFile", "TemporaryDirectory", + "mkstemp", "mkdtemp", # low level safe interfaces + "mktemp", # deprecated unsafe interface + "TMP_MAX", "gettempprefix", # constants + "tempdir", "gettempdir" + ] + + +# Imports. + +import warnings as _warnings +import sys as _sys +import io as _io +import os as _os +import errno as _errno +from random import Random as _Random + +try: + import fcntl as _fcntl +except ImportError: + def _set_cloexec(fd): + pass +else: + def _set_cloexec(fd): + try: + flags = _fcntl.fcntl(fd, _fcntl.F_GETFD, 0) + except OSError: + pass + else: + # flags read successfully, modify + flags |= _fcntl.FD_CLOEXEC + _fcntl.fcntl(fd, _fcntl.F_SETFD, flags) + + +try: + import _thread +except ImportError: + import _dummy_thread as _thread +_allocate_lock = _thread.allocate_lock + +_text_openflags = _os.O_RDWR | _os.O_CREAT | _os.O_EXCL +if hasattr(_os, 'O_NOINHERIT'): + _text_openflags |= _os.O_NOINHERIT +if hasattr(_os, 'O_NOFOLLOW'): + _text_openflags |= _os.O_NOFOLLOW + +_bin_openflags = _text_openflags +if hasattr(_os, 'O_BINARY'): + _bin_openflags |= _os.O_BINARY + +if hasattr(_os, 'TMP_MAX'): + TMP_MAX = _os.TMP_MAX +else: + TMP_MAX = 10000 + +# Although it does not have an underscore for historical reasons, this +# variable is an internal implementation detail (see issue 10354). +template = "tmp" + +# Internal routines. + +_once_lock = _allocate_lock() + +if hasattr(_os, "lstat"): + _stat = _os.lstat +elif hasattr(_os, "stat"): + _stat = _os.stat +else: + # Fallback. All we need is something that raises OSError if the + # file doesn't exist. + def _stat(fn): + f = open(fn) + f.close() + +def _exists(fn): + try: + _stat(fn) + except OSError: + return False + else: + return True + +class _RandomNameSequence: + """An instance of _RandomNameSequence generates an endless + sequence of unpredictable strings which can safely be incorporated + into file names. Each string is six characters long. Multiple + threads can safely use the same instance at the same time. + + _RandomNameSequence is an iterator.""" + + characters = "abcdefghijklmnopqrstuvwxyz0123456789_" + + @property + def rng(self): + cur_pid = _os.getpid() + if cur_pid != getattr(self, '_rng_pid', None): + self._rng = _Random() + self._rng_pid = cur_pid + return self._rng + + def __iter__(self): + return self + + def __next__(self): + c = self.characters + choose = self.rng.choice + letters = [choose(c) for dummy in "123456"] + return ''.join(letters) + +def _candidate_tempdir_list(): + """Generate a list of candidate temporary directories which + _get_default_tempdir will try.""" + + dirlist = [] + + # First, try the environment. + for envname in 'TMPDIR', 'TEMP', 'TMP': + dirname = _os.getenv(envname) + if dirname: dirlist.append(dirname) + + # Failing that, try OS-specific locations. 
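+    # (Illustratively: on a Unix box with TMPDIR=/scratch exported and
+    # TEMP/TMP unset, the candidates are ['/scratch', '/tmp', '/var/tmp',
+    # '/usr/tmp'], with the current working directory appended below.)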
+ if _os.name == 'nt': + dirlist.extend([ r'c:\temp', r'c:\tmp', r'\temp', r'\tmp' ]) + else: + dirlist.extend([ '/tmp', '/var/tmp', '/usr/tmp' ]) + + # As a last resort, the current directory. + try: + dirlist.append(_os.getcwd()) + except (AttributeError, OSError): + dirlist.append(_os.curdir) + + return dirlist + +def _get_default_tempdir(): + """Calculate the default directory to use for temporary files. + This routine should be called exactly once. + + We determine whether or not a candidate temp dir is usable by + trying to create and write to a file in that directory. If this + is successful, the test file is deleted. To prevent denial of + service, the name of the test file must be randomized.""" + + namer = _RandomNameSequence() + dirlist = _candidate_tempdir_list() + + for dir in dirlist: + if dir != _os.curdir: + dir = _os.path.normcase(_os.path.abspath(dir)) + # Try only a few names per directory. + for seq in range(100): + name = next(namer) + filename = _os.path.join(dir, name) + try: + fd = _os.open(filename, _bin_openflags, 0o600) + try: + try: + with _io.open(fd, 'wb', closefd=False) as fp: + fp.write(b'blat') + finally: + _os.close(fd) + finally: + _os.unlink(filename) + return dir + except FileExistsError: + pass + except OSError: + break # no point trying more names in this directory + raise FileNotFoundError(_errno.ENOENT, + "No usable temporary directory found in %s" % + dirlist) + +_name_sequence = None + +def _get_candidate_names(): + """Common setup sequence for all user-callable interfaces.""" + + global _name_sequence + if _name_sequence is None: + _once_lock.acquire() + try: + if _name_sequence is None: + _name_sequence = _RandomNameSequence() + finally: + _once_lock.release() + return _name_sequence + + +def _mkstemp_inner(dir, pre, suf, flags): + """Code common to mkstemp, TemporaryFile, and NamedTemporaryFile.""" + + names = _get_candidate_names() + + for seq in range(TMP_MAX): + name = next(names) + file = _os.path.join(dir, pre + name + suf) + try: + fd = _os.open(file, flags, 0o600) + _set_cloexec(fd) + return (fd, _os.path.abspath(file)) + except FileExistsError: + continue # try again + + raise FileExistsError(_errno.EEXIST, + "No usable temporary file name found") + + +# User visible interfaces. + +def gettempprefix(): + """Accessor for tempdir.template.""" + return template + +tempdir = None + +def gettempdir(): + """Accessor for tempfile.tempdir.""" + global tempdir + if tempdir is None: + _once_lock.acquire() + try: + if tempdir is None: + tempdir = _get_default_tempdir() + finally: + _once_lock.release() + return tempdir + +def mkstemp(suffix="", prefix=template, dir=None, text=False): + """User-callable function to create and return a unique temporary + file. The return value is a pair (fd, name) where fd is the + file descriptor returned by os.open, and name is the filename. + + If 'suffix' is specified, the file name will end with that suffix, + otherwise there will be no suffix. + + If 'prefix' is specified, the file name will begin with that prefix, + otherwise a default prefix is used. + + If 'dir' is specified, the file will be created in that directory, + otherwise a default directory is used. + + If 'text' is specified and true, the file is opened in text + mode. Else (the default) the file is opened in binary mode. On + some operating systems, this makes no difference. + + The file is readable and writable only by the creating user ID. 
+ If the operating system uses permission bits to indicate whether a + file is executable, the file is executable by no one. The file + descriptor is not inherited by children of this process. + + Caller is responsible for deleting the file when done with it. + """ + + if dir is None: + dir = gettempdir() + + if text: + flags = _text_openflags + else: + flags = _bin_openflags + + return _mkstemp_inner(dir, prefix, suffix, flags) + + +def mkdtemp(suffix="", prefix=template, dir=None): + """User-callable function to create and return a unique temporary + directory. The return value is the pathname of the directory. + + Arguments are as for mkstemp, except that the 'text' argument is + not accepted. + + The directory is readable, writable, and searchable only by the + creating user. + + Caller is responsible for deleting the directory when done with it. + """ + + if dir is None: + dir = gettempdir() + + names = _get_candidate_names() + + for seq in range(TMP_MAX): + name = next(names) + file = _os.path.join(dir, prefix + name + suffix) + try: + _os.mkdir(file, 0o700) + return file + except FileExistsError: + continue # try again + + raise FileExistsError(_errno.EEXIST, + "No usable temporary directory name found") + +def mktemp(suffix="", prefix=template, dir=None): + """User-callable function to return a unique temporary file name. The + file is not created. + + Arguments are as for mkstemp, except that the 'text' argument is + not accepted. + + This function is unsafe and should not be used. The file name + refers to a file that did not exist at some point, but by the time + you get around to creating it, someone else may have beaten you to + the punch. + """ + +## from warnings import warn as _warn +## _warn("mktemp is a potential security risk to your program", +## RuntimeWarning, stacklevel=2) + + if dir is None: + dir = gettempdir() + + names = _get_candidate_names() + for seq in range(TMP_MAX): + name = next(names) + file = _os.path.join(dir, prefix + name + suffix) + if not _exists(file): + return file + + raise FileExistsError(_errno.EEXIST, + "No usable temporary filename found") + + +class _TemporaryFileWrapper: + """Temporary file wrapper + + This class provides a wrapper around files opened for + temporary use. In particular, it seeks to automatically + remove the file when it is no longer needed. + """ + + def __init__(self, file, name, delete=True): + self.file = file + self.name = name + self.close_called = False + self.delete = delete + + def __getattr__(self, name): + # Attribute lookups are delegated to the underlying file + # and cached for non-numeric results + # (i.e. methods are cached, closed and friends are not) + file = self.__dict__['file'] + a = getattr(file, name) + if not isinstance(a, int): + setattr(self, name, a) + return a + + # The underlying __enter__ method returns the wrong object + # (self.file) so override it to return the wrapper + def __enter__(self): + self.file.__enter__() + return self + + # iter() doesn't use __getattr__ to find the __iter__ method + def __iter__(self): + return iter(self.file) + + # NT provides delete-on-close as a primitive, so we don't need + # the wrapper to do anything special. We still use it so that + # file.name is useful (i.e. not "(fdopen)") with NamedTemporaryFile. + if _os.name != 'nt': + # Cache the unlinker so we don't get spurious errors at + # shutdown when the module-level "os" is None'd out. 
Note + # that this must be referenced as self.unlink, because the + # name TemporaryFileWrapper may also get None'd out before + # __del__ is called. + unlink = _os.unlink + + def close(self): + if not self.close_called: + self.close_called = True + self.file.close() + if self.delete: + self.unlink(self.name) + + def __del__(self): + self.close() + + # Need to trap __exit__ as well to ensure the file gets + # deleted when used in a with statement + def __exit__(self, exc, value, tb): + result = self.file.__exit__(exc, value, tb) + self.close() + return result + else: + def __exit__(self, exc, value, tb): + self.file.__exit__(exc, value, tb) + + +def NamedTemporaryFile(mode='w+b', buffering=-1, encoding=None, + newline=None, suffix="", prefix=template, + dir=None, delete=True): + """Create and return a temporary file. + Arguments: + 'prefix', 'suffix', 'dir' -- as for mkstemp. + 'mode' -- the mode argument to io.open (default "w+b"). + 'buffering' -- the buffer size argument to io.open (default -1). + 'encoding' -- the encoding argument to io.open (default None) + 'newline' -- the newline argument to io.open (default None) + 'delete' -- whether the file is deleted on close (default True). + The file is created as mkstemp() would do it. + + Returns an object with a file-like interface; the name of the file + is accessible as file.name. The file will be automatically deleted + when it is closed unless the 'delete' argument is set to False. + """ + + if dir is None: + dir = gettempdir() + + flags = _bin_openflags + + # Setting O_TEMPORARY in the flags causes the OS to delete + # the file when it is closed. This is only supported by Windows. + if _os.name == 'nt' and delete: + flags |= _os.O_TEMPORARY + + (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags) + file = _io.open(fd, mode, buffering=buffering, + newline=newline, encoding=encoding) + + return _TemporaryFileWrapper(file, name, delete) + +if _os.name != 'posix' or _os.sys.platform == 'cygwin': + # On non-POSIX and Cygwin systems, assume that we cannot unlink a file + # while it is open. + TemporaryFile = NamedTemporaryFile + +else: + def TemporaryFile(mode='w+b', buffering=-1, encoding=None, + newline=None, suffix="", prefix=template, + dir=None): + """Create and return a temporary file. + Arguments: + 'prefix', 'suffix', 'dir' -- as for mkstemp. + 'mode' -- the mode argument to io.open (default "w+b"). + 'buffering' -- the buffer size argument to io.open (default -1). + 'encoding' -- the encoding argument to io.open (default None) + 'newline' -- the newline argument to io.open (default None) + The file is created as mkstemp() would do it. + + Returns an object with a file-like interface. The file has no + name, and will cease to exist when it is closed. + """ + + if dir is None: + dir = gettempdir() + + flags = _bin_openflags + + (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags) + try: + _os.unlink(name) + return _io.open(fd, mode, buffering=buffering, + newline=newline, encoding=encoding) + except: + _os.close(fd) + raise + +class SpooledTemporaryFile: + """Temporary file wrapper, specialized to switch from BytesIO + or StringIO to a real file when it exceeds a certain size or + when a fileno is needed. 
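+
+    A minimal usage sketch (illustrative only):
+
+        with SpooledTemporaryFile(max_size=1024) as f:
+            f.write(b'x' * 2048)   # exceeding max_size triggers rollover
+            f.seek(0)
+            data = f.read()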
+    """
+    _rolled = False
+
+    def __init__(self, max_size=0, mode='w+b', buffering=-1,
+                 encoding=None, newline=None,
+                 suffix="", prefix=template, dir=None):
+        if 'b' in mode:
+            self._file = _io.BytesIO()
+        else:
+            # Setting newline="\n" avoids newline translation;
+            # this is important because otherwise on Windows we'd
+            # get double newline translation upon rollover().
+            self._file = _io.StringIO(newline="\n")
+        self._max_size = max_size
+        self._rolled = False
+        self._TemporaryFileArgs = {'mode': mode, 'buffering': buffering,
+                                   'suffix': suffix, 'prefix': prefix,
+                                   'encoding': encoding, 'newline': newline,
+                                   'dir': dir}
+
+    def _check(self, file):
+        if self._rolled: return
+        max_size = self._max_size
+        if max_size and file.tell() > max_size:
+            self.rollover()
+
+    def rollover(self):
+        if self._rolled: return
+        file = self._file
+        newfile = self._file = TemporaryFile(**self._TemporaryFileArgs)
+        del self._TemporaryFileArgs
+
+        newfile.write(file.getvalue())
+        newfile.seek(file.tell(), 0)
+
+        self._rolled = True
+
+    # The method caching trick from NamedTemporaryFile
+    # won't work here, because _file may change from a
+    # _StringIO instance to a real file. So we list
+    # all the methods directly.
+
+    # Context management protocol
+    def __enter__(self):
+        if self._file.closed:
+            raise ValueError("Cannot enter context with closed file")
+        return self
+
+    def __exit__(self, exc, value, tb):
+        self._file.close()
+
+    # file protocol
+    def __iter__(self):
+        return self._file.__iter__()
+
+    def close(self):
+        self._file.close()
+
+    @property
+    def closed(self):
+        return self._file.closed
+
+    @property
+    def encoding(self):
+        try:
+            return self._file.encoding
+        except AttributeError:
+            if 'b' in self._TemporaryFileArgs['mode']:
+                raise
+            return self._TemporaryFileArgs['encoding']
+
+    def fileno(self):
+        self.rollover()
+        return self._file.fileno()
+
+    def flush(self):
+        self._file.flush()
+
+    def isatty(self):
+        return self._file.isatty()
+
+    @property
+    def mode(self):
+        try:
+            return self._file.mode
+        except AttributeError:
+            return self._TemporaryFileArgs['mode']
+
+    @property
+    def name(self):
+        try:
+            return self._file.name
+        except AttributeError:
+            return None
+
+    @property
+    def newlines(self):
+        try:
+            return self._file.newlines
+        except AttributeError:
+            if 'b' in self._TemporaryFileArgs['mode']:
+                raise
+            return self._TemporaryFileArgs['newline']
+
+    def read(self, *args):
+        return self._file.read(*args)
+
+    def readline(self, *args):
+        return self._file.readline(*args)
+
+    def readlines(self, *args):
+        return self._file.readlines(*args)
+
+    def seek(self, *args):
+        self._file.seek(*args)
+
+    @property
+    def softspace(self):
+        return self._file.softspace
+
+    def tell(self):
+        return self._file.tell()
+
+    def truncate(self, size=None):
+        if size is None:
+            self._file.truncate()
+        else:
+            if size > self._max_size:
+                self.rollover()
+            self._file.truncate(size)
+
+    def write(self, s):
+        file = self._file
+        rv = file.write(s)
+        self._check(file)
+        return rv
+
+    def writelines(self, iterable):
+        file = self._file
+        rv = file.writelines(iterable)
+        self._check(file)
+        return rv
+
+
+class TemporaryDirectory(object):
+    """Create and return a temporary directory.  This has the same
+    behavior as mkdtemp but can be used as a context manager.  For
+    example:
+
+        with TemporaryDirectory() as tmpdir:
+            ...
+
+    Upon exiting the context, the directory and everything contained
+    in it are removed.
+ """ + + def __init__(self, suffix="", prefix=template, dir=None): + self._closed = False + self.name = None # Handle mkdtemp raising an exception + self.name = mkdtemp(suffix, prefix, dir) + + def __repr__(self): + return "<{} {!r}>".format(self.__class__.__name__, self.name) + + def __enter__(self): + return self.name + + def cleanup(self, _warn=False): + if self.name and not self._closed: + try: + self._rmtree(self.name) + except (TypeError, AttributeError) as ex: + # Issue #10188: Emit a warning on stderr + # if the directory could not be cleaned + # up due to missing globals + if "None" not in str(ex): + raise + print("ERROR: {!r} while cleaning up {!r}".format(ex, self,), + file=_sys.stderr) + return + self._closed = True + if _warn: + self._warn("Implicitly cleaning up {!r}".format(self), + ResourceWarning) + + def __exit__(self, exc, value, tb): + self.cleanup() + + def __del__(self): + # Issue a ResourceWarning if implicit cleanup needed + self.cleanup(_warn=True) + + # XXX (ncoghlan): The following code attempts to make + # this class tolerant of the module nulling out process + # that happens during CPython interpreter shutdown + # Alas, it doesn't actually manage it. See issue #10188 + _listdir = staticmethod(_os.listdir) + _path_join = staticmethod(_os.path.join) + _isdir = staticmethod(_os.path.isdir) + _islink = staticmethod(_os.path.islink) + _remove = staticmethod(_os.remove) + _rmdir = staticmethod(_os.rmdir) + _os_error = OSError + _warn = _warnings.warn + + def _rmtree(self, path): + # Essentially a stripped down version of shutil.rmtree. We can't + # use globals because they may be None'ed out at shutdown. + for name in self._listdir(path): + fullname = self._path_join(path, name) + try: + isdir = self._isdir(fullname) and not self._islink(fullname) + except self._os_error: + isdir = False + if isdir: + self._rmtree(fullname) + else: + try: + self._remove(fullname) + except self._os_error: + pass + try: + self._rmdir(path) + except self._os_error: + pass diff --git a/tests/bytecode/pylib-tests/this.py b/tests/bytecode/pylib-tests/this.py new file mode 100644 index 0000000000..b188c6e0b0 --- /dev/null +++ b/tests/bytecode/pylib-tests/this.py @@ -0,0 +1,28 @@ +s = """Gur Mra bs Clguba, ol Gvz Crgref + +Ornhgvshy vf orggre guna htyl. +Rkcyvpvg vf orggre guna vzcyvpvg. +Fvzcyr vf orggre guna pbzcyrk. +Pbzcyrk vf orggre guna pbzcyvpngrq. +Syng vf orggre guna arfgrq. +Fcnefr vf orggre guna qrafr. +Ernqnovyvgl pbhagf. +Fcrpvny pnfrf nera'g fcrpvny rabhtu gb oernx gur ehyrf. +Nygubhtu cenpgvpnyvgl orngf chevgl. +Reebef fubhyq arire cnff fvyragyl. +Hayrff rkcyvpvgyl fvyraprq. +Va gur snpr bs nzovthvgl, ershfr gur grzcgngvba gb thrff. +Gurer fubhyq or bar-- naq cersrenoyl bayl bar --boivbhf jnl gb qb vg. +Nygubhtu gung jnl znl abg or boivbhf ng svefg hayrff lbh'er Qhgpu. +Abj vf orggre guna arire. +Nygubhtu arire vf bsgra orggre guna *evtug* abj. +Vs gur vzcyrzragngvba vf uneq gb rkcynva, vg'f n onq vqrn. +Vs gur vzcyrzragngvba vf rnfl gb rkcynva, vg znl or n tbbq vqrn. +Anzrfcnprf ner bar ubaxvat terng vqrn -- yrg'f qb zber bs gubfr!""" + +d = {} +for c in (x+65, 97): + for i in range(26): + d[chr(i+c)] = chr((i+13) % 26 + c) + +print("".join([d.get(c, c) for c in s])) diff --git a/tests/bytecode/pylib-tests/timeit.py b/tests/bytecode/pylib-tests/timeit.py new file mode 100644 index 0000000000..6d0eaf8120 --- /dev/null +++ b/tests/bytecode/pylib-tests/timeit.py @@ -0,0 +1,334 @@ +#! /usr/bin/env python3 + +"""Tool for measuring execution time of small code snippets. 
+ +This module avoids a number of common traps for measuring execution +times. See also Tim Peters' introduction to the Algorithms chapter in +the Python Cookbook, published by O'Reilly. + +Library usage: see the Timer class. + +Command line usage: + python timeit.py [-n N] [-r N] [-s S] [-t] [-c] [-p] [-h] [--] [statement] + +Options: + -n/--number N: how many times to execute 'statement' (default: see below) + -r/--repeat N: how many times to repeat the timer (default 3) + -s/--setup S: statement to be executed once initially (default 'pass') + -p/--process: use time.process_time() (default is time.perf_counter()) + -t/--time: use time.time() (deprecated) + -c/--clock: use time.clock() (deprecated) + -v/--verbose: print raw timing results; repeat for more digits precision + -h/--help: print this usage message and exit + --: separate options from statement, use when statement starts with - + statement: statement to be timed (default 'pass') + +A multi-line statement may be given by specifying each line as a +separate argument; indented lines are possible by enclosing an +argument in quotes and using leading spaces. Multiple -s options are +treated similarly. + +If -n is not given, a suitable number of loops is calculated by trying +successive powers of 10 until the total time is at least 0.2 seconds. + +The difference in default timer function is because on Windows, +clock() has microsecond granularity but time()'s granularity is 1/60th +of a second; on Unix, clock() has 1/100th of a second granularity and +time() is much more precise. On either platform, the default timer +functions measure wall clock time, not the CPU time. This means that +other processes running on the same computer may interfere with the +timing. The best thing to do when accurate timing is necessary is to +repeat the timing a few times and use the best time. The -r option is +good for this; the default of 3 repetitions is probably enough in most +cases. On Unix, you can use clock() to measure CPU time. + +Note: there is a certain baseline overhead associated with executing a +pass statement. The code here doesn't try to hide it, but you should +be aware of it. The baseline overhead can be measured by invoking the +program without arguments. + +The baseline overhead differs between Python versions! Also, to +fairly compare older Python versions to Python 2.3, you may want to +use python -O for the older versions to avoid timing SET_LINENO +instructions. +""" + +import gc +import sys +import time +try: + import itertools +except ImportError: + # Must be an older Python version (see timeit() below) + itertools = None + +__all__ = ["Timer"] + +dummy_src_name = "<timeit-src>" +default_number = 1000000 +default_repeat = 3 +default_timer = time.perf_counter + +# Don't change the indentation of the template; the reindent() calls +# in Timer.__init__() depend on setup being indented 4 spaces and stmt +# being indented 8 spaces. +template = """ +def inner(_it, _timer): + {setup} + _t0 = _timer() + for _i in _it: + {stmt} + _t1 = _timer() + return _t1 - _t0 +""" + +def reindent(src, indent): + """Helper to reindent a multi-line statement.""" + return src.replace("\n", "\n" + " "*indent) + +def _template_func(setup, func): + """Create a timer function. Used if the "statement" is a callable.""" + def inner(_it, _timer, _func=func): + setup() + _t0 = _timer() + for _i in _it: + _func() + _t1 = _timer() + return _t1 - _t0 + return inner + +class Timer: + """Class for timing execution speed of small code snippets. 
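+
+    A minimal illustration (hypothetical statement):
+
+        t = Timer("sorted(range(100))")
+        best = min(t.repeat(3, number=10000))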
+
+    The constructor takes a statement to be timed, an additional
+    statement used for setup, and a timer function.  Both statements
+    default to 'pass'; the timer function is platform-dependent (see
+    module doc string).
+
+    To measure the execution time of the first statement, use the
+    timeit() method.  The repeat() method is a convenience to call
+    timeit() multiple times and return a list of results.
+
+    The statements may contain newlines, as long as they don't contain
+    multi-line string literals.
+    """
+
+    def __init__(self, stmt="pass", setup="pass", timer=default_timer):
+        """Constructor.  See class doc string."""
+        self.timer = timer
+        ns = {}
+        if isinstance(stmt, str):
+            stmt = reindent(stmt, 8)
+            if isinstance(setup, str):
+                setup = reindent(setup, 4)
+                src = template.format(stmt=stmt, setup=setup)
+            elif callable(setup):
+                src = template.format(stmt=stmt, setup='_setup()')
+                ns['_setup'] = setup
+            else:
+                raise ValueError("setup is neither a string nor callable")
+            self.src = src # Save for traceback display
+            code = compile(src, dummy_src_name, "exec")
+            exec(code, globals(), ns)
+            self.inner = ns["inner"]
+        elif callable(stmt):
+            self.src = None
+            if isinstance(setup, str):
+                _setup = setup
+                def setup():
+                    exec(_setup, globals(), ns)
+            elif not callable(setup):
+                raise ValueError("setup is neither a string nor callable")
+            self.inner = _template_func(setup, stmt)
+        else:
+            raise ValueError("stmt is neither a string nor callable")
+
+    def print_exc(self, file=None):
+        """Helper to print a traceback from the timed code.
+
+        Typical use:
+
+            t = Timer(...)       # outside the try/except
+            try:
+                t.timeit(...)    # or t.repeat(...)
+            except:
+                t.print_exc()
+
+        The advantage over the standard traceback is that source lines
+        in the compiled template will be displayed.
+
+        The optional file argument directs where the traceback is
+        sent; it defaults to sys.stderr.
+        """
+        import linecache, traceback
+        if self.src is not None:
+            linecache.cache[dummy_src_name] = (len(self.src),
+                                               None,
+                                               self.src.split("\n"),
+                                               dummy_src_name)
+        # else the source is already stored somewhere else
+
+        traceback.print_exc(file=file)
+
+    def timeit(self, number=default_number):
+        """Time 'number' executions of the main statement.
+
+        To be precise, this executes the setup statement once, and
+        then returns the time it takes to execute the main statement
+        a number of times, as a float measured in seconds.  The
+        argument is the number of times through the loop, defaulting
+        to one million.  The main statement, the setup statement and
+        the timer function to be used are passed to the constructor.
+        """
+        if itertools:
+            it = itertools.repeat(None, number)
+        else:
+            it = [None] * number
+        gcold = gc.isenabled()
+        gc.disable()
+        try:
+            timing = self.inner(it, self.timer)
+        finally:
+            if gcold:
+                gc.enable()
+        return timing
+
+    def repeat(self, repeat=default_repeat, number=default_number):
+        """Call timeit() a few times.
+
+        This is a convenience function that calls the timeit()
+        repeatedly, returning a list of results.  The first argument
+        specifies how many times to call timeit(), defaulting to 3;
+        the second argument specifies the 'number' argument of
+        timeit(), defaulting to one million.
+
+        Note: it's tempting to calculate mean and standard deviation
+        from the result vector and report these.  However, this is not
+        very useful.
+        In a typical case, the lowest value gives a
+        lower bound for how fast your machine can run the given code
+        snippet; higher values in the result vector are typically not
+        caused by variability in Python's speed, but by other
+        processes interfering with your timing accuracy.  So the min()
+        of the result is probably the only number you should be
+        interested in.  After that, you should look at the entire
+        vector and apply common sense rather than statistics.
+        """
+        r = []
+        for i in range(repeat):
+            t = self.timeit(number)
+            r.append(t)
+        return r
+
+def timeit(stmt="pass", setup="pass", timer=default_timer,
+           number=default_number):
+    """Convenience function to create Timer object and call timeit method."""
+    return Timer(stmt, setup, timer).timeit(number)
+
+def repeat(stmt="pass", setup="pass", timer=default_timer,
+           repeat=default_repeat, number=default_number):
+    """Convenience function to create Timer object and call repeat method."""
+    return Timer(stmt, setup, timer).repeat(repeat, number)
+
+def main(args=None, *, _wrap_timer=None):
+    """Main program, used when run as a script.
+
+    The optional 'args' argument specifies the command line to be parsed,
+    defaulting to sys.argv[1:].
+
+    The return value is an exit code to be passed to sys.exit(); it
+    may be None to indicate success.
+
+    When an exception happens during timing, a traceback is printed to
+    stderr and the return value is 1.  Exceptions at other times
+    (including the template compilation) are not caught.
+
+    '_wrap_timer' is an internal interface used for unit testing.  If it
+    is not None, it must be a callable that accepts a timer function
+    and returns another timer function (used for unit testing).
+    """
+    if args is None:
+        args = sys.argv[1:]
+    import getopt
+    try:
+        opts, args = getopt.getopt(args, "n:s:r:tcpvh",
+                                   ["number=", "setup=", "repeat=",
+                                    "time", "clock", "process",
+                                    "verbose", "help"])
+    except getopt.error as err:
+        print(err)
+        print("use -h/--help for command line help")
+        return 2
+    timer = default_timer
+    stmt = "\n".join(args) or "pass"
+    number = 0 # auto-determine
+    setup = []
+    repeat = default_repeat
+    verbose = 0
+    precision = 3
+    for o, a in opts:
+        if o in ("-n", "--number"):
+            number = int(a)
+        if o in ("-s", "--setup"):
+            setup.append(a)
+        if o in ("-r", "--repeat"):
+            repeat = int(a)
+            if repeat <= 0:
+                repeat = 1
+        if o in ("-t", "--time"):
+            timer = time.time
+        if o in ("-c", "--clock"):
+            timer = time.clock
+        if o in ("-p", "--process"):
+            timer = time.process_time
+        if o in ("-v", "--verbose"):
+            if verbose:
+                precision += 1
+            verbose += 1
+        if o in ("-h", "--help"):
+            print(__doc__, end=' ')
+            return 0
+    setup = "\n".join(setup) or "pass"
+    # Include the current directory, so that local imports work (sys.path
+    # contains the directory of this script, rather than the current
+    # directory)
+    import os
+    sys.path.insert(0, os.curdir)
+    if _wrap_timer is not None:
+        timer = _wrap_timer(timer)
+    t = Timer(stmt, setup, timer)
+    if number == 0:
+        # determine number so that 0.2 <= total time < 2.0
+        for i in range(1, 10):
+            number = 10**i
+            try:
+                x = t.timeit(number)
+            except:
+                t.print_exc()
+                return 1
+            if verbose:
+                print("%d loops -> %.*g secs" % (number, precision, x))
+            if x >= 0.2:
+                break
+    try:
+        r = t.repeat(repeat, number)
+    except:
+        t.print_exc()
+        return 1
+    best = min(r)
+    if verbose:
+        print("raw times:", " ".join(["%.*g" % (precision, x) for x in r]))
+    print("%d loops," % number, end=' ')
+    usec = best * 1e6 / number
+    if usec < 1000:
+        print("best
of %d: %.*g usec per loop" % (repeat, precision, usec)) + else: + msec = usec / 1000 + if msec < 1000: + print("best of %d: %.*g msec per loop" % (repeat, precision, msec)) + else: + sec = msec / 1000 + print("best of %d: %.*g sec per loop" % (repeat, precision, sec)) + return None + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/bytecode/pylib-tests/tty.py b/tests/bytecode/pylib-tests/tty.py new file mode 100644 index 0000000000..a72eb67554 --- /dev/null +++ b/tests/bytecode/pylib-tests/tty.py @@ -0,0 +1,36 @@ +"""Terminal utilities.""" + +# Author: Steen Lumholt. + +from termios import * + +__all__ = ["setraw", "setcbreak"] + +# Indexes for termios list. +IFLAG = 0 +OFLAG = 1 +CFLAG = 2 +LFLAG = 3 +ISPEED = 4 +OSPEED = 5 +CC = 6 + +def setraw(fd, when=TCSAFLUSH): + """Put terminal into a raw mode.""" + mode = tcgetattr(fd) + mode[IFLAG] = mode[IFLAG] & ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON) + mode[OFLAG] = mode[OFLAG] & ~(OPOST) + mode[CFLAG] = mode[CFLAG] & ~(CSIZE | PARENB) + mode[CFLAG] = mode[CFLAG] | CS8 + mode[LFLAG] = mode[LFLAG] & ~(ECHO | ICANON | IEXTEN | ISIG) + mode[CC][VMIN] = 1 + mode[CC][VTIME] = 0 + tcsetattr(fd, when, mode) + +def setcbreak(fd, when=TCSAFLUSH): + """Put terminal into a cbreak mode.""" + mode = tcgetattr(fd) + mode[LFLAG] = mode[LFLAG] & ~(ECHO | ICANON) + mode[CC][VMIN] = 1 + mode[CC][VTIME] = 0 + tcsetattr(fd, when, mode) diff --git a/tests/bytecode/pylib-tests/types.py b/tests/bytecode/pylib-tests/types.py new file mode 100644 index 0000000000..cfd09eaaff --- /dev/null +++ b/tests/bytecode/pylib-tests/types.py @@ -0,0 +1,101 @@ +""" +Define names for built-in types that aren't directly accessible as a builtin. +""" +import sys + +# Iterators in Python aren't a matter of type but of protocol. A large +# and changing number of builtin types implement *some* flavor of +# iterator. Don't check the type! Use hasattr to check for both +# "__iter__" and "__next__" attributes instead. + +def _f(): pass +FunctionType = type(_f) +LambdaType = type(lambda: None) # Same as FunctionType +CodeType = type(_f.__code__) +MappingProxyType = type(type.__dict__) +SimpleNamespace = type(sys.implementation) + +def _g(): + yield 1 +GeneratorType = type(_g()) + +class _C: + def _m(self): pass +MethodType = type(_C()._m) + +BuiltinFunctionType = type(len) +BuiltinMethodType = type([].append) # Same as BuiltinFunctionType + +ModuleType = type(sys) + +try: + raise TypeError +except TypeError: + tb = sys.exc_info()[2] + TracebackType = type(tb) + FrameType = type(tb.tb_frame) + tb = None; del tb + +# For Jython, the following two types are identical +GetSetDescriptorType = type(FunctionType.__code__) +MemberDescriptorType = type(FunctionType.__globals__) + +del sys, _f, _g, _C, # Not for export + + +# Provide a PEP 3115 compliant mechanism for class creation +def new_class(name, bases=(), kwds=None, exec_body=None): + """Create a class object dynamically using the appropriate metaclass.""" + meta, ns, kwds = prepare_class(name, bases, kwds) + if exec_body is not None: + exec_body(ns) + return meta(name, bases, ns, **kwds) + +def prepare_class(name, bases=(), kwds=None): + """Call the __prepare__ method of the appropriate metaclass. + + Returns (metaclass, namespace, kwds) as a 3-tuple + + *metaclass* is the appropriate metaclass + *namespace* is the prepared class namespace + *kwds* is an updated copy of the passed in kwds argument with any + 'metaclass' entry removed. If no kwds argument is passed in, this will + be an empty dict. 
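+
+    A small illustration (hypothetical class name):
+
+        meta, ns, kwds = prepare_class("Point", (object,))
+        # meta is type, ns the (initially empty) namespace, kwds {}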
+ """ + if kwds is None: + kwds = {} + else: + kwds = dict(kwds) # Don't alter the provided mapping + if 'metaclass' in kwds: + meta = kwds.pop('metaclass') + else: + if bases: + meta = type(bases[0]) + else: + meta = type + if isinstance(meta, type): + # when meta is a type, we first determine the most-derived metaclass + # instead of invoking the initial candidate directly + meta = _calculate_meta(meta, bases) + if hasattr(meta, '__prepare__'): + ns = meta.__prepare__(name, bases, **kwds) + else: + ns = {} + return meta, ns, kwds + +def _calculate_meta(meta, bases): + """Calculate the most derived metaclass.""" + winner = meta + for base in bases: + base_meta = type(base) + if issubclass(winner, base_meta): + continue + if issubclass(base_meta, winner): + winner = base_meta + continue + # else: + raise TypeError("metaclass conflict: " + "the metaclass of a derived class " + "must be a (non-strict) subclass " + "of the metaclasses of all its bases") + return winner diff --git a/tests/bytecode/pylib-tests/uu.py b/tests/bytecode/pylib-tests/uu.py new file mode 100644 index 0000000000..d68d29374a --- /dev/null +++ b/tests/bytecode/pylib-tests/uu.py @@ -0,0 +1,199 @@ +#! /usr/bin/env python3 + +# Copyright 1994 by Lance Ellinghouse +# Cathedral City, California Republic, United States of America. +# All Rights Reserved +# Permission to use, copy, modify, and distribute this software and its +# documentation for any purpose and without fee is hereby granted, +# provided that the above copyright notice appear in all copies and that +# both that copyright notice and this permission notice appear in +# supporting documentation, and that the name of Lance Ellinghouse +# not be used in advertising or publicity pertaining to distribution +# of the software without specific, written prior permission. +# LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE +# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# +# Modified by Jack Jansen, CWI, July 1995: +# - Use binascii module to do the actual line-by-line conversion +# between ascii and binary. This results in a 1000-fold speedup. The C +# version is still 5 times faster, though. +# - Arguments more compliant with python standard + +"""Implementation of the UUencode and UUdecode functions. 
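+
+For example (hypothetical paths; open binary file objects and '-' for
+stdin/stdout are accepted as well):
+
+    encode('report.txt', 'report.uu')
+    decode('report.uu', 'copy.txt')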
+ +encode(in_file, out_file [,name, mode]) +decode(in_file [, out_file, mode]) +""" + +import binascii +import os +import sys + +__all__ = ["Error", "encode", "decode"] + +class Error(Exception): + pass + +def encode(in_file, out_file, name=None, mode=None): + """Uuencode file""" + # + # If in_file is a pathname open it and change defaults + # + opened_files = [] + try: + if in_file == '-': + in_file = sys.stdin.buffer + elif isinstance(in_file, str): + if name is None: + name = os.path.basename(in_file) + if mode is None: + try: + mode = os.stat(in_file).st_mode + except AttributeError: + pass + in_file = open(in_file, 'rb') + opened_files.append(in_file) + # + # Open out_file if it is a pathname + # + if out_file == '-': + out_file = sys.stdout.buffer + elif isinstance(out_file, str): + out_file = open(out_file, 'wb') + opened_files.append(out_file) + # + # Set defaults for name and mode + # + if name is None: + name = '-' + if mode is None: + mode = 0o666 + # + # Write the data + # + out_file.write(('begin %o %s\n' % ((mode & 0o777), name)).encode("ascii")) + data = in_file.read(45) + while len(data) > 0: + out_file.write(binascii.b2a_uu(data)) + data = in_file.read(45) + out_file.write(b' \nend\n') + finally: + for f in opened_files: + f.close() + + +def decode(in_file, out_file=None, mode=None, quiet=False): + """Decode uuencoded file""" + # + # Open the input file, if needed. + # + opened_files = [] + if in_file == '-': + in_file = sys.stdin.buffer + elif isinstance(in_file, str): + in_file = open(in_file, 'rb') + opened_files.append(in_file) + + try: + # + # Read until a begin is encountered or we've exhausted the file + # + while True: + hdr = in_file.readline() + if not hdr: + raise Error('No valid begin line found in input file') + if not hdr.startswith(b'begin'): + continue + hdrfields = hdr.split(b' ', 2) + if len(hdrfields) == 3 and hdrfields[0] == b'begin': + try: + int(hdrfields[1], 8) + break + except ValueError: + pass + if out_file is None: + # If the filename isn't ASCII, what's up with that?!? 
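+            # For reference: a valid header line looks like
+            # b"begin 644 filename\n", so hdrfields is
+            # [b'begin', b'644', b'filename\n'] after split(b' ', 2).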
+            out_file = hdrfields[2].rstrip(b' \t\r\n\f').decode("ascii")
+            if os.path.exists(out_file):
+                raise Error('Cannot overwrite existing file: %s' % out_file)
+        if mode is None:
+            mode = int(hdrfields[1], 8)
+        #
+        # Open the output file
+        #
+        if out_file == '-':
+            out_file = sys.stdout.buffer
+        elif isinstance(out_file, str):
+            fp = open(out_file, 'wb')
+            try:
+                os.chmod(out_file, mode)    # os.chmod, not the nonexistent os.path.chmod
+            except AttributeError:
+                pass
+            out_file = fp
+            opened_files.append(out_file)
+        #
+        # Main decoding loop
+        #
+        s = in_file.readline()
+        while s and s.strip(b' \t\r\n\f') != b'end':
+            try:
+                data = binascii.a2b_uu(s)
+            except binascii.Error as v:
+                # Workaround for broken uuencoders by /Fredrik Lundh
+                nbytes = (((s[0]-32) & 63) * 4 + 5) // 3
+                data = binascii.a2b_uu(s[:nbytes])
+                if not quiet:
+                    sys.stderr.write("Warning: %s\n" % v)
+            out_file.write(data)
+            s = in_file.readline()
+        if not s:
+            raise Error('Truncated input file')
+    finally:
+        for f in opened_files:
+            f.close()
+
+def test():
+    """uuencode/uudecode main program"""
+
+    import optparse
+    parser = optparse.OptionParser(usage='usage: %prog [-d] [-t] [input [output]]')
+    parser.add_option('-d', '--decode', dest='decode', help='Decode (instead of encode)?', default=False, action='store_true')
+    parser.add_option('-t', '--text', dest='text', help='data is text, encoded format unix-compatible text?', default=False, action='store_true')
+
+    (options, args) = parser.parse_args()
+    if len(args) > 2:
+        parser.error('incorrect number of arguments')
+        sys.exit(1)
+
+    # Use the binary streams underlying stdin/stdout
+    input = sys.stdin.buffer
+    output = sys.stdout.buffer
+    if len(args) > 0:
+        input = args[0]
+    if len(args) > 1:
+        output = args[1]
+
+    if options.decode:
+        if options.text:
+            if isinstance(output, str):
+                output = open(output, 'wb')
+            else:
+                print(sys.argv[0], ': cannot do -t to stdout')
+                sys.exit(1)
+        decode(input, output)
+    else:
+        if options.text:
+            if isinstance(input, str):
+                input = open(input, 'rb')
+            else:
+                print(sys.argv[0], ': cannot do -t from stdin')
+                sys.exit(1)
+        encode(input, output)
+
+if __name__ == '__main__':
+    test()
diff --git a/tests/bytecode/pylib-tests/wave.py b/tests/bytecode/pylib-tests/wave.py
new file mode 100644
index 0000000000..64a471a963
--- /dev/null
+++ b/tests/bytecode/pylib-tests/wave.py
@@ -0,0 +1,504 @@
+"""Stuff to parse WAVE files.
+
+Usage.
+
+Reading WAVE files:
+      f = wave.open(file, 'r')
+where file is either the name of a file or an open file pointer.
+The open file pointer must have methods read(), seek(), and close().
+When the setpos() and rewind() methods are not used, the seek()
+method is not necessary.
+
+This returns an instance of a class with the following public methods:
+      getnchannels()  -- returns number of audio channels (1 for
+                         mono, 2 for stereo)
+      getsampwidth()  -- returns sample width in bytes
+      getframerate()  -- returns sampling frequency
+      getnframes()    -- returns number of audio frames
+      getcomptype()   -- returns compression type ('NONE' for linear samples)
+      getcompname()   -- returns human-readable version of
+                         compression type ('not compressed' for linear samples)
+      getparams()     -- returns a tuple consisting of all of the
+                         above in the above order
+      getmarkers()    -- returns None (for compatibility with the
+                         aifc module)
+      getmark(id)     -- raises an error since the mark does not
+                         exist (for compatibility with the aifc module)
+      readframes(n)   -- returns at most n frames of audio
+      rewind()        -- rewind to the beginning of the audio stream
+      setpos(pos)     -- seek to the specified position
+      tell()          -- return the current position
+      close()         -- close the instance (make it unusable)
+The position returned by tell() and the position given to setpos()
+are compatible and have nothing to do with the actual position in the
+file.
+The close() method is called automatically when the class instance
+is destroyed.
+
+Writing WAVE files:
+      f = wave.open(file, 'w')
+where file is either the name of a file or an open file pointer.
+The open file pointer must have methods write(), tell(), seek(), and
+close().
+
+This returns an instance of a class with the following public methods:
+      setnchannels(n) -- set the number of channels
+      setsampwidth(n) -- set the sample width
+      setframerate(n) -- set the frame rate
+      setnframes(n)   -- set the number of frames
+      setcomptype(type, name)
+                      -- set the compression type and the
+                         human-readable compression type
+      setparams(tuple)
+                      -- set all parameters at once
+      tell()          -- return current position in output file
+      writeframesraw(data)
+                      -- write audio frames without patching up the
+                         file header
+      writeframes(data)
+                      -- write audio frames and patch up the file header
+      close()         -- patch up the file header and close the
+                         output file
+You should set the parameters before the first writeframesraw or
+writeframes.  The total number of frames does not need to be set,
+but when it is set to the correct value, the header does not have to
+be patched up.
+It is best to first set all parameters, including the compression
+type, and then write audio frames using writeframesraw.
+When all frames have been written, either call writeframes(b'') or
+close() to patch up the sizes in the header.
+The close() method is called automatically when the class instance
+is destroyed.
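+
+A minimal copy sketch (illustrative only; "in.wav" and "out.wav" are
+hypothetical paths):
+
+    r = wave.open("in.wav", 'rb')
+    w = wave.open("out.wav", 'wb')
+    w.setparams(r.getparams())
+    w.writeframes(r.readframes(r.getnframes()))
+    w.close()
+    r.close()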
+""" + +import builtins + +__all__ = ["open", "openfp", "Error"] + +class Error(Exception): + pass + +WAVE_FORMAT_PCM = 0x0001 + +_array_fmts = 0+None, 'b', 'h', None, 'l' + +# Determine endian-ness +import struct +if struct.pack("h", 1) == b"\000\001": + big_endian = 1 +else: + big_endian = 0 + +from chunk import Chunk + +class Wave_read: + """Variables used in this class: + + These variables are available to the user though appropriate + methods of this class: + _file -- the open file with methods read(), close(), and seek() + set through the __init__() method + _nchannels -- the number of audio channels + available through the getnchannels() method + _nframes -- the number of audio frames + available through the getnframes() method + _sampwidth -- the number of bytes per audio sample + available through the getsampwidth() method + _framerate -- the sampling frequency + available through the getframerate() method + _comptype -- the AIFF-C compression type ('NONE' if AIFF) + available through the getcomptype() method + _compname -- the human-readable AIFF-C compression type + available through the getcomptype() method + _soundpos -- the position in the audio stream + available through the tell() method, set through the + setpos() method + + These variables are used internally only: + _fmt_chunk_read -- 1 iff the FMT chunk has been read + _data_seek_needed -- 1 iff positioned correctly in audio + file for readframes() + _data_chunk -- instantiation of a chunk class for the DATA chunk + _framesize -- size of one frame in the file + """ + + def initfp(self, file): + self._convert = None + self._soundpos = 0 + self._file = Chunk(file, bigendian = 0) + if self._file.getname() != b'RIFF': + raise Error('file does not start with RIFF id') + if self._file.read(4) != b'WAVE': + raise Error('not a WAVE file') + self._fmt_chunk_read = 0 + self._data_chunk = None + while 1: + self._data_seek_needed = 1 + try: + chunk = Chunk(self._file, bigendian = 0) + except EOFError: + break + chunkname = chunk.getname() + if chunkname == b'fmt ': + self._read_fmt_chunk(chunk) + self._fmt_chunk_read = 1 + elif chunkname == b'data': + if not self._fmt_chunk_read: + raise Error('data chunk before fmt chunk') + self._data_chunk = chunk + self._nframes = chunk.chunksize // self._framesize + self._data_seek_needed = 0 + break + chunk.skip() + if self._fmt_chunk_read or self._data_chunk: + raise Error('fmt chunk and/or data chunk missing') + + def __init__(self, f): + self._i_opened_the_file = None + if isinstance(f, str): + f = builtins.open(f, 'rb') + self._i_opened_the_file = f + # else, assume it is an open file object already + try: + self.initfp(f) + except: + if self._i_opened_the_file: + f.close() + raise + + def __del__(self): + self.close() + # + # User visible methods. 
+ # + def getfp(self): + return self._file + + def rewind(self): + self._data_seek_needed = 1 + self._soundpos = 0 + + def close(self): + if self._i_opened_the_file: + self._i_opened_the_file.close() + self._i_opened_the_file = None + self._file = None + + def tell(self): + return self._soundpos + + def getnchannels(self): + return self._nchannels + + def getnframes(self): + return self._nframes + + def getsampwidth(self): + return self._sampwidth + + def getframerate(self): + return self._framerate + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + + def getparams(self): + return self.getnchannels(), self.getsampwidth(), \ + self.getframerate(), self.getnframes(), \ + self.getcomptype(), self.getcompname() + + def getmarkers(self): + return None + + def getmark(self, id): + raise Error('no marks') + + def setpos(self, pos): + if pos < 0 or pos > self._nframes: + raise Error('position not in range') + self._soundpos = pos + self._data_seek_needed = 1 + + def readframes(self, nframes): + if self._data_seek_needed: + self._data_chunk.seek(0, 0) + pos = self._soundpos * self._framesize + if pos: + self._data_chunk.seek(pos, 0) + self._data_seek_needed = 0 + if nframes == 0: + return b'' + if self._sampwidth > 1 and big_endian: + # unfortunately the fromfile() method does not take + # something that only looks like a file object, so + # we have to reach into the innards of the chunk object + import array + chunk = self._data_chunk + data = array.array(_array_fmts[self._sampwidth]) + nitems = nframes * self._nchannels + if nitems * self._sampwidth > chunk.chunksize - chunk.size_read: + nitems = (chunk.chunksize - chunk.size_read) // self._sampwidth + data.fromfile(chunk.file.file, nitems) + # "tell" data chunk how much was read + chunk.size_read = chunk.size_read + nitems * self._sampwidth + # do the same for the outermost chunk + chunk = chunk.file + chunk.size_read = chunk.size_read + nitems * self._sampwidth + data.byteswap() + data = data.tobytes() + else: + data = self._data_chunk.read(nframes * self._framesize) + if self._convert and data: + data = self._convert(data) + self._soundpos = self._soundpos + len(data) // (self._nchannels * self._sampwidth) + return data + + # + # Internal methods. 
+ # + + def _read_fmt_chunk(self, chunk): + wFormatTag, self._nchannels, self._framerate, dwAvgBytesPerSec, wBlockAlign = struct.unpack_from('<HHLLH', chunk.read(14)) + if wFormatTag == WAVE_FORMAT_PCM: + sampwidth = struct.unpack_from('<H', chunk.read(2))[0] + self._sampwidth = (sampwidth + 7) // 8 + else: + raise Error('unknown format: %r' % (wFormatTag,)) + self._framesize = self._nchannels * self._sampwidth + self._comptype = 'NONE' + self._compname = 'not compressed' + +class Wave_write: + """Variables used in this class: + + These variables are user settable through appropriate methods + of this class: + _file -- the open file with methods write(), close(), tell(), seek() + set through the __init__() method + _comptype -- the AIFF-C compression type ('NONE' in AIFF) + set through the setcomptype() or setparams() method + _compname -- the human-readable AIFF-C compression type + set through the setcomptype() or setparams() method + _nchannels -- the number of audio channels + set through the setnchannels() or setparams() method + _sampwidth -- the number of bytes per audio sample + set through the setsampwidth() or setparams() method + _framerate -- the sampling frequency + set through the setframerate() or setparams() method + _nframes -- the number of audio frames written to the header + set through the setnframes() or setparams() method + + These variables are used internally only: + _datalength -- the size of the audio samples written to the header + _nframeswritten -- the number of frames actually written + _datawritten -- the size of the audio samples actually written + """ + + def __init__(self, f): + self._i_opened_the_file = None + if isinstance(f, str): + f = builtins.open(f, 'wb') + self._i_opened_the_file = f + try: + self.initfp(f) + except: + if self._i_opened_the_file: + f.close() + raise + + def initfp(self, file): + self._file = file + self._convert = None + self._nchannels = 0 + self._sampwidth = 0 + self._framerate = 0 + self._nframes = 0 + self._nframeswritten = 0 + self._datawritten = 0 + self._datalength = 0 + self._headerwritten = False + + def __del__(self): + self.close() + + # + # User visible methods. 
+    #
+    def setnchannels(self, nchannels):
+        if self._datawritten:
+            raise Error('cannot change parameters after starting to write')
+        if nchannels < 1:
+            raise Error('bad # of channels')
+        self._nchannels = nchannels
+
+    def getnchannels(self):
+        if not self._nchannels:
+            raise Error('number of channels not set')
+        return self._nchannels
+
+    def setsampwidth(self, sampwidth):
+        if self._datawritten:
+            raise Error('cannot change parameters after starting to write')
+        if sampwidth < 1 or sampwidth > 4:
+            raise Error('bad sample width')
+        self._sampwidth = sampwidth
+
+    def getsampwidth(self):
+        if not self._sampwidth:
+            raise Error('sample width not set')
+        return self._sampwidth
+
+    def setframerate(self, framerate):
+        if self._datawritten:
+            raise Error('cannot change parameters after starting to write')
+        if framerate <= 0:
+            raise Error('bad frame rate')
+        self._framerate = int(round(framerate))
+
+    def getframerate(self):
+        if not self._framerate:
+            raise Error('frame rate not set')
+        return self._framerate
+
+    def setnframes(self, nframes):
+        if self._datawritten:
+            raise Error('cannot change parameters after starting to write')
+        self._nframes = nframes
+
+    def getnframes(self):
+        return self._nframeswritten
+
+    def setcomptype(self, comptype, compname):
+        if self._datawritten:
+            raise Error('cannot change parameters after starting to write')
+        if comptype not in ('NONE',):
+            raise Error('unsupported compression type')
+        self._comptype = comptype
+        self._compname = compname
+
+    def getcomptype(self):
+        return self._comptype
+
+    def getcompname(self):
+        return self._compname
+
+    def setparams(self, params):
+        nchannels, sampwidth, framerate, nframes, comptype, compname = params
+        if self._datawritten:
+            raise Error('cannot change parameters after starting to write')
+        self.setnchannels(nchannels)
+        self.setsampwidth(sampwidth)
+        self.setframerate(framerate)
+        self.setnframes(nframes)
+        self.setcomptype(comptype, compname)
+
+    def getparams(self):
+        if not self._nchannels or not self._sampwidth or not self._framerate:
+            raise Error('not all parameters set')
+        return self._nchannels, self._sampwidth, self._framerate, \
+              self._nframes, self._comptype, self._compname
+
+    def setmark(self, id, pos, name):
+        raise Error('setmark() not supported')
+
+    def getmark(self, id):
+        raise Error('no marks')
+
+    def getmarkers(self):
+        return None
+
+    def tell(self):
+        return self._nframeswritten
+
+    def writeframesraw(self, data):
+        self._ensure_header_written(len(data))
+        nframes = len(data) // (self._sampwidth * self._nchannels)
+        if self._convert:
+            data = self._convert(data)
+        if self._sampwidth > 1 and big_endian:
+            import array
+            data = array.array(_array_fmts[self._sampwidth], data)
+            data.byteswap()
+            data.tofile(self._file)
+            self._datawritten = self._datawritten + len(data) * self._sampwidth
+        else:
+            self._file.write(data)
+            self._datawritten = self._datawritten + len(data)
+        self._nframeswritten = self._nframeswritten + nframes
+
+    def writeframes(self, data):
+        self.writeframesraw(data)
+        if self._datalength != self._datawritten:
+            self._patchheader()
+
+    def close(self):
+        if self._file:
+            self._ensure_header_written(0)
+            if self._datalength != self._datawritten:
+                self._patchheader()
+            self._file.flush()
+            self._file = None
+        if self._i_opened_the_file:
+            self._i_opened_the_file.close()
+            self._i_opened_the_file = None
+
+    #
+    # Internal methods.
+    #
+
+    def _ensure_header_written(self, datasize):
+        if not self._headerwritten:
+            if not self._nchannels:
+                raise Error('# channels not specified')
+            if not self._sampwidth:
+                raise Error('sample width not specified')
+            if not self._framerate:
+                raise Error('sampling rate not specified')
+            self._write_header(datasize)
+
+    def _write_header(self, initlength):
+        assert not self._headerwritten
+        self._file.write(b'RIFF')
+        if not self._nframes:
+            self._nframes = initlength // (self._nchannels * self._sampwidth)
+        self._datalength = self._nframes * self._nchannels * self._sampwidth
+        self._form_length_pos = self._file.tell()
+        self._file.write(struct.pack('<L4s4sLHHLLHH4s',
+            36 + self._datalength, b'WAVE', b'fmt ', 16,
+            WAVE_FORMAT_PCM, self._nchannels, self._framerate,
+            self._nchannels * self._framerate * self._sampwidth,
+            self._nchannels * self._sampwidth,
+            self._sampwidth * 8, b'data'))
+        self._data_length_pos = self._file.tell()
+        self._file.write(struct.pack('<L', self._datalength))
+        self._headerwritten = True
+
+    def _patchheader(self):
+        assert self._headerwritten
+        if self._datawritten == self._datalength:
+            return
+        curpos = self._file.tell()
+        self._file.seek(self._form_length_pos, 0)
+        self._file.write(struct.pack('<L', 36 + self._datawritten))
+        self._file.seek(self._data_length_pos, 0)
+        self._file.write(struct.pack('<L', self._datawritten))
+        self._file.seek(curpos, 0)
+        self._datalength = self._datawritten
+
+def open(f, mode=None):
+    if mode is None:
+        if hasattr(f, 'mode'):
+            mode = f.mode
+        else:
+            mode = 'rb'
+    if mode in ('r', 'rb'):
+        return Wave_read(f)
+    elif mode in ('w', 'wb'):
+        return Wave_write(f)
+    else:
+        raise Error("mode must be 'r', 'rb', 'w', or 'wb'")
+
+openfp = open # B/W compatibility
diff --git a/tests/bytecode/pylib-tests/weakref.py b/tests/bytecode/pylib-tests/weakref.py
new file mode 100644
index 0000000000..fcb6b74d1b
--- /dev/null
+++ b/tests/bytecode/pylib-tests/weakref.py
@@ -0,0 +1,385 @@
+"""Weak reference support for Python.
+
+This module is an implementation of PEP 205:
+
+http://www.python.org/dev/peps/pep-0205/
+"""
+
+# Naming convention: Variables named "wr" are weak reference objects;
+# they are called this instead of "ref" to avoid name collisions with
+# the module-global ref() function imported from _weakref.
+
+from _weakref import (
+     getweakrefcount,
+     getweakrefs,
+     ref,
+     proxy,
+     CallableProxyType,
+     ProxyType,
+     ReferenceType)
+
+from _weakrefset import WeakSet, _IterationGuard
+
+import collections  # Import after _weakref to avoid circular import.
+
+ProxyTypes = (ProxyType, CallableProxyType)
+
+__all__ = ["ref", "proxy", "getweakrefcount", "getweakrefs",
+           "WeakKeyDictionary", "ReferenceType", "ProxyType",
+           "CallableProxyType", "ProxyTypes", "WeakValueDictionary",
+           "WeakSet"]
+
+
+class WeakValueDictionary(collections.MutableMapping):
+    """Mapping class that references values weakly.
+
+    Entries in the dictionary will be discarded when no strong
+    reference to the value exists anymore.
+    """
+    # We inherit the constructor without worrying about the input
+    # dictionary; since it uses our .update() method, we get the right
+    # checks (if the other dictionary is a WeakValueDictionary,
+    # objects are unwrapped on the way out, and we always wrap on the
+    # way in).
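+    #
+    # A small usage sketch (names here are illustrative only):
+    #
+    #     import weakref
+    #     class C: pass
+    #     d = weakref.WeakValueDictionary()
+    #     obj = C()
+    #     d['key'] = obj     # entry stays while obj is strongly referenced
+    #     del obj            # once obj is collected, 'key' silently vanishes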
+
+    def __init__(self, *args, **kw):
+        def remove(wr, selfref=ref(self)):
+            self = selfref()
+            if self is not None:
+                if self._iterating:
+                    self._pending_removals.append(wr.key)
+                else:
+                    del self.data[wr.key]
+        self._remove = remove
+        # A list of keys to be removed
+        self._pending_removals = []
+        self._iterating = set()
+        self.data = {}
+        self.update(*args, **kw)
+
+    def _commit_removals(self):
+        l = self._pending_removals
+        d = self.data
+        # We shouldn't encounter any KeyError, because this method should
+        # always be called *before* mutating the dict.
+        while l:
+            del d[l.pop()]
+
+    def __getitem__(self, key):
+        o = self.data[key]()
+        if o is None:
+            raise KeyError(key)
+        else:
+            return o
+
+    def __delitem__(self, key):
+        if self._pending_removals:
+            self._commit_removals()
+        del self.data[key]
+
+    def __len__(self):
+        return len(self.data) - len(self._pending_removals)
+
+    def __contains__(self, key):
+        try:
+            o = self.data[key]()
+        except KeyError:
+            return False
+        return o is not None
+
+    def __repr__(self):
+        return "<WeakValueDictionary at %s>" % id(self)
+
+    def __setitem__(self, key, value):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data[key] = KeyedRef(value, self._remove, key)
+
+    def copy(self):
+        new = WeakValueDictionary()
+        for key, wr in self.data.items():
+            o = wr()
+            if o is not None:
+                new[key] = o
+        return new
+
+    __copy__ = copy
+
+    def __deepcopy__(self, memo):
+        from copy import deepcopy
+        new = self.__class__()
+        for key, wr in self.data.items():
+            o = wr()
+            if o is not None:
+                new[deepcopy(key, memo)] = o
+        return new
+
+    def get(self, key, default=None):
+        try:
+            wr = self.data[key]
+        except KeyError:
+            return default
+        else:
+            o = wr()
+            if o is None:
+                # This should only happen if the referent has died but
+                # its callback has not yet removed the entry.
+                return default
+            else:
+                return o
+
+    def items(self):
+        with _IterationGuard(self):
+            for k, wr in self.data.items():
+                v = wr()
+                if v is not None:
+                    yield k, v
+
+    def keys(self):
+        with _IterationGuard(self):
+            for k, wr in self.data.items():
+                if wr() is not None:
+                    yield k
+
+    __iter__ = keys
+
+    def itervaluerefs(self):
+        """Return an iterator that yields the weak references to the values.
+
+        The references are not guaranteed to be 'live' at the time
+        they are used, so the result of calling the references needs
+        to be checked before being used.  This can be used to avoid
+        creating references that will cause the garbage collector to
+        keep the values around longer than needed.
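+
+        A sketch of the intended pattern (illustrative only):
+
+            for wr in d.itervaluerefs():
+                v = wr()
+                if v is not None:
+                    process(v)   # 'process' stands in for real work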
+ + """ + with _IterationGuard(self): + for wr in self.data.values(): + yield wr + + def values(self): + with _IterationGuard(self): + for wr in self.data.values(): + obj = wr() + if obj is not None: + yield obj + + def popitem(self): + if self._pending_removals: + self._commit_removals() + while True: + key, wr = self.data.popitem() + o = wr() + if o is not None: + return key, o + + def pop(self, key, *args): + if self._pending_removals: + self._commit_removals() + try: + o = self.data.pop(key)() + except KeyError: + if args: + return args[0] + raise + if o is None: + raise KeyError(key) + else: + return o + + def setdefault(self, key, default=None): + try: + wr = self.data[key] + except KeyError: + if self._pending_removals: + self._commit_removals() + self.data[key] = KeyedRef(default, self._remove, key) + return default + else: + return wr() + + def update(self, dict=None, **kwargs): + if self._pending_removals: + self._commit_removals() + d = self.data + if dict is not None: + if not hasattr(dict, "items"): + dict = type({})(dict) + for key, o in dict.items(): + d[key] = KeyedRef(o, self._remove, key) + if len(kwargs): + self.update(kwargs) + + def valuerefs(self): + """Return a list of weak references to the values. + + The references are not guaranteed to be 'live' at the time + they are used, so the result of calling the references needs + to be checked before being used. This can be used to avoid + creating references that will cause the garbage collector to + keep the values around longer than needed. + + """ + return list(self.data.values()) + + +class KeyedRef(ref): + """Specialized reference that includes a key corresponding to the value. + + This is used in the WeakValueDictionary to avoid having to create + a function object for each key stored in the mapping. A shared + callback object can use the 'key' attribute of a KeyedRef instead + of getting a reference to the key from an enclosing scope. + + """ + + __slots__ = "key", + + def __new__(type, ob, callback, key): + self = ref.__new__(type, ob, callback) + self.key = key + return self + + def __init__(self, ob, callback, key): + super().__init__(ob, callback) + + +class WeakKeyDictionary(collections.MutableMapping): + """ Mapping class that references keys weakly. + + Entries in the dictionary will be discarded when there is no + longer a strong reference to the key. This can be used to + associate additional data with an object owned by other parts of + an application without adding attributes to those objects. This + can be especially useful with objects that override attribute + accesses. + """ + + def __init__(self, dict=None): + self.data = {} + def remove(k, selfref=ref(self)): + self = selfref() + if self is not None: + if self._iterating: + self._pending_removals.append(k) + else: + del self.data[k] + self._remove = remove + # A list of dead weakrefs (keys to be removed) + self._pending_removals = [] + self._iterating = set() + if dict is not None: + self.update(dict) + + def _commit_removals(self): + # NOTE: We don't need to call this method before mutating the dict, + # because a dead weakref never compares equal to a live weakref, + # even if they happened to refer to equal objects. + # However, it means keys may already have been removed. 
+ l = self._pending_removals + d = self.data + while l: + try: + del d[l.pop()] + except KeyError: + pass + + def __delitem__(self, key): + del self.data[ref(key)] + + def __getitem__(self, key): + return self.data[ref(key)] + + def __len__(self): + return len(self.data) - len(self._pending_removals) + + def __repr__(self): + return "<WeakKeyDictionary at %s>" % id(self) + + def __setitem__(self, key, value): + self.data[ref(key, self._remove)] = value + + def copy(self): + new = WeakKeyDictionary() + for key, value in self.data.items(): + o = key() + if o is not None: + new[o] = value + return new + + __copy__ = copy + + def __deepcopy__(self, memo): + from copy import deepcopy + new = self.__class__() + for key, value in self.data.items(): + o = key() + if o is not None: + new[o] = deepcopy(value, memo) + return new + + def get(self, key, default=None): + return self.data.get(ref(key),default) + + def __contains__(self, key): + try: + wr = ref(key) + except TypeError: + return False + return wr in self.data + + def items(self): + with _IterationGuard(self): + for wr, value in self.data.items(): + key = wr() + if key is not None: + yield key, value + + def keys(self): + with _IterationGuard(self): + for wr in self.data: + obj = wr() + if obj is not None: + yield obj + + __iter__ = keys + + def values(self): + with _IterationGuard(self): + for wr, value in self.data.items(): + if wr() is not None: + yield value + + def keyrefs(self): + """Return a list of weak references to the keys. + + The references are not guaranteed to be 'live' at the time + they are used, so the result of calling the references needs + to be checked before being used. This can be used to avoid + creating references that will cause the garbage collector to + keep the keys around longer than needed. + + """ + return list(self.data) + + def popitem(self): + while True: + key, value = self.data.popitem() + o = key() + if o is not None: + return o, value + + def pop(self, key, *args): + return self.data.pop(ref(key), *args) + + def setdefault(self, key, default=None): + return self.data.setdefault(ref(key, self._remove),default) + + def update(self, dict=None, **kwargs): + d = self.data + if dict is not None: + if not hasattr(dict, "items"): + dict = type({})(dict) + for key, value in dict.items(): + d[ref(key, self._remove)] = value + if len(kwargs): + self.update(kwargs) diff --git a/tests/bytecode/pylib-tests/xdrlib.py b/tests/bytecode/pylib-tests/xdrlib.py new file mode 100644 index 0000000000..c05cf87e4a --- /dev/null +++ b/tests/bytecode/pylib-tests/xdrlib.py @@ -0,0 +1,224 @@ +"""Implements (a subset of) Sun XDR -- eXternal Data Representation. + +See: RFC 1014 + +""" + +import struct +from io import BytesIO + +__all__ = ["Error", "Packer", "Unpacker", "ConversionError"] + +# exceptions +class Error(Exception): + """Exception class for this module. 
Use: + + except xdrlib.Error as var: + # var has the Error instance for the exception + + Public ivars: + msg -- contains the message + + """ + def __init__(self, msg): + self.msg = msg + def __repr__(self): + return repr(self.msg) + def __str__(self): + return str(self.msg) + + +class ConversionError(Error): + pass + + + +class Packer: + """Pack various data representations into a buffer.""" + + def __init__(self): + self.reset() + + def reset(self): + self.__buf = BytesIO() + + def get_buffer(self): + return self.__buf.getvalue() + # backwards compatibility + get_buf = get_buffer + + def pack_uint(self, x): + self.__buf.write(struct.pack('>L', x)) + + def pack_int(self, x): + self.__buf.write(struct.pack('>l', x)) + + pack_enum = pack_int + + def pack_bool(self, x): + if x: self.__buf.write(b'\0\0\0\1') + else: self.__buf.write(b'\0\0\0\0') + + def pack_uhyper(self, x): + self.pack_uint(x>>32 & 0xffffffff) + self.pack_uint(x & 0xffffffff) + + pack_hyper = pack_uhyper + + def pack_float(self, x): + try: self.__buf.write(struct.pack('>f', x)) + except struct.error as msg: + raise ConversionError(msg) + + def pack_double(self, x): + try: self.__buf.write(struct.pack('>d', x)) + except struct.error as msg: + raise ConversionError(msg) + + def pack_fstring(self, n, s): + if n < 0: + raise ValueError('fstring size must be nonnegative') + data = s[:n] + n = ((n+3)//4)*4 + data = data + (n - len(data)) * b'\0' + self.__buf.write(data) + + pack_fopaque = pack_fstring + + def pack_string(self, s): + n = len(s) + self.pack_uint(n) + self.pack_fstring(n, s) + + pack_opaque = pack_string + pack_bytes = pack_string + + def pack_list(self, list, pack_item): + for item in list: + self.pack_uint(1) + pack_item(item) + self.pack_uint(0) + + def pack_farray(self, n, list, pack_item): + if len(list) != n: + raise ValueError('wrong array size') + for item in list: + pack_item(item) + + def pack_array(self, list, pack_item): + n = len(list) + self.pack_uint(n) + self.pack_farray(n, list, pack_item) + + + +class Unpacker: + """Unpacks various data representations from the given buffer.""" + + def __init__(self, data): + self.reset(data) + + def reset(self, data): + self.__buf = data + self.__pos = 0 + + def get_position(self): + return self.__pos + + def set_position(self, position): + self.__pos = position + + def get_buffer(self): + return self.__buf + + def done(self): + if self.__pos < len(self.__buf): + raise Error('unextracted data remains') + + def unpack_uint(self): + i = self.__pos + self.__pos = j = i+4 + data = self.__buf[i:j] + if len(data) < 4: + raise EOFError + return struct.unpack('>L', data)[0] + + def unpack_int(self): + i = self.__pos + self.__pos = j = i+4 + data = self.__buf[i:j] + if len(data) < 4: + raise EOFError + return struct.unpack('>l', data)[0] + + unpack_enum = unpack_int + + def unpack_bool(self): + return bool(self.unpack_int()) + + def unpack_uhyper(self): + hi = self.unpack_uint() + lo = self.unpack_uint() + return int(hi)<<32 | lo + + def unpack_hyper(self): + x = self.unpack_uhyper() + if x >= 0x8000000000000000: + x = x - 0x10000000000000000 + return x + + def unpack_float(self): + i = self.__pos + self.__pos = j = i+4 + data = self.__buf[i:j] + if len(data) < 4: + raise EOFError + return struct.unpack('>f', data)[0] + + def unpack_double(self): + i = self.__pos + self.__pos = j = i+8 + data = self.__buf[i:j] + if len(data) < 8: + raise EOFError + return struct.unpack('>d', data)[0] + + def unpack_fstring(self, n): + if n < 0: + raise ValueError('fstring size must be 
nonnegative')
+        i = self.__pos
+        j = i + (n+3)//4*4
+        if j > len(self.__buf):
+            raise EOFError
+        self.__pos = j
+        return self.__buf[i:i+n]
+
+    unpack_fopaque = unpack_fstring
+
+    def unpack_string(self):
+        n = self.unpack_uint()
+        return self.unpack_fstring(n)
+
+    unpack_opaque = unpack_string
+    unpack_bytes = unpack_string
+
+    def unpack_list(self, unpack_item):
+        list = []
+        while 1:
+            x = self.unpack_uint()
+            if x == 0: break
+            if x != 1:
+                raise ConversionError('0 or 1 expected, got %r' % (x,))
+            item = unpack_item()
+            list.append(item)
+        return list
+
+    def unpack_farray(self, n, unpack_item):
+        list = []
+        for i in range(n):
+            list.append(unpack_item())
+        return list
+
+    def unpack_array(self, unpack_item):
+        n = self.unpack_uint()
+        return self.unpack_farray(n, unpack_item)
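
A quick round-trip sketch for the Packer/Unpacker pair above (the values are arbitrary illustrations):

    import xdrlib

    p = xdrlib.Packer()
    p.pack_int(-7)
    p.pack_string(b'hello')
    p.pack_array([1, 2, 3], p.pack_int)

    u = xdrlib.Unpacker(p.get_buffer())
    assert u.unpack_int() == -7
    assert u.unpack_string() == b'hello'
    assert u.unpack_array(u.unpack_int) == [1, 2, 3]
    u.done()   # raises xdrlib.Error if unconsumed bytes remain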