diff options
Diffstat (limited to 'Tools/gdb/libpython.py')
-rw-r--r-- | Tools/gdb/libpython.py | 340 |
1 files changed, 290 insertions, 50 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index e12769d7d27..8bbbb1048b6 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -19,9 +19,10 @@ giving file/line information and the state of local variables In particular, given a gdb.Value corresponding to a PyObject* in the inferior process, we can generate a "proxy value" within the gdb process. For example, given a PyObject* in the inferior process that is in fact a PyListObject* -holding three PyObject* that turn out to be PyStringObject* instances, we can -generate a proxy value within the gdb process that is a list of strings: - ["foo", "bar", "baz"] +holding three PyObject* that turn out to be PyBytesObject* instances, we can +generate a proxy value within the gdb process that is a list of bytes +instances: + [b"foo", b"bar", b"baz"] Doing so can be expensive for complicated graphs of objects, and could take some time, so we also have a "write_repr" method that writes a representation @@ -41,6 +42,7 @@ The module also extends gdb with some python-specific commands. ''' from __future__ import with_statement import gdb +import locale import sys # Look up the gdb.Type for some standard types: @@ -53,11 +55,10 @@ SIZEOF_VOID_P = _type_void_ptr.sizeof Py_TPFLAGS_HEAPTYPE = (1L << 9) -Py_TPFLAGS_INT_SUBCLASS = (1L << 23) Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) -Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) +Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27) Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) @@ -66,6 +67,10 @@ Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) MAX_OUTPUT_LEN=1024 +hexdigits = "0123456789abcdef" + +ENCODING = locale.getpreferredencoding() + class NullPyObjectPtr(RuntimeError): pass @@ -82,6 +87,30 @@ def safe_range(val): # threshold in case the data was corrupted return xrange(safety_limit(val)) +def write_unicode(file, text): + # Write a byte or unicode string to file. Unicode strings are encoded to + # ENCODING encoding with 'backslashreplace' error handler to avoid + # UnicodeEncodeError. + if isinstance(text, unicode): + text = text.encode(ENCODING, 'backslashreplace') + file.write(text) + +def os_fsencode(filename): + if not isinstance(filename, unicode): + return filename + encoding = sys.getfilesystemencoding() + if encoding == 'mbcs': + # mbcs doesn't support surrogateescape + return filename.encode(encoding) + encoded = [] + for char in filename: + # surrogateescape error handler + if 0xDC80 <= ord(char) <= 0xDCFF: + byte = chr(ord(char) - 0xDC00) + else: + byte = char.encode(encoding) + encoded.append(byte) + return ''.join(encoded) class StringTruncated(RuntimeError): pass @@ -108,7 +137,7 @@ class TruncatedStringIO(object): class PyObjectPtr(object): """ Class wrapping a gdb.Value that's a either a (PyObject*) within the - inferior process, or some subclass pointer e.g. (PyStringObject*) + inferior process, or some subclass pointer e.g. (PyBytesObject*) There will be a subclass for every refined PyObject type that we care about. @@ -148,12 +177,8 @@ class PyObjectPtr(object): return pyo_ptr.dereference()[name] if name == 'ob_size': - try: - # Python 2: - return self._gdbval.dereference()[name] - except RuntimeError: - # Python 3: - return self._gdbval.dereference()['ob_base'][name] + pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) + return pyo_ptr.dereference()[name] # General case: look it up inside the object: return self._gdbval.dereference()[name] @@ -310,16 +335,14 @@ class PyObjectPtr(object): if tp_flags & Py_TPFLAGS_HEAPTYPE: return HeapTypeObjectPtr - if tp_flags & Py_TPFLAGS_INT_SUBCLASS: - return PyIntObjectPtr if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: return PyLongObjectPtr if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: return PyListObjectPtr if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: return PyTupleObjectPtr - if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: - return PyStringObjectPtr + if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: + return PyBytesObjectPtr if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: return PyUnicodeObjectPtr if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: @@ -355,6 +378,8 @@ class PyObjectPtr(object): def as_address(self): return long(self._gdbval) +class PyVarObjectPtr(PyObjectPtr): + _typename = 'PyVarObject' class ProxyAlreadyVisited(object): ''' @@ -519,20 +544,6 @@ class PyBaseExceptionObjectPtr(PyObjectPtr): out.write(self.safe_tp_name()) self.write_field_repr('args', out, visited) -class PyBoolObjectPtr(PyObjectPtr): - """ - Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two - <bool> instances (Py_True/Py_False) within the process being debugged. - """ - _typename = 'PyBoolObject' - - def proxyval(self, visited): - if int_from_int(self.field('ob_ival')): - return True - else: - return False - - class PyClassObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> @@ -695,13 +706,6 @@ class PyInstanceObjectPtr(PyObjectPtr): _write_instance_repr(out, visited, cl_name, pyop_in_dict, self.as_address()) -class PyIntObjectPtr(PyObjectPtr): - _typename = 'PyIntObject' - - def proxyval(self, visited): - result = int_from_int(self.field('ob_ival')) - return result - class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -774,6 +778,22 @@ class PyLongObjectPtr(PyObjectPtr): result = -result return result + def write_repr(self, out, visited): + # Write this out as a Python 3 int literal, i.e. without the "L" suffix + proxy = self.proxyval(visited) + out.write("%s" % proxy) + + +class PyBoolObjectPtr(PyLongObjectPtr): + """ + Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two + <bool> instances (Py_True/Py_False) within the process being debugged. + """ + def proxyval(self, visited): + if PyLongObjectPtr.proxyval(self, visited): + return True + else: + return False class PyNoneStructPtr(PyObjectPtr): """ @@ -887,7 +907,12 @@ class PyFrameObjectPtr(PyObjectPtr): newline character''' if self.is_optimized_out(): return '(frame information optimized out)' - with open(self.filename(), 'r') as f: + filename = self.filename() + try: + f = open(os_fsencode(filename), 'r') + except IOError: + return None + with f: all_lines = f.readlines() # Convert from 1-based current_line_num to 0-based list offset: return all_lines[self.current_line_num()-1] @@ -898,9 +923,9 @@ class PyFrameObjectPtr(PyObjectPtr): return out.write('Frame 0x%x, for file %s, line %i, in %s (' % (self.as_address(), - self.co_filename, + self.co_filename.proxyval(visited), self.current_line_num(), - self.co_name)) + self.co_name.proxyval(visited))) first = True for pyop_name, pyop_value in self.iter_locals(): if not first: @@ -913,6 +938,16 @@ class PyFrameObjectPtr(PyObjectPtr): out.write(')') + def print_traceback(self): + if self.is_optimized_out(): + sys.stdout.write(' (frame information optimized out)\n') + return + visited = set() + sys.stdout.write(' File "%s", line %i, in %s\n' + % (self.co_filename.proxyval(visited), + self.current_line_num(), + self.co_name.proxyval(visited))) + class PySetObjectPtr(PyObjectPtr): _typename = 'PySetObject' @@ -937,7 +972,8 @@ class PySetObjectPtr(PyObjectPtr): return set(members) def write_repr(self, out, visited): - out.write(self.safe_tp_name()) + # Emulate Python 3's set_repr + tp_name = self.safe_tp_name() # Guard against infinite loops: if self.as_address() in visited: @@ -945,7 +981,18 @@ class PySetObjectPtr(PyObjectPtr): return visited.add(self.as_address()) - out.write('([') + # Python 3's set_repr special-cases the empty set: + if not self.field('used'): + out.write(tp_name) + out.write('()') + return + + # Python 3 uses {} for set literals: + if tp_name != 'set': + out.write(tp_name) + out.write('(') + + out.write('{') first = True table = self.field('table') for i in safe_range(self.field('mask')+1): @@ -959,11 +1006,14 @@ class PySetObjectPtr(PyObjectPtr): out.write(', ') first = False pyop_key.write_repr(out, visited) - out.write('])') + out.write('}') + + if tp_name != 'set': + out.write(')') -class PyStringObjectPtr(PyObjectPtr): - _typename = 'PyStringObject' +class PyBytesObjectPtr(PyObjectPtr): + _typename = 'PyBytesObject' def __str__(self): field_ob_size = self.field('ob_size') @@ -974,6 +1024,37 @@ class PyStringObjectPtr(PyObjectPtr): def proxyval(self, visited): return str(self) + def write_repr(self, out, visited): + # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix + + # Get a PyStringObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr + # to Python 2 code: + quote = "'" + if "'" in proxy and not '"' in proxy: + quote = '"' + out.write('b') + out.write(quote) + for byte in proxy: + if byte == quote or byte == '\\': + out.write('\\') + out.write(byte) + elif byte == '\t': + out.write('\\t') + elif byte == '\n': + out.write('\\n') + elif byte == '\r': + out.write('\\r') + elif byte < ' ' or ord(byte) >= 0x7f: + out.write('\\x') + out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) + out.write(hexdigits[ord(byte) & 0xf]) + else: + out.write(byte) + out.write(quote) + class PyTupleObjectPtr(PyObjectPtr): _typename = 'PyTupleObject' @@ -1014,6 +1095,13 @@ class PyTypeObjectPtr(PyObjectPtr): _typename = 'PyTypeObject' +def _unichr_is_printable(char): + # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py + if char == u" ": + return True + import unicodedata + return unicodedata.category(char) not in ("C", "Z") + if sys.maxunicode >= 0x10000: _unichr = unichr else: @@ -1026,6 +1114,7 @@ else: ch2 = 0xDC00 | (x & 0x3FF) return unichr(ch1) + unichr(ch2) + class PyUnicodeObjectPtr(PyObjectPtr): _typename = 'PyUnicodeObject' @@ -1072,6 +1161,116 @@ class PyUnicodeObjectPtr(PyObjectPtr): result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs]) return result + def write_repr(self, out, visited): + # Write this out as a Python 3 str literal, i.e. without a "u" prefix + + # Get a PyUnicodeObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr + # to Python 2: + if "'" in proxy and '"' not in proxy: + quote = '"' + else: + quote = "'" + out.write(quote) + + i = 0 + while i < len(proxy): + ch = proxy[i] + i += 1 + + # Escape quotes and backslashes + if ch == quote or ch == '\\': + out.write('\\') + out.write(ch) + + # Map special whitespace to '\t', \n', '\r' + elif ch == '\t': + out.write('\\t') + elif ch == '\n': + out.write('\\n') + elif ch == '\r': + out.write('\\r') + + # Map non-printable US ASCII to '\xhh' */ + elif ch < ' ' or ch == 0x7F: + out.write('\\x') + out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) + out.write(hexdigits[ord(ch) & 0x000F]) + + # Copy ASCII characters as-is + elif ord(ch) < 0x7F: + out.write(ch) + + # Non-ASCII characters + else: + ucs = ch + ch2 = None + if sys.maxunicode < 0x10000: + # If sizeof(Py_UNICODE) is 2 here (in gdb), join + # surrogate pairs before calling _unichr_is_printable. + if (i < len(proxy) + and 0xD800 <= ord(ch) < 0xDC00 \ + and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): + ch2 = proxy[i] + ucs = ch + ch2 + i += 1 + + # Unfortuately, Python 2's unicode type doesn't seem + # to expose the "isprintable" method + printable = _unichr_is_printable(ucs) + if printable: + try: + ucs.encode(ENCODING) + except UnicodeEncodeError: + printable = False + + # Map Unicode whitespace and control characters + # (categories Z* and C* except ASCII space) + if not printable: + if ch2 is not None: + # Match Python 3's representation of non-printable + # wide characters. + code = (ord(ch) & 0x03FF) << 10 + code |= ord(ch2) & 0x03FF + code += 0x00010000 + else: + code = ord(ucs) + + # Map 8-bit characters to '\\xhh' + if code <= 0xff: + out.write('\\x') + out.write(hexdigits[(code >> 4) & 0x000F]) + out.write(hexdigits[code & 0x000F]) + # Map 21-bit characters to '\U00xxxxxx' + elif code >= 0x10000: + out.write('\\U') + out.write(hexdigits[(code >> 28) & 0x0000000F]) + out.write(hexdigits[(code >> 24) & 0x0000000F]) + out.write(hexdigits[(code >> 20) & 0x0000000F]) + out.write(hexdigits[(code >> 16) & 0x0000000F]) + out.write(hexdigits[(code >> 12) & 0x0000000F]) + out.write(hexdigits[(code >> 8) & 0x0000000F]) + out.write(hexdigits[(code >> 4) & 0x0000000F]) + out.write(hexdigits[code & 0x0000000F]) + # Map 16-bit characters to '\uxxxx' + else: + out.write('\\u') + out.write(hexdigits[(code >> 12) & 0x000F]) + out.write(hexdigits[(code >> 8) & 0x000F]) + out.write(hexdigits[(code >> 4) & 0x000F]) + out.write(hexdigits[code & 0x000F]) + else: + # Copy characters as-is + out.write(ch) + if ch2 is not None: + out.write(ch2) + + out.write(quote) + + + def int_from_int(gdbval): return int(str(gdbval)) @@ -1108,7 +1307,7 @@ def pretty_printer_lookup(gdbval): if type.code == gdb.TYPE_CODE_PTR: type = type.target().unqualified() t = str(type) - if t in ("PyObject", "PyFrameObject"): + if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"): return PyObjectPtrPrinter(gdbval) """ @@ -1250,15 +1449,31 @@ class Frame(object): if self.is_evalframeex(): pyop = self.get_pyop() if pyop: - sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN))) + line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) + write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) if not pyop.is_optimized_out(): line = pyop.current_line() - sys.stdout.write(' %s\n' % line.strip()) + if line is not None: + sys.stdout.write(' %s\n' % line.strip()) else: sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) else: sys.stdout.write('#%i\n' % self.get_index()) + def print_traceback(self): + if self.is_evalframeex(): + pyop = self.get_pyop() + if pyop: + pyop.print_traceback() + if not pyop.is_optimized_out(): + line = pyop.current_line() + if line is not None: + sys.stdout.write(' %s\n' % line.strip()) + else: + sys.stdout.write(' (unable to read python frame information)\n') + else: + sys.stdout.write(' (not a python frame)\n') + class PyList(gdb.Command): '''List the current Python source code, if any @@ -1313,7 +1528,13 @@ class PyList(gdb.Command): if start<1: start = 1 - with open(filename, 'r') as f: + try: + f = open(os_fsencode(filename), 'r') + except IOError as err: + sys.stdout.write('Unable to open %s: %s\n' + % (filename, err)) + return + with f: all_lines = f.readlines() # start and end are 1-based, all_lines is 0-based; # so [start-1:end] as a python slice gives us [start, end] as a @@ -1383,6 +1604,24 @@ if hasattr(gdb.Frame, 'select'): PyUp() PyDown() +class PyBacktraceFull(gdb.Command): + 'Display the current python frame and all the frames within its call stack (if any)' + def __init__(self): + gdb.Command.__init__ (self, + "py-bt-full", + gdb.COMMAND_STACK, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + frame = Frame.get_selected_python_frame() + while frame: + if frame.is_evalframeex(): + frame.print_summary() + frame = frame.older() + +PyBacktraceFull() + class PyBacktrace(gdb.Command): 'Display the current python frame and all the frames within its call stack (if any)' def __init__(self): @@ -1393,10 +1632,11 @@ class PyBacktrace(gdb.Command): def invoke(self, args, from_tty): + sys.stdout.write('Traceback (most recent call first):\n') frame = Frame.get_selected_python_frame() while frame: if frame.is_evalframeex(): - frame.print_summary() + frame.print_traceback() frame = frame.older() PyBacktrace() |