about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Tools/gdb/libpython.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/gdb/libpython.py')
-rw-r--r-- Tools/gdb/libpython.py 340
1 files changed, 290 insertions, 50 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index e12769d7d27..8bbbb1048b6 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -19,9 +19,10 @@ giving file/line information and the state of local variables
In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process. For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
-holding three PyObject* that turn out to be PyStringObject* instances, we can
-generate a proxy value within the gdb process that is a list of strings:
- ["foo", "bar", "baz"]
+holding three PyObject* that turn out to be PyBytesObject* instances, we can
+generate a proxy value within the gdb process that is a list of bytes
+instances:
+ [b"foo", b"bar", b"baz"]
Doing so can be expensive for complicated graphs of objects, and could take
some time, so we also have a "write_repr" method that writes a representation
@@ -41,6 +42,7 @@ The module also extends gdb with some python-specific commands.
'''
from __future__ import with_statement
import gdb
+import locale
import sys
# Look up the gdb.Type for some standard types:
@@ -53,11 +55,10 @@ SIZEOF_VOID_P = _type_void_ptr.sizeof
Py_TPFLAGS_HEAPTYPE = (1L << 9)
-Py_TPFLAGS_INT_SUBCLASS = (1L << 23)
Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
-Py_TPFLAGS_STRING_SUBCLASS = (1L << 27)
+Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
@@ -66,6 +67,10 @@ Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
MAX_OUTPUT_LEN=1024
+hexdigits = "0123456789abcdef"
+
+ENCODING = locale.getpreferredencoding()
+
class NullPyObjectPtr(RuntimeError):
pass
@@ -82,6 +87,30 @@ def safe_range(val):
# threshold in case the data was corrupted
return xrange(safety_limit(val))
+def write_unicode(file, text):
+    """Write *text* to *file*.
+
+    Byte strings are written unchanged.  Unicode strings are first encoded
+    to ENCODING with the 'backslashreplace' error handler, so characters the
+    encoding cannot represent are escaped instead of raising
+    UnicodeEncodeError.
+    """
+    if not isinstance(text, unicode):
+        file.write(text)
+        return
+    file.write(text.encode(ENCODING, 'backslashreplace'))
+
+def os_fsencode(filename):
+    """Encode *filename* to a byte string using the filesystem encoding.
+
+    Anything that is not a unicode object is returned unchanged.  For every
+    encoding except 'mbcs' the surrogateescape error handler is emulated by
+    hand, one character at a time.
+    """
+    if not isinstance(filename, unicode):
+        return filename
+    fs_encoding = sys.getfilesystemencoding()
+    if fs_encoding == 'mbcs':
+        # mbcs doesn't support surrogateescape
+        return filename.encode(fs_encoding)
+
+    def encode_char(char):
+        # surrogateescape error handler: U+DC80..U+DCFF stand for the raw
+        # bytes 0x80..0xFF; every other character is encoded normally.
+        codepoint = ord(char)
+        if 0xDC80 <= codepoint <= 0xDCFF:
+            return chr(codepoint - 0xDC00)
+        return char.encode(fs_encoding)
+
+    return ''.join([encode_char(char) for char in filename])
class StringTruncated(RuntimeError):
pass
@@ -108,7 +137,7 @@ class TruncatedStringIO(object):
class PyObjectPtr(object):
"""
Class wrapping a gdb.Value that's a either a (PyObject*) within the
- inferior process, or some subclass pointer e.g. (PyStringObject*)
+ inferior process, or some subclass pointer e.g. (PyBytesObject*)
There will be a subclass for every refined PyObject type that we care
about.
@@ -148,12 +177,8 @@ class PyObjectPtr(object):
return pyo_ptr.dereference()[name]
if name == 'ob_size':
- try:
- # Python 2:
- return self._gdbval.dereference()[name]
- except RuntimeError:
- # Python 3:
- return self._gdbval.dereference()['ob_base'][name]
+ pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+ return pyo_ptr.dereference()[name]
# General case: look it up inside the object:
return self._gdbval.dereference()[name]
@@ -310,16 +335,14 @@ class PyObjectPtr(object):
if tp_flags & Py_TPFLAGS_HEAPTYPE:
return HeapTypeObjectPtr
- if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
- return PyIntObjectPtr
if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
return PyLongObjectPtr
if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
return PyListObjectPtr
if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
return PyTupleObjectPtr
- if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
- return PyStringObjectPtr
+ if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS:
+ return PyBytesObjectPtr
if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
return PyUnicodeObjectPtr
if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
@@ -355,6 +378,8 @@ class PyObjectPtr(object):
def as_address(self):
return long(self._gdbval)
+class PyVarObjectPtr(PyObjectPtr):
+ # Wrapper whose _typename names the C struct 'PyVarObject'.
+ # NOTE(review): presumably get_gdb_type() uses _typename to look up the
+ # gdb.Type to cast to -- confirm against PyObjectPtr.get_gdb_type.
+ _typename = 'PyVarObject'
class ProxyAlreadyVisited(object):
'''
@@ -519,20 +544,6 @@ class PyBaseExceptionObjectPtr(PyObjectPtr):
out.write(self.safe_tp_name())
self.write_field_repr('args', out, visited)
-class PyBoolObjectPtr(PyObjectPtr):
- """
- Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
- <bool> instances (Py_True/Py_False) within the process being debugged.
- """
- _typename = 'PyBoolObject'
-
- def proxyval(self, visited):
- if int_from_int(self.field('ob_ival')):
- return True
- else:
- return False
-
-
class PyClassObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
@@ -695,13 +706,6 @@ class PyInstanceObjectPtr(PyObjectPtr):
_write_instance_repr(out, visited,
cl_name, pyop_in_dict, self.as_address())
-class PyIntObjectPtr(PyObjectPtr):
- _typename = 'PyIntObject'
-
- def proxyval(self, visited):
- result = int_from_int(self.field('ob_ival'))
- return result
-
class PyListObjectPtr(PyObjectPtr):
_typename = 'PyListObject'
@@ -774,6 +778,22 @@ class PyLongObjectPtr(PyObjectPtr):
result = -result
return result
+    def write_repr(self, out, visited):
+        """Write the value to *out* as a Python 3 int literal.
+
+        The proxy value is formatted with "%s", so no "L" suffix is emitted.
+        """
+        out.write("%s" % self.proxyval(visited))
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+    <bool> instances (Py_True/Py_False) within the process being debugged.
+    """
+    def proxyval(self, visited):
+        # Reuse the integer proxy of the parent class and report its
+        # truthiness as a real bool.
+        return bool(PyLongObjectPtr.proxyval(self, visited))
class PyNoneStructPtr(PyObjectPtr):
"""
@@ -887,7 +907,12 @@ class PyFrameObjectPtr(PyObjectPtr):
newline character'''
if self.is_optimized_out():
return '(frame information optimized out)'
- with open(self.filename(), 'r') as f:
+ filename = self.filename()
+ try:
+ f = open(os_fsencode(filename), 'r')
+ except IOError:
+ return None
+ with f:
all_lines = f.readlines()
# Convert from 1-based current_line_num to 0-based list offset:
return all_lines[self.current_line_num()-1]
@@ -898,9 +923,9 @@ class PyFrameObjectPtr(PyObjectPtr):
return
out.write('Frame 0x%x, for file %s, line %i, in %s ('
% (self.as_address(),
- self.co_filename,
+ self.co_filename.proxyval(visited),
self.current_line_num(),
- self.co_name))
+ self.co_name.proxyval(visited)))
first = True
for pyop_name, pyop_value in self.iter_locals():
if not first:
@@ -913,6 +938,16 @@ class PyFrameObjectPtr(PyObjectPtr):
out.write(')')
+    def print_traceback(self):
+        """Write a one-line, traceback-style description of this frame to
+        sys.stdout: file name, current line number and function name."""
+        if self.is_optimized_out():
+            sys.stdout.write(' (frame information optimized out)\n')
+            return
+        visited = set()
+        filename = self.co_filename.proxyval(visited)
+        lineno = self.current_line_num()
+        funcname = self.co_name.proxyval(visited)
+        sys.stdout.write(' File "%s", line %i, in %s\n'
+                         % (filename, lineno, funcname))
+
class PySetObjectPtr(PyObjectPtr):
_typename = 'PySetObject'
@@ -937,7 +972,8 @@ class PySetObjectPtr(PyObjectPtr):
return set(members)
def write_repr(self, out, visited):
- out.write(self.safe_tp_name())
+ # Emulate Python 3's set_repr
+ tp_name = self.safe_tp_name()
# Guard against infinite loops:
if self.as_address() in visited:
@@ -945,7 +981,18 @@ class PySetObjectPtr(PyObjectPtr):
return
visited.add(self.as_address())
- out.write('([')
+ # Python 3's set_repr special-cases the empty set:
+ if not self.field('used'):
+ out.write(tp_name)
+ out.write('()')
+ return
+
+ # Python 3 uses {} for set literals:
+ if tp_name != 'set':
+ out.write(tp_name)
+ out.write('(')
+
+ out.write('{')
first = True
table = self.field('table')
for i in safe_range(self.field('mask')+1):
@@ -959,11 +1006,14 @@ class PySetObjectPtr(PyObjectPtr):
out.write(', ')
first = False
pyop_key.write_repr(out, visited)
- out.write('])')
+ out.write('}')
+
+ if tp_name != 'set':
+ out.write(')')
-class PyStringObjectPtr(PyObjectPtr):
- _typename = 'PyStringObject'
+class PyBytesObjectPtr(PyObjectPtr):
+ _typename = 'PyBytesObject'
def __str__(self):
field_ob_size = self.field('ob_size')
@@ -974,6 +1024,37 @@ class PyStringObjectPtr(PyObjectPtr):
def proxyval(self, visited):
return str(self)
+    def write_repr(self, out, visited):
+        """Write this object to *out* as a Python 3 bytes literal, i.e. with
+        a "b" prefix."""
+        # Get the bytes as a Python 2 str within the gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
+        # to Python 2 code.  Single quotes are the default; double quotes
+        # are used only when the data contains ' but no ".
+        if "'" in proxy and '"' not in proxy:
+            quote = '"'
+        else:
+            quote = "'"
+
+        simple_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}
+
+        out.write('b')
+        out.write(quote)
+        for byte in proxy:
+            if byte in (quote, '\\'):
+                out.write('\\')
+                out.write(byte)
+            elif byte in simple_escapes:
+                out.write(simple_escapes[byte])
+            elif byte < ' ' or ord(byte) >= 0x7f:
+                # Remaining control and non-ASCII bytes become \xhh
+                code = ord(byte)
+                out.write('\\x')
+                out.write(hexdigits[(code & 0xf0) >> 4])
+                out.write(hexdigits[code & 0xf])
+            else:
+                out.write(byte)
+        out.write(quote)
+
class PyTupleObjectPtr(PyObjectPtr):
_typename = 'PyTupleObject'
@@ -1014,6 +1095,13 @@ class PyTypeObjectPtr(PyObjectPtr):
_typename = 'PyTypeObject'
+def _unichr_is_printable(char):
+    """Return True if *char* should be copied as-is into a repr.
+
+    The ASCII space is always printable.  Any other character is printable
+    unless its Unicode general category belongs to the "Other" (C*) or
+    "Separator" (Z*) major classes.
+    """
+    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
+    if char == u" ":
+        return True
+    import unicodedata
+    # category() returns a two-letter code such as "Cc" or "Zs"; only its
+    # first letter identifies the major class, so compare that letter.
+    return unicodedata.category(char)[0] not in ("C", "Z")
+
if sys.maxunicode >= 0x10000:
_unichr = unichr
else:
@@ -1026,6 +1114,7 @@ else:
ch2 = 0xDC00 | (x & 0x3FF)
return unichr(ch1) + unichr(ch2)
+
class PyUnicodeObjectPtr(PyObjectPtr):
_typename = 'PyUnicodeObject'
@@ -1072,6 +1161,116 @@ class PyUnicodeObjectPtr(PyObjectPtr):
result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
+    def write_repr(self, out, visited):
+        """Write this object to *out* as a Python 3 str literal.
+
+        The text is written without a "u" prefix.  Quotes, backslashes and
+        ASCII control characters are escaped.  Non-ASCII characters are
+        copied as-is when they are printable and encodable to ENCODING, and
+        written as \\xhh, \\uxxxx or \\Uxxxxxxxx escapes otherwise.
+        """
+        # Get the text as a Python 2 unicode object within the gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Objects/unicodeobject.c:unicode_repr
+        # to Python 2:
+        if "'" in proxy and '"' not in proxy:
+            quote = '"'
+        else:
+            quote = "'"
+        out.write(quote)
+
+        i = 0
+        while i < len(proxy):
+            ch = proxy[i]
+            i += 1
+
+            # Escape quotes and backslashes
+            if ch == quote or ch == '\\':
+                out.write('\\')
+                out.write(ch)
+
+            # Map special whitespace to '\t', '\n', '\r'
+            elif ch == '\t':
+                out.write('\\t')
+            elif ch == '\n':
+                out.write('\\n')
+            elif ch == '\r':
+                out.write('\\r')
+
+            # Map non-printable US ASCII to '\xhh'.  DEL has to be tested by
+            # code point: a character never compares equal to the int 0x7F.
+            elif ch < ' ' or ord(ch) == 0x7F:
+                out.write('\\x')
+                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
+                out.write(hexdigits[ord(ch) & 0x000F])
+
+            # Copy ASCII characters as-is
+            elif ord(ch) < 0x7F:
+                out.write(ch)
+
+            # Non-ASCII characters
+            else:
+                ucs = ch
+                ch2 = None
+                if sys.maxunicode < 0x10000:
+                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+                    # surrogate pairs before calling _unichr_is_printable.
+                    if (i < len(proxy)
+                            and 0xD800 <= ord(ch) < 0xDC00
+                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+                        ch2 = proxy[i]
+                        ucs = ch + ch2
+                        i += 1
+
+                # Unfortunately, Python 2's unicode type doesn't seem
+                # to expose the "isprintable" method
+                printable = _unichr_is_printable(ucs)
+                if printable:
+                    # A character the output encoding cannot represent is
+                    # treated as non-printable so that it gets escaped.
+                    try:
+                        ucs.encode(ENCODING)
+                    except UnicodeEncodeError:
+                        printable = False
+
+                # Map Unicode whitespace and control characters
+                # (categories Z* and C* except ASCII space)
+                if not printable:
+                    if ch2 is not None:
+                        # Match Python 3's representation of non-printable
+                        # wide characters: rebuild the code point from the
+                        # surrogate pair.
+                        code = (ord(ch) & 0x03FF) << 10
+                        code |= ord(ch2) & 0x03FF
+                        code += 0x00010000
+                    else:
+                        code = ord(ucs)
+
+                    # Map 8-bit characters to '\\xhh'
+                    if code <= 0xff:
+                        out.write('\\x')
+                        out.write(hexdigits[(code >> 4) & 0x000F])
+                        out.write(hexdigits[code & 0x000F])
+                    # Map 21-bit characters to '\U00xxxxxx'
+                    elif code >= 0x10000:
+                        out.write('\\U')
+                        out.write(hexdigits[(code >> 28) & 0x0000000F])
+                        out.write(hexdigits[(code >> 24) & 0x0000000F])
+                        out.write(hexdigits[(code >> 20) & 0x0000000F])
+                        out.write(hexdigits[(code >> 16) & 0x0000000F])
+                        out.write(hexdigits[(code >> 12) & 0x0000000F])
+                        out.write(hexdigits[(code >> 8) & 0x0000000F])
+                        out.write(hexdigits[(code >> 4) & 0x0000000F])
+                        out.write(hexdigits[code & 0x0000000F])
+                    # Map 16-bit characters to '\uxxxx'
+                    else:
+                        out.write('\\u')
+                        out.write(hexdigits[(code >> 12) & 0x000F])
+                        out.write(hexdigits[(code >> 8) & 0x000F])
+                        out.write(hexdigits[(code >> 4) & 0x000F])
+                        out.write(hexdigits[code & 0x000F])
+                else:
+                    # Copy characters as-is
+                    out.write(ch)
+                    if ch2 is not None:
+                        out.write(ch2)
+
+        out.write(quote)
+
+
+
# Convert gdbval to a Python int by way of its string form.
def int_from_int(gdbval):
return int(str(gdbval))
@@ -1108,7 +1307,7 @@ def pretty_printer_lookup(gdbval):
if type.code == gdb.TYPE_CODE_PTR:
type = type.target().unqualified()
t = str(type)
- if t in ("PyObject", "PyFrameObject"):
+ if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"):
return PyObjectPtrPrinter(gdbval)
"""
@@ -1250,15 +1449,31 @@ class Frame(object):
if self.is_evalframeex():
pyop = self.get_pyop()
if pyop:
- sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
+ line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+ write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
if not pyop.is_optimized_out():
line = pyop.current_line()
- sys.stdout.write(' %s\n' % line.strip())
+ if line is not None:
+ sys.stdout.write(' %s\n' % line.strip())
else:
sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
else:
sys.stdout.write('#%i\n' % self.get_index())
+    def print_traceback(self):
+        """Write a traceback-style entry for this frame to sys.stdout,
+        followed by the current source line when it can be read."""
+        if not self.is_evalframeex():
+            sys.stdout.write(' (not a python frame)\n')
+            return
+
+        pyop = self.get_pyop()
+        if not pyop:
+            sys.stdout.write(' (unable to read python frame information)\n')
+            return
+
+        pyop.print_traceback()
+        if pyop.is_optimized_out():
+            return
+        source_line = pyop.current_line()
+        # current_line() may return None; then nothing more is printed.
+        if source_line is not None:
+            sys.stdout.write(' %s\n' % source_line.strip())
+
class PyList(gdb.Command):
'''List the current Python source code, if any
@@ -1313,7 +1528,13 @@ class PyList(gdb.Command):
if start<1:
start = 1
- with open(filename, 'r') as f:
+ try:
+ f = open(os_fsencode(filename), 'r')
+ except IOError as err:
+ sys.stdout.write('Unable to open %s: %s\n'
+ % (filename, err))
+ return
+ with f:
all_lines = f.readlines()
# start and end are 1-based, all_lines is 0-based;
# so [start-1:end] as a python slice gives us [start, end] as a
@@ -1383,6 +1604,24 @@ if hasattr(gdb.Frame, 'select'):
PyUp()
PyDown()
+class PyBacktraceFull(gdb.Command):
+ 'Display the current python frame and all the frames within its call stack (if any)'
+ # Registers the "py-bt-full" command with gdb in the COMMAND_STACK class,
+ # with no argument completion.
+ def __init__(self):
+ gdb.Command.__init__ (self,
+ "py-bt-full",
+ gdb.COMMAND_STACK,
+ gdb.COMPLETE_NONE)
+
+
+ def invoke(self, args, from_tty):
+ # Start at the selected Python frame and follow older() until it yields
+ # a false value, calling print_summary() on every frame for which
+ # is_evalframeex() is true.  args and from_tty are not used.
+ frame = Frame.get_selected_python_frame()
+ while frame:
+ if frame.is_evalframeex():
+ frame.print_summary()
+ frame = frame.older()
+
+PyBacktraceFull()
+
class PyBacktrace(gdb.Command):
'Display the current python frame and all the frames within its call stack (if any)'
def __init__(self):
@@ -1393,10 +1632,11 @@ class PyBacktrace(gdb.Command):
def invoke(self, args, from_tty):
+ sys.stdout.write('Traceback (most recent call first):\n')
frame = Frame.get_selected_python_frame()
while frame:
if frame.is_evalframeex():
- frame.print_summary()
+ frame.print_traceback()
frame = frame.older()
PyBacktrace()