diff options
Diffstat (limited to 'Lib/idlelib/IOBinding.py')
-rw-r--r-- | Lib/idlelib/IOBinding.py | 308 |
1 files changed, 130 insertions, 178 deletions
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py index e429c100ba3..9528c9acaa0 100644 --- a/Lib/idlelib/IOBinding.py +++ b/Lib/idlelib/IOBinding.py @@ -1,28 +1,17 @@ -# changes by dscherer@cmu.edu -# - IOBinding.open() replaces the current window with the opened file, -# if the current window is both unmodified and unnamed -# - IOBinding.loadfile() interprets Windows, UNIX, and Macintosh -# end-of-line conventions, instead of relying on the standard library, -# which will only understand the local convention. - import os import types import sys import codecs import tempfile -import tkFileDialog -import tkMessageBox +import tkinter.filedialog as tkFileDialog +import tkinter.messagebox as tkMessageBox import re -from Tkinter import * -from SimpleDialog import SimpleDialog +from tkinter import * +from tkinter.simpledialog import askstring from idlelib.configHandler import idleConf -try: - from codecs import BOM_UTF8 -except ImportError: - # only available since Python 2.3 - BOM_UTF8 = '\xef\xbb\xbf' +from codecs import BOM_UTF8 # Try setting the locale, so that we can find out # what encoding to use @@ -33,15 +22,15 @@ except (ImportError, locale.Error): pass # Encoding for file names -filesystemencoding = sys.getfilesystemencoding() +filesystemencoding = sys.getfilesystemencoding() ### currently unused -encoding = "ascii" +locale_encoding = 'ascii' if sys.platform == 'win32': # On Windows, we could use "mbcs". However, to give the user # a portable encoding name, we need to find the code page try: - encoding = locale.getdefaultlocale()[1] - codecs.lookup(encoding) + locale_encoding = locale.getdefaultlocale()[1] + codecs.lookup(locale_encoding) except LookupError: pass else: @@ -50,94 +39,65 @@ else: # loaded, it may not offer nl_langinfo, or CODESET, or the # resulting codeset may be unknown to Python. We ignore all # these problems, falling back to ASCII - encoding = locale.nl_langinfo(locale.CODESET) - if encoding is None or encoding is '': + locale_encoding = locale.nl_langinfo(locale.CODESET) + if locale_encoding is None or locale_encoding is '': # situation occurs on Mac OS X - encoding = 'ascii' - codecs.lookup(encoding) + locale_encoding = 'ascii' + codecs.lookup(locale_encoding) except (NameError, AttributeError, LookupError): - # Try getdefaultlocale well: it parses environment variables, + # Try getdefaultlocale: it parses environment variables, # which may give a clue. Unfortunately, getdefaultlocale has # bugs that can cause ValueError. try: - encoding = locale.getdefaultlocale()[1] - if encoding is None or encoding is '': + locale_encoding = locale.getdefaultlocale()[1] + if locale_encoding is None or locale_encoding is '': # situation occurs on Mac OS X - encoding = 'ascii' - codecs.lookup(encoding) + locale_encoding = 'ascii' + codecs.lookup(locale_encoding) except (ValueError, LookupError): pass -encoding = encoding.lower() +locale_encoding = locale_encoding.lower() + +encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check! + ### 'encoding' is used below in encode(), check! coding_re = re.compile("coding[:=]\s*([-\w_.]+)") -class EncodingMessage(SimpleDialog): - "Inform user that an encoding declaration is needed." - def __init__(self, master, enc): - self.should_edit = False - - self.root = top = Toplevel(master) - top.bind("<Return>", self.return_event) - top.bind("<Escape>", self.do_ok) - top.protocol("WM_DELETE_WINDOW", self.wm_delete_window) - top.wm_title("I/O Warning") - top.wm_iconname("I/O Warning") - self.top = top - - l1 = Label(top, - text="Non-ASCII found, yet no encoding declared. Add a line like") - l1.pack(side=TOP, anchor=W) - l2 = Entry(top, font="courier") - l2.insert(0, "# -*- coding: %s -*-" % enc) - # For some reason, the text is not selectable anymore if the - # widget is disabled. - # l2['state'] = DISABLED - l2.pack(side=TOP, anchor = W, fill=X) - l3 = Label(top, text="to your file\n" - "Choose OK to save this file as %s\n" - "Edit your general options to silence this warning" % enc) - l3.pack(side=TOP, anchor = W) - - buttons = Frame(top) - buttons.pack(side=TOP, fill=X) - # Both return and cancel mean the same thing: do nothing - self.default = self.cancel = 0 - b1 = Button(buttons, text="Ok", default="active", - command=self.do_ok) - b1.pack(side=LEFT, fill=BOTH, expand=1) - b2 = Button(buttons, text="Edit my file", - command=self.do_edit) - b2.pack(side=LEFT, fill=BOTH, expand=1) - - self._set_transient(master) - - def do_ok(self): - self.done(0) - - def do_edit(self): - self.done(1) - -def coding_spec(str): +def coding_spec(data): """Return the encoding declaration according to PEP 263. - Raise LookupError if the encoding is declared but unknown. - """ - # Only consider the first two lines - str = str.split("\n")[:2] - str = "\n".join(str) + When checking encoded data, only the first two lines should be passed + in to avoid a UnicodeDecodeError if the rest of the data is not unicode. + The first two lines would contain the encoding specification. + Raise a LookupError if the encoding is declared but unknown. + """ + if isinstance(data, bytes): + # This encoding might be wrong. However, the coding + # spec must be ASCII-only, so any non-ASCII characters + # around here will be ignored. Decoding to Latin-1 should + # never fail (except for memory outage) + lines = data.decode('iso-8859-1') + else: + lines = data + # consider only the first two lines + if '\n' in lines: + lst = lines.split('\n')[:2] + elif '\r' in lines: + lst = lines.split('\r')[:2] + else: + lst = list(lines) + str = '\n'.join(lst) match = coding_re.search(str) if not match: return None name = match.group(1) - # Check whether the encoding is known - import codecs try: codecs.lookup(name) except LookupError: # The standard encoding error does not indicate the encoding - raise LookupError, "Unknown encoding "+name + raise LookupError("Unknown encoding: "+name) return name @@ -241,83 +201,112 @@ class IOBinding: eol = r"(\r\n)|\n|\r" # \r\n (Windows), \n (UNIX), or \r (Mac) eol_re = re.compile(eol) - eol_convention = os.linesep # Default + eol_convention = os.linesep # default def loadfile(self, filename): try: # open the file in binary mode so that we can handle - # end-of-line convention ourselves. + # end-of-line convention ourselves. f = open(filename,'rb') - chars = f.read() + two_lines = f.readline() + f.readline() + f.seek(0) + bytes = f.read() f.close() - except IOError, msg: + except IOError as msg: tkMessageBox.showerror("I/O Error", str(msg), master=self.text) return False - - chars = self.decode(chars) + chars, converted = self._decode(two_lines, bytes) + if chars is None: + tkMessageBox.showerror("Decoding Error", + "File %s\nFailed to Decode" % filename, + parent=self.text) + return False # We now convert all end-of-lines to '\n's firsteol = self.eol_re.search(chars) if firsteol: self.eol_convention = firsteol.group(0) - if isinstance(self.eol_convention, unicode): - # Make sure it is an ASCII string - self.eol_convention = self.eol_convention.encode("ascii") chars = self.eol_re.sub(r"\n", chars) - self.text.delete("1.0", "end") self.set_filename(None) self.text.insert("1.0", chars) self.reset_undo() self.set_filename(filename) + if converted: + # We need to save the conversion results first + # before being able to execute the code + self.set_saved(False) self.text.mark_set("insert", "1.0") self.text.yview("insert") self.updaterecentfileslist(filename) return True - def decode(self, chars): - """Create a Unicode string - - If that fails, let Tcl try its best - """ + def _decode(self, two_lines, bytes): + "Create a Unicode string." + chars = None # Check presence of a UTF-8 signature first - if chars.startswith(BOM_UTF8): + if bytes.startswith(BOM_UTF8): try: - chars = chars[3:].decode("utf-8") - except UnicodeError: + chars = bytes[3:].decode("utf-8") + except UnicodeDecodeError: # has UTF-8 signature, but fails to decode... - return chars + return None, False else: # Indicates that this file originally had a BOM - self.fileencoding = BOM_UTF8 - return chars + self.fileencoding = 'BOM' + return chars, False # Next look for coding specification try: - enc = coding_spec(chars) - except LookupError, name: + enc = coding_spec(two_lines) + except LookupError as name: tkMessageBox.showerror( title="Error loading the file", message="The encoding '%s' is not known to this Python "\ "installation. The file may not display correctly" % name, master = self.text) enc = None + except UnicodeDecodeError: + return None, False if enc: try: - return unicode(chars, enc) - except UnicodeError: + chars = str(bytes, enc) + self.fileencoding = enc + return chars, False + except UnicodeDecodeError: pass - # If it is ASCII, we need not to record anything + # Try ascii: try: - return unicode(chars, 'ascii') - except UnicodeError: + chars = str(bytes, 'ascii') + self.fileencoding = None + return chars, False + except UnicodeDecodeError: + pass + # Try utf-8: + try: + chars = str(bytes, 'utf-8') + self.fileencoding = 'utf-8' + return chars, False + except UnicodeDecodeError: pass # Finally, try the locale's encoding. This is deprecated; # the user should declare a non-ASCII encoding try: - chars = unicode(chars, encoding) - self.fileencoding = encoding - except UnicodeError: + # Wait for the editor window to appear + self.editwin.text.update() + enc = askstring( + "Specify file encoding", + "The file's encoding is invalid for Python 3.x.\n" + "IDLE will convert it to UTF-8.\n" + "What is the current encoding of the file?", + initialvalue = locale_encoding, + parent = self.editwin.text) + + if enc: + chars = str(bytes, enc) + self.fileencoding = None + return chars, True + except (UnicodeDecodeError, LookupError): pass - return chars + return None, False # None on failure def maybesave(self): if self.get_saved(): @@ -378,93 +367,59 @@ class IOBinding: def writefile(self, filename): self.fixlastline() - chars = self.encode(self.text.get("1.0", "end-1c")) + text = self.text.get("1.0", "end-1c") if self.eol_convention != "\n": - chars = chars.replace("\n", self.eol_convention) + text = text.replace("\n", self.eol_convention) + chars = self.encode(text) try: f = open(filename, "wb") f.write(chars) f.flush() f.close() return True - except IOError, msg: + except IOError as msg: tkMessageBox.showerror("I/O Error", str(msg), master=self.text) return False def encode(self, chars): - if isinstance(chars, types.StringType): + if isinstance(chars, bytes): # This is either plain ASCII, or Tk was returning mixed-encoding # text to us. Don't try to guess further. return chars + # Preserve a BOM that might have been present on opening + if self.fileencoding == 'BOM': + return BOM_UTF8 + chars.encode("utf-8") # See whether there is anything non-ASCII in it. # If not, no need to figure out the encoding. try: return chars.encode('ascii') except UnicodeError: pass - # If there is an encoding declared, try this first. + # Check if there is an encoding declared try: + # a string, let coding_spec slice it to the first two lines enc = coding_spec(chars) failed = None - except LookupError, msg: + except LookupError as msg: failed = msg enc = None + else: + if not enc: + # PEP 3120: default source encoding is UTF-8 + enc = 'utf-8' if enc: try: return chars.encode(enc) except UnicodeError: failed = "Invalid encoding '%s'" % enc - if failed: - tkMessageBox.showerror( - "I/O Error", - "%s. Saving as UTF-8" % failed, - master = self.text) - # If there was a UTF-8 signature, use that. This should not fail - if self.fileencoding == BOM_UTF8 or failed: - return BOM_UTF8 + chars.encode("utf-8") - # Try the original file encoding next, if any - if self.fileencoding: - try: - return chars.encode(self.fileencoding) - except UnicodeError: - tkMessageBox.showerror( - "I/O Error", - "Cannot save this as '%s' anymore. Saving as UTF-8" \ - % self.fileencoding, - master = self.text) - return BOM_UTF8 + chars.encode("utf-8") - # Nothing was declared, and we had not determined an encoding - # on loading. Recommend an encoding line. - config_encoding = idleConf.GetOption("main","EditorWindow", - "encoding") - if config_encoding == 'utf-8': - # User has requested that we save files as UTF-8 - return BOM_UTF8 + chars.encode("utf-8") - ask_user = True - try: - chars = chars.encode(encoding) - enc = encoding - if config_encoding == 'locale': - ask_user = False - except UnicodeError: - chars = BOM_UTF8 + chars.encode("utf-8") - enc = "utf-8" - if not ask_user: - return chars - dialog = EncodingMessage(self.editwin.top, enc) - dialog.go() - if dialog.num == 1: - # User asked us to edit the file - encline = "# -*- coding: %s -*-\n" % enc - firstline = self.text.get("1.0", "2.0") - if firstline.startswith("#!"): - # Insert encoding after #! line - self.text.insert("2.0", encline) - else: - self.text.insert("1.0", encline) - return self.encode(self.text.get("1.0", "end-1c")) - return chars + tkMessageBox.showerror( + "I/O Error", + "%s.\nSaving as UTF-8" % failed, + master = self.text) + # Fallback: save as UTF-8, with BOM - ignoring the incorrect + # declared encoding + return BOM_UTF8 + chars.encode("utf-8") def fixlastline(self): c = self.text.get("end-2c") @@ -536,8 +491,6 @@ class IOBinding: self.opendialog = tkFileDialog.Open(master=self.text, filetypes=self.filetypes) filename = self.opendialog.show(initialdir=dir, initialfile=base) - if isinstance(filename, unicode): - filename = filename.encode(filesystemencoding) return filename def defaultfilename(self, mode="open"): @@ -558,13 +511,12 @@ class IOBinding: self.savedialog = tkFileDialog.SaveAs(master=self.text, filetypes=self.filetypes) filename = self.savedialog.show(initialdir=dir, initialfile=base) - if isinstance(filename, unicode): - filename = filename.encode(filesystemencoding) return filename def updaterecentfileslist(self,filename): "Update recent file list on all editor windows" - self.editwin.update_recent_files_list(filename) + if self.editwin.flist: + self.editwin.update_recent_files_list(filename) def test(): root = Tk() |