about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Lib/idlelib/IOBinding.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/idlelib/IOBinding.py')
-rw-r--r-- Lib/idlelib/IOBinding.py 308
1 files changed, 130 insertions, 178 deletions
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
index e429c100ba3..9528c9acaa0 100644
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -1,28 +1,17 @@
-# changes by dscherer@cmu.edu
-# - IOBinding.open() replaces the current window with the opened file,
-# if the current window is both unmodified and unnamed
-# - IOBinding.loadfile() interprets Windows, UNIX, and Macintosh
-# end-of-line conventions, instead of relying on the standard library,
-# which will only understand the local convention.
-
import os
import types
import sys
import codecs
import tempfile
-import tkFileDialog
-import tkMessageBox
+import tkinter.filedialog as tkFileDialog
+import tkinter.messagebox as tkMessageBox
import re
-from Tkinter import *
-from SimpleDialog import SimpleDialog
+from tkinter import *
+from tkinter.simpledialog import askstring
from idlelib.configHandler import idleConf
-try:
- from codecs import BOM_UTF8
-except ImportError:
- # only available since Python 2.3
- BOM_UTF8 = '\xef\xbb\xbf'
+from codecs import BOM_UTF8
# Try setting the locale, so that we can find out
# what encoding to use
@@ -33,15 +22,15 @@ except (ImportError, locale.Error):
pass
# Encoding for file names
-filesystemencoding = sys.getfilesystemencoding()
+filesystemencoding = sys.getfilesystemencoding() ### currently unused
-encoding = "ascii"
+locale_encoding = 'ascii'
if sys.platform == 'win32':
# On Windows, we could use "mbcs". However, to give the user
# a portable encoding name, we need to find the code page
try:
- encoding = locale.getdefaultlocale()[1]
- codecs.lookup(encoding)
+ locale_encoding = locale.getdefaultlocale()[1]
+ codecs.lookup(locale_encoding)
except LookupError:
pass
else:
@@ -50,94 +39,65 @@ else:
# loaded, it may not offer nl_langinfo, or CODESET, or the
# resulting codeset may be unknown to Python. We ignore all
# these problems, falling back to ASCII
- encoding = locale.nl_langinfo(locale.CODESET)
- if encoding is None or encoding is '':
+ locale_encoding = locale.nl_langinfo(locale.CODESET)
+ if locale_encoding is None or locale_encoding == '':
# situation occurs on Mac OS X
- encoding = 'ascii'
- codecs.lookup(encoding)
+ locale_encoding = 'ascii'
+ codecs.lookup(locale_encoding)
except (NameError, AttributeError, LookupError):
- # Try getdefaultlocale well: it parses environment variables,
+ # Try getdefaultlocale: it parses environment variables,
# which may give a clue. Unfortunately, getdefaultlocale has
# bugs that can cause ValueError.
try:
- encoding = locale.getdefaultlocale()[1]
- if encoding is None or encoding is '':
+ locale_encoding = locale.getdefaultlocale()[1]
+ if locale_encoding is None or locale_encoding == '':
# situation occurs on Mac OS X
- encoding = 'ascii'
- codecs.lookup(encoding)
+ locale_encoding = 'ascii'
+ codecs.lookup(locale_encoding)
except (ValueError, LookupError):
pass
-encoding = encoding.lower()
+locale_encoding = locale_encoding.lower()
+
+encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check!
+ ### 'encoding' is used below in encode(), check!
coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
-class EncodingMessage(SimpleDialog):
- "Inform user that an encoding declaration is needed."
- def __init__(self, master, enc):
- self.should_edit = False
-
- self.root = top = Toplevel(master)
- top.bind("<Return>", self.return_event)
- top.bind("<Escape>", self.do_ok)
- top.protocol("WM_DELETE_WINDOW", self.wm_delete_window)
- top.wm_title("I/O Warning")
- top.wm_iconname("I/O Warning")
- self.top = top
-
- l1 = Label(top,
- text="Non-ASCII found, yet no encoding declared. Add a line like")
- l1.pack(side=TOP, anchor=W)
- l2 = Entry(top, font="courier")
- l2.insert(0, "# -*- coding: %s -*-" % enc)
- # For some reason, the text is not selectable anymore if the
- # widget is disabled.
- # l2['state'] = DISABLED
- l2.pack(side=TOP, anchor = W, fill=X)
- l3 = Label(top, text="to your file\n"
- "Choose OK to save this file as %s\n"
- "Edit your general options to silence this warning" % enc)
- l3.pack(side=TOP, anchor = W)
-
- buttons = Frame(top)
- buttons.pack(side=TOP, fill=X)
- # Both return and cancel mean the same thing: do nothing
- self.default = self.cancel = 0
- b1 = Button(buttons, text="Ok", default="active",
- command=self.do_ok)
- b1.pack(side=LEFT, fill=BOTH, expand=1)
- b2 = Button(buttons, text="Edit my file",
- command=self.do_edit)
- b2.pack(side=LEFT, fill=BOTH, expand=1)
-
- self._set_transient(master)
-
- def do_ok(self):
- self.done(0)
-
- def do_edit(self):
- self.done(1)
-
-def coding_spec(str):
+def coding_spec(data):
"""Return the encoding declaration according to PEP 263.
- Raise LookupError if the encoding is declared but unknown.
- """
- # Only consider the first two lines
- str = str.split("\n")[:2]
- str = "\n".join(str)
+ When checking encoded data, only the first two lines should be passed
+ in to avoid a UnicodeDecodeError if the rest of the data is not unicode.
+ The first two lines would contain the encoding specification.
+ Raise a LookupError if the encoding is declared but unknown.
+ """
+ if isinstance(data, bytes):
+ # This encoding might be wrong. However, the coding
+ # spec must be ASCII-only, so any non-ASCII characters
+ # around here will be ignored. Decoding to Latin-1 should
+ # never fail (except for memory outage)
+ lines = data.decode('iso-8859-1')
+ else:
+ lines = data
+ # consider only the first two lines
+ if '\n' in lines:
+ lst = lines.split('\n')[:2]
+ elif '\r' in lines:
+ lst = lines.split('\r')[:2]
+ else:
+ lst = [lines] # no line break: keep the single line whole (list(lines) would split it into characters)
+ str = '\n'.join(lst)
match = coding_re.search(str)
if not match:
return None
name = match.group(1)
- # Check whether the encoding is known
- import codecs
try:
codecs.lookup(name)
except LookupError:
# The standard encoding error does not indicate the encoding
- raise LookupError, "Unknown encoding "+name
+ raise LookupError("Unknown encoding: "+name)
return name
@@ -241,83 +201,112 @@ class IOBinding:
eol = r"(\r\n)|\n|\r" # \r\n (Windows), \n (UNIX), or \r (Mac)
eol_re = re.compile(eol)
- eol_convention = os.linesep # Default
+ eol_convention = os.linesep # default
def loadfile(self, filename):
try:
# open the file in binary mode so that we can handle
- # end-of-line convention ourselves.
+ # end-of-line convention ourselves.
f = open(filename,'rb')
- chars = f.read()
+ two_lines = f.readline() + f.readline()
+ f.seek(0)
+ bytes = f.read()
f.close()
- except IOError, msg:
+ except IOError as msg:
tkMessageBox.showerror("I/O Error", str(msg), master=self.text)
return False
-
- chars = self.decode(chars)
+ chars, converted = self._decode(two_lines, bytes)
+ if chars is None:
+ tkMessageBox.showerror("Decoding Error",
+ "File %s\nFailed to Decode" % filename,
+ parent=self.text)
+ return False
# We now convert all end-of-lines to '\n's
firsteol = self.eol_re.search(chars)
if firsteol:
self.eol_convention = firsteol.group(0)
- if isinstance(self.eol_convention, unicode):
- # Make sure it is an ASCII string
- self.eol_convention = self.eol_convention.encode("ascii")
chars = self.eol_re.sub(r"\n", chars)
-
self.text.delete("1.0", "end")
self.set_filename(None)
self.text.insert("1.0", chars)
self.reset_undo()
self.set_filename(filename)
+ if converted:
+ # We need to save the conversion results first
+ # before being able to execute the code
+ self.set_saved(False)
self.text.mark_set("insert", "1.0")
self.text.yview("insert")
self.updaterecentfileslist(filename)
return True
- def decode(self, chars):
- """Create a Unicode string
-
- If that fails, let Tcl try its best
- """
+ def _decode(self, two_lines, bytes):
+ "Create a Unicode string."
+ chars = None
# Check presence of a UTF-8 signature first
- if chars.startswith(BOM_UTF8):
+ if bytes.startswith(BOM_UTF8):
try:
- chars = chars[3:].decode("utf-8")
- except UnicodeError:
+ chars = bytes[3:].decode("utf-8")
+ except UnicodeDecodeError:
# has UTF-8 signature, but fails to decode...
- return chars
+ return None, False
else:
# Indicates that this file originally had a BOM
- self.fileencoding = BOM_UTF8
- return chars
+ self.fileencoding = 'BOM'
+ return chars, False
# Next look for coding specification
try:
- enc = coding_spec(chars)
- except LookupError, name:
+ enc = coding_spec(two_lines)
+ except LookupError as name:
tkMessageBox.showerror(
title="Error loading the file",
message="The encoding '%s' is not known to this Python "\
"installation. The file may not display correctly" % name,
master = self.text)
enc = None
+ except UnicodeDecodeError:
+ return None, False
if enc:
try:
- return unicode(chars, enc)
- except UnicodeError:
+ chars = str(bytes, enc)
+ self.fileencoding = enc
+ return chars, False
+ except UnicodeDecodeError:
pass
- # If it is ASCII, we need not to record anything
+ # Try ascii:
try:
- return unicode(chars, 'ascii')
- except UnicodeError:
+ chars = str(bytes, 'ascii')
+ self.fileencoding = None
+ return chars, False
+ except UnicodeDecodeError:
+ pass
+ # Try utf-8:
+ try:
+ chars = str(bytes, 'utf-8')
+ self.fileencoding = 'utf-8'
+ return chars, False
+ except UnicodeDecodeError:
pass
# Finally, try the locale's encoding. This is deprecated;
# the user should declare a non-ASCII encoding
try:
- chars = unicode(chars, encoding)
- self.fileencoding = encoding
- except UnicodeError:
+ # Wait for the editor window to appear
+ self.editwin.text.update()
+ enc = askstring(
+ "Specify file encoding",
+ "The file's encoding is invalid for Python 3.x.\n"
+ "IDLE will convert it to UTF-8.\n"
+ "What is the current encoding of the file?",
+ initialvalue = locale_encoding,
+ parent = self.editwin.text)
+
+ if enc:
+ chars = str(bytes, enc)
+ self.fileencoding = None
+ return chars, True
+ except (UnicodeDecodeError, LookupError):
pass
- return chars
+ return None, False # None on failure
def maybesave(self):
if self.get_saved():
@@ -378,93 +367,59 @@ class IOBinding:
def writefile(self, filename):
self.fixlastline()
- chars = self.encode(self.text.get("1.0", "end-1c"))
+ text = self.text.get("1.0", "end-1c")
if self.eol_convention != "\n":
- chars = chars.replace("\n", self.eol_convention)
+ text = text.replace("\n", self.eol_convention)
+ chars = self.encode(text)
try:
f = open(filename, "wb")
f.write(chars)
f.flush()
f.close()
return True
- except IOError, msg:
+ except IOError as msg:
tkMessageBox.showerror("I/O Error", str(msg),
master=self.text)
return False
def encode(self, chars):
- if isinstance(chars, types.StringType):
+ if isinstance(chars, bytes):
# This is either plain ASCII, or Tk was returning mixed-encoding
# text to us. Don't try to guess further.
return chars
+ # Preserve a BOM that might have been present on opening
+ if self.fileencoding == 'BOM':
+ return BOM_UTF8 + chars.encode("utf-8")
# See whether there is anything non-ASCII in it.
# If not, no need to figure out the encoding.
try:
return chars.encode('ascii')
except UnicodeError:
pass
- # If there is an encoding declared, try this first.
+ # Check if there is an encoding declared
try:
+ # a string, let coding_spec slice it to the first two lines
enc = coding_spec(chars)
failed = None
- except LookupError, msg:
+ except LookupError as msg:
failed = msg
enc = None
+ else:
+ if not enc:
+ # PEP 3120: default source encoding is UTF-8
+ enc = 'utf-8'
if enc:
try:
return chars.encode(enc)
except UnicodeError:
failed = "Invalid encoding '%s'" % enc
- if failed:
- tkMessageBox.showerror(
- "I/O Error",
- "%s. Saving as UTF-8" % failed,
- master = self.text)
- # If there was a UTF-8 signature, use that. This should not fail
- if self.fileencoding == BOM_UTF8 or failed:
- return BOM_UTF8 + chars.encode("utf-8")
- # Try the original file encoding next, if any
- if self.fileencoding:
- try:
- return chars.encode(self.fileencoding)
- except UnicodeError:
- tkMessageBox.showerror(
- "I/O Error",
- "Cannot save this as '%s' anymore. Saving as UTF-8" \
- % self.fileencoding,
- master = self.text)
- return BOM_UTF8 + chars.encode("utf-8")
- # Nothing was declared, and we had not determined an encoding
- # on loading. Recommend an encoding line.
- config_encoding = idleConf.GetOption("main","EditorWindow",
- "encoding")
- if config_encoding == 'utf-8':
- # User has requested that we save files as UTF-8
- return BOM_UTF8 + chars.encode("utf-8")
- ask_user = True
- try:
- chars = chars.encode(encoding)
- enc = encoding
- if config_encoding == 'locale':
- ask_user = False
- except UnicodeError:
- chars = BOM_UTF8 + chars.encode("utf-8")
- enc = "utf-8"
- if not ask_user:
- return chars
- dialog = EncodingMessage(self.editwin.top, enc)
- dialog.go()
- if dialog.num == 1:
- # User asked us to edit the file
- encline = "# -*- coding: %s -*-\n" % enc
- firstline = self.text.get("1.0", "2.0")
- if firstline.startswith("#!"):
- # Insert encoding after #! line
- self.text.insert("2.0", encline)
- else:
- self.text.insert("1.0", encline)
- return self.encode(self.text.get("1.0", "end-1c"))
- return chars
+ tkMessageBox.showerror(
+ "I/O Error",
+ "%s.\nSaving as UTF-8" % failed,
+ master = self.text)
+ # Fallback: save as UTF-8, with BOM - ignoring the incorrect
+ # declared encoding
+ return BOM_UTF8 + chars.encode("utf-8")
def fixlastline(self):
c = self.text.get("end-2c")
@@ -536,8 +491,6 @@ class IOBinding:
self.opendialog = tkFileDialog.Open(master=self.text,
filetypes=self.filetypes)
filename = self.opendialog.show(initialdir=dir, initialfile=base)
- if isinstance(filename, unicode):
- filename = filename.encode(filesystemencoding)
return filename
def defaultfilename(self, mode="open"):
@@ -558,13 +511,12 @@ class IOBinding:
self.savedialog = tkFileDialog.SaveAs(master=self.text,
filetypes=self.filetypes)
filename = self.savedialog.show(initialdir=dir, initialfile=base)
- if isinstance(filename, unicode):
- filename = filename.encode(filesystemencoding)
return filename
def updaterecentfileslist(self,filename):
"Update recent file list on all editor windows"
- self.editwin.update_recent_files_list(filename)
+ if self.editwin.flist:
+ self.editwin.update_recent_files_list(filename)
def test():
root = Tk()