diff options
Diffstat (limited to 'Lib/difflib.py')
-rw-r--r-- | Lib/difflib.py | 127 |
1 file changed, 66 insertions, 61 deletions
diff --git a/Lib/difflib.py b/Lib/difflib.py index 3bbcb76b7ec..e6cc6ee4425 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 """ Module difflib -- helpers for computing deltas between objects. @@ -32,9 +32,9 @@ __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher', 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff', 'unified_diff', 'HtmlDiff', 'Match'] +import warnings import heapq from collections import namedtuple as _namedtuple -from functools import reduce Match = _namedtuple('Match', 'a b size') @@ -80,7 +80,7 @@ class SequenceMatcher: sequences. As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print round(s.ratio(), 3) + >>> print(round(s.ratio(), 3)) 0.866 >>> @@ -88,7 +88,7 @@ class SequenceMatcher: .get_matching_blocks() is handy: >>> for block in s.get_matching_blocks(): - ... print "a[%d] and b[%d] match for %d elements" % block + ... print("a[%d] and b[%d] match for %d elements" % block) a[0] and b[0] match for 8 elements a[8] and b[17] match for 21 elements a[29] and b[38] match for 0 elements @@ -101,7 +101,7 @@ class SequenceMatcher: use .get_opcodes(): >>> for opcode in s.get_opcodes(): - ... print "%6s a[%d:%d] b[%d:%d]" % opcode + ... print("%6s a[%d:%d] b[%d:%d]" % opcode) equal a[0:8] b[0:8] insert a[8:8] b[8:17] equal a[8:29] b[17:38] @@ -183,7 +183,7 @@ class SequenceMatcher: # we need to do to 'a' to change it into 'b'?" # b2j # for x in b, b2j[x] is a list of the indices (into b) - # at which x appears; junk elements do not appear + # at which x appears; junk and popular elements do not appear # fullbcount # for x in b, fullbcount[x] == the number of times x # appears in b; only materialized if really needed (used @@ -205,15 +205,10 @@ class SequenceMatcher: # subtle but helpful effects on the algorithm, which I'll # get around to writing up someday <0.9 wink>. # DON'T USE! Only __chain_b uses this. 
Use isbjunk. - # isbjunk - # for x in b, isbjunk(x) == isjunk(x) but much faster; - # it's really the __contains__ method of a hidden dict. - # DOES NOT WORK for x in a! - # isbpopular - # for x in b, isbpopular(x) is true iff b is reasonably long - # (at least 200 elements) and x accounts for more than 1 + 1% of - # its elements (when autojunk is enabled). - # DOES NOT WORK for x in a! + # bjunk + # the items in b for which isjunk is True. + # bpopular + # nonjunk items in b treated as junk by the heuristic (if used). self.isjunk = isjunk self.a = self.b = None @@ -322,30 +317,39 @@ class SequenceMatcher: indices.append(i) # Purge junk elements - junk = set() + self.bjunk = junk = set() isjunk = self.isjunk if isjunk: - for elt in list(b2j.keys()): # using list() since b2j is modified + for elt in b2j.keys(): if isjunk(elt): junk.add(elt) - del b2j[elt] + for elt in junk: # separate loop avoids separate list of keys + del b2j[elt] # Purge popular elements that are not junk - popular = set() + self.bpopular = popular = set() n = len(b) if self.autojunk and n >= 200: ntest = n // 100 + 1 - for elt, idxs in list(b2j.items()): + for elt, idxs in b2j.items(): if len(idxs) > ntest: popular.add(elt) - del b2j[elt] - - # Now for x in b, isjunk(x) == x in junk, but the latter is much faster. - # Sicne the number of *unique* junk elements is probably small, the - # memory burden of keeping this set alive is likely trivial compared to - # the size of b2j. - self.isbjunk = junk.__contains__ - self.isbpopular = popular.__contains__ + for elt in popular: # ditto; as fast for 1% deletion + del b2j[elt] + + def isbjunk(self, item): + "Deprecated; use 'item in SequenceMatcher().bjunk'." + warnings.warn("'SequenceMatcher().isbjunk(item)' is deprecated;\n" + "use 'item in SMinstance.bjunk' instead.", + DeprecationWarning, 2) + return item in self.bjunk + + def isbpopular(self, item): + "Deprecated; use 'item in SequenceMatcher().bpopular'." 
+ warnings.warn("'SequenceMatcher().isbpopular(item)' is deprecated;\n" + "use 'item in SMinstance.bpopular' instead.", + DeprecationWarning, 2) + return item in self.bpopular def find_longest_match(self, alo, ahi, blo, bhi): """Find longest matching block in a[alo:ahi] and b[blo:bhi]. @@ -403,14 +407,14 @@ class SequenceMatcher: # Windiff ends up at the same place as diff, but by pairing up # the unique 'b's and then matching the first two 'a's. - a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk + a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__ besti, bestj, bestsize = alo, blo, 0 # find longest junk-free match # during an iteration of the loop, j2len[j] = length of longest # junk-free match ending with a[i-1] and b[j] j2len = {} nothing = [] - for i in xrange(alo, ahi): + for i in range(alo, ahi): # look at all instances of a[i] in b; note that because # b2j has no junk keys, the loop is skipped if a[i] is junk j2lenget = j2len.get @@ -472,7 +476,7 @@ class SequenceMatcher: triple with n==0. >>> s = SequenceMatcher(None, "abxcd", "abcd") - >>> s.get_matching_blocks() + >>> list(s.get_matching_blocks()) [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)] """ @@ -548,8 +552,8 @@ class SequenceMatcher: >>> b = "abycdf" >>> s = SequenceMatcher(None, a, b) >>> for tag, i1, i2, j1, j2 in s.get_opcodes(): - ... print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" % - ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])) + ... print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" % + ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))) delete a[0:1] (q) b[0:0] () equal a[1:3] (ab) b[0:2] (ab) replace a[3:4] (x) b[2:3] (y) @@ -590,7 +594,7 @@ class SequenceMatcher: Each group is in the same format as returned by get_opcodes(). 
>>> from pprint import pprint - >>> a = map(str, range(1,40)) + >>> a = list(map(str, range(1,40))) >>> b = a[:] >>> b[8:8] = ['i'] # Make an insertion >>> b[20] += 'x' # Make a replacement @@ -655,8 +659,7 @@ class SequenceMatcher: 1.0 """ - matches = reduce(lambda sum, triple: sum + triple[-1], - self.get_matching_blocks(), 0) + matches = sum(triple[-1] for triple in self.get_matching_blocks()) return _calculate_ratio(matches, len(self.a) + len(self.b)) def quick_ratio(self): @@ -723,7 +726,7 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6): >>> import keyword as _keyword >>> get_close_matches("wheel", _keyword.kwlist) ['while'] - >>> get_close_matches("apple", _keyword.kwlist) + >>> get_close_matches("Apple", _keyword.kwlist) [] >>> get_close_matches("accept", _keyword.kwlist) ['except'] @@ -836,7 +839,7 @@ class Differ: As a single multi-line string it looks like this: - >>> print ''.join(result), + >>> print(''.join(result), end="") 1. Beautiful is better than ugly. - 2. Explicit is better than implicit. - 3. Simple is better than complex. @@ -893,8 +896,9 @@ class Differ: Example: - >>> print ''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(1), + >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(1), ... 'ore\ntree\nemu\n'.splitlines(1))), + ... end="") - one ? ^ + ore @@ -917,14 +921,14 @@ class Differ: elif tag == 'equal': g = self._dump(' ', a, alo, ahi) else: - raise ValueError, 'unknown tag %r' % (tag,) + raise ValueError('unknown tag %r' % (tag,)) for line in g: yield line def _dump(self, tag, x, lo, hi): """Generate comparison results for a same-tagged range.""" - for i in xrange(lo, hi): + for i in range(lo, hi): yield '%s %s' % (tag, x[i]) def _plain_replace(self, a, alo, ahi, b, blo, bhi): @@ -954,7 +958,7 @@ class Differ: >>> d = Differ() >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1, ... ['abcdefGhijkl\n'], 0, 1) - >>> print ''.join(results), + >>> print(''.join(results), end="") - abcDefghiJkl ? 
^ ^ ^ + abcdefGhijkl @@ -970,10 +974,10 @@ class Differ: # search for the pair that matches best without being identical # (identical lines must be junk lines, & we don't want to synch up # on junk -- unless we have to) - for j in xrange(blo, bhi): + for j in range(blo, bhi): bj = b[j] cruncher.set_seq2(bj) - for i in xrange(alo, ahi): + for i in range(alo, ahi): ai = a[i] if ai == bj: if eqi is None: @@ -1029,7 +1033,7 @@ class Differ: atags += ' ' * la btags += ' ' * lb else: - raise ValueError, 'unknown tag %r' % (tag,) + raise ValueError('unknown tag %r' % (tag,)) for line in self._qformat(aelt, belt, atags, btags): yield line else: @@ -1062,7 +1066,7 @@ class Differ: >>> d = Differ() >>> results = d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n', ... ' ^ ^ ^ ', ' ^ ^ ^ ') - >>> for line in results: print repr(line) + >>> for line in results: print(repr(line)) ... '- \tabcDefghiJkl\n' '? \t ^ ^ ^\n' @@ -1184,7 +1188,7 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', ... 'zero one tree four'.split(), 'Original', 'Current', ... '2005-01-26 23:30:50', '2010-04-02 10:20:52', ... lineterm=''): - ... print line # doctest: +NORMALIZE_WHITESPACE + ... print(line) # doctest: +NORMALIZE_WHITESPACE --- Original 2005-01-26 23:30:50 +++ Current 2010-04-02 10:20:52 @@ -1,4 +1,4 @@ @@ -1215,10 +1219,10 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', for line in a[i1:i2]: yield ' ' + line continue - if tag in ('replace', 'delete'): + if tag in {'replace', 'delete'}: for line in a[i1:i2]: yield '-' + line - if tag in ('replace', 'insert'): + if tag in {'replace', 'insert'}: for line in b[j1:j2]: yield '+' + line @@ -1265,8 +1269,9 @@ def context_diff(a, b, fromfile='', tofile='', Example: - >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1), + >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1), ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')), + ... 
end="") *** Original --- Current *************** @@ -1298,7 +1303,7 @@ def context_diff(a, b, fromfile='', tofile='', file1_range = _format_range_context(first[1], last[2]) yield '*** {} ****{}'.format(file1_range, lineterm) - if any(tag in ('replace', 'delete') for tag, _, _, _, _ in group): + if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group): for tag, i1, i2, _, _ in group: if tag != 'insert': for line in a[i1:i2]: @@ -1307,7 +1312,7 @@ def context_diff(a, b, fromfile='', tofile='', file2_range = _format_range_context(first[3], last[4]) yield '--- {} ----{}'.format(file2_range, lineterm) - if any(tag in ('replace', 'insert') for tag, _, _, _, _ in group): + if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group): for tag, _, _, j1, j2 in group: if tag != 'delete': for line in b[j1:j2]: @@ -1336,7 +1341,7 @@ def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK): >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1), ... 'ore\ntree\nemu\n'.splitlines(1)) - >>> print ''.join(diff), + >>> print(''.join(diff), end="") - one ? ^ + ore @@ -1469,7 +1474,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, # so we can do some very readable comparisons. 
while len(lines) < 4: try: - lines.append(diff_lines_iterator.next()) + lines.append(next(diff_lines_iterator)) except StopIteration: lines.append('X') s = ''.join([line[0] for line in lines]) @@ -1556,7 +1561,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, while True: # Collecting lines of text until we have a from/to pair while (len(fromlines)==0 or len(tolines)==0): - from_line, to_line, found_diff =line_iterator.next() + from_line, to_line, found_diff = next(line_iterator) if from_line is not None: fromlines.append((from_line,found_diff)) if to_line is not None: @@ -1571,7 +1576,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, line_pair_iterator = _line_pair_iterator() if context is None: while True: - yield line_pair_iterator.next() + yield next(line_pair_iterator) # Handle case where user wants context differencing. We must do some # storage of lines until we know for sure that they are to be yielded. else: @@ -1584,7 +1589,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, index, contextLines = 0, [None]*(context) found_diff = False while(found_diff is False): - from_line, to_line, found_diff = line_pair_iterator.next() + from_line, to_line, found_diff = next(line_pair_iterator) i = index % context contextLines[i] = (from_line, to_line, found_diff) index += 1 @@ -1604,7 +1609,7 @@ def _mdiff(fromlines, tolines, context=None, linejunk=None, # Now yield the context lines after the change lines_to_write = context-1 while(lines_to_write): - from_line, to_line, found_diff = line_pair_iterator.next() + from_line, to_line, found_diff = next(line_pair_iterator) # If another change within the context, extend the context if found_diff: lines_to_write = context-1 @@ -2032,11 +2037,11 @@ def restore(delta, which): >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1), ... 
'ore\ntree\nemu\n'.splitlines(1)) >>> diff = list(diff) - >>> print ''.join(restore(diff, 1)), + >>> print(''.join(restore(diff, 1)), end="") one two three - >>> print ''.join(restore(diff, 2)), + >>> print(''.join(restore(diff, 2)), end="") ore tree emu @@ -2044,7 +2049,7 @@ def restore(delta, which): try: tag = {1: "- ", 2: "+ "}[int(which)] except KeyError: - raise ValueError, ('unknown delta choice (must be 1 or 2): %r' + raise ValueError('unknown delta choice (must be 1 or 2): %r' % which) prefixes = (" ", tag) for line in delta: |