about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Lib/csv.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- Lib/csv.py 67
1 files changed, 32 insertions, 35 deletions
diff --git a/Lib/csv.py b/Lib/csv.py
index 984ed7e581b..8dfc77e3108 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -4,7 +4,6 @@ csv.py - read/write/investigate CSV files
"""
import re
-from functools import reduce
from _csv import Error, __version__, writer, reader, register_dialect, \
unregister_dialect, get_dialect, list_dialects, \
field_size_limit, \
@@ -12,10 +11,7 @@ from _csv import Error, __version__, writer, reader, register_dialect, \
__doc__
from _csv import Dialect as _Dialect
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
+from io import StringIO
__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
"Error", "Dialect", "__doc__", "excel", "excel_tab",
@@ -24,7 +20,7 @@ __all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
"unregister_dialect", "__version__", "DictReader", "DictWriter" ]
class Dialect:
- """Describe an Excel dialect.
+ """Describe a CSV dialect.
This must be subclassed (see csv.excel). Valid attributes are:
delimiter, quotechar, escapechar, doublequote, skipinitialspace,
@@ -50,7 +46,7 @@ class Dialect:
def _validate(self):
try:
_Dialect(self)
- except TypeError, e:
+ except TypeError as e:
# We do this for compatibility with py2.3
raise Error(str(e))
@@ -69,6 +65,16 @@ class excel_tab(excel):
delimiter = '\t'
register_dialect("excel-tab", excel_tab)
+class unix_dialect(Dialect):
+ """Describe the usual properties of Unix-generated CSV files."""
+ delimiter = ','
+ quotechar = '"'
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = '\n'
+ quoting = QUOTE_ALL
+register_dialect("unix", unix_dialect)
+
class DictReader:
def __init__(self, f, fieldnames=None, restkey=None, restval=None,
@@ -87,7 +93,7 @@ class DictReader:
def fieldnames(self):
if self._fieldnames is None:
try:
- self._fieldnames = self.reader.next()
+ self._fieldnames = next(self.reader)
except StopIteration:
pass
self.line_num = self.reader.line_num
@@ -97,18 +103,18 @@ class DictReader:
def fieldnames(self, value):
self._fieldnames = value
- def next(self):
+ def __next__(self):
if self.line_num == 0:
# Used only for its side effect.
self.fieldnames
- row = self.reader.next()
+ row = next(self.reader)
self.line_num = self.reader.line_num
# unlike the basic reader, we prefer not to return blanks,
# because we will typically wind up with a dict full of None
# values
while row == []:
- row = self.reader.next()
+ row = next(self.reader)
d = dict(zip(self.fieldnames, row))
lf = len(self.fieldnames)
lr = len(row)
@@ -126,9 +132,8 @@ class DictWriter:
self.fieldnames = fieldnames # list of keys for the dict
self.restval = restval # for writing short dicts
if extrasaction.lower() not in ("raise", "ignore"):
- raise ValueError, \
- ("extrasaction (%s) must be 'raise' or 'ignore'" %
- extrasaction)
+ raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
+ % extrasaction)
self.extrasaction = extrasaction
self.writer = writer(f, dialect, *args, **kwds)
@@ -140,8 +145,8 @@ class DictWriter:
if self.extrasaction == "raise":
wrong_fields = [k for k in rowdict if k not in self.fieldnames]
if wrong_fields:
- raise ValueError("dict contains fields not in fieldnames: " +
- ", ".join(wrong_fields))
+ raise ValueError("dict contains fields not in fieldnames: "
+ + ", ".join(wrong_fields))
return [rowdict.get(key, self.restval) for key in self.fieldnames]
def writerow(self, rowdict):
@@ -181,7 +186,7 @@ class Sniffer:
delimiters)
if not delimiter:
- raise Error, "Could not determine delimiter"
+ raise Error("Could not determine delimiter")
class dialect(Dialect):
_name = "sniffed"
@@ -245,12 +250,10 @@ class Sniffer:
if m[n]:
spaces += 1
- quotechar = reduce(lambda a, b, quotes = quotes:
- (quotes[a] > quotes[b]) and a or b, quotes.keys())
+ quotechar = max(quotes, key=quotes.get)
if delims:
- delim = reduce(lambda a, b, delims = delims:
- (delims[a] > delims[b]) and a or b, delims.keys())
+ delim = max(delims, key=delims.get)
skipinitialspace = delims[delim] == spaces
if delim == '\n': # most likely a file with a single column
delim = ''
@@ -293,7 +296,7 @@ class Sniffer:
additional chunks as necessary.
"""
- data = filter(None, data.split('\n'))
+ data = list(filter(None, data.split('\n')))
ascii = [chr(c) for c in range(127)] # 7-bit ASCII
@@ -316,19 +319,17 @@ class Sniffer:
charFrequency[char] = metaFrequency
for char in charFrequency.keys():
- items = charFrequency[char].items()
+ items = list(charFrequency[char].items())
if len(items) == 1 and items[0][0] == 0:
continue
# get the mode of the frequencies
if len(items) > 1:
- modes[char] = reduce(lambda a, b: a[1] > b[1] and a or b,
- items)
+ modes[char] = max(items, key=lambda x: x[1])
# adjust the mode - subtract the sum of all
# other frequencies
items.remove(modes[char])
modes[char] = (modes[char][0], modes[char][1]
- - reduce(lambda a, b: (0, a[1] + b[1]),
- items)[1])
+ - sum(item[1] for item in items))
else:
modes[char] = items[0]
@@ -348,7 +349,7 @@ class Sniffer:
consistency -= 0.01
if len(delims) == 1:
- delim = delims.keys()[0]
+ delim = list(delims.keys())[0]
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)
@@ -391,7 +392,7 @@ class Sniffer:
rdr = reader(StringIO(sample), self.sniff(sample))
- header = rdr.next() # assume first row is header
+ header = next(rdr) # assume first row is header
columns = len(header)
columnTypes = {}
@@ -407,9 +408,9 @@ class Sniffer:
if len(row) != columns:
continue # skip rows that have irregular number of columns
- for col in columnTypes.keys():
+ for col in list(columnTypes.keys()):
- for thisType in [int, long, float, complex]:
+ for thisType in [int, float, complex]:
try:
thisType(row[col])
break
@@ -419,10 +420,6 @@ class Sniffer:
# fallback to length of string
thisType = len(row[col])
- # treat longs as ints
- if thisType == long:
- thisType = int
-
if thisType != columnTypes[col]:
if columnTypes[col] is None: # add new column type
columnTypes[col] = thisType