aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Modules/_csv.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_csv.c')
-rw-r--r--Modules/_csv.c301
1 files changed, 150 insertions, 151 deletions
diff --git a/Modules/_csv.c b/Modules/_csv.c
index fd6121fccfc..88d4f9774fa 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -6,10 +6,6 @@ This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.
-**** For people modifying this code, please note that as of this writing
-**** (2003-03-23), it is intended that this code should work with Python
-**** 2.2.
-
*/
#define MODULE_VERSION "1.0"
@@ -17,52 +13,8 @@ module instead.
#include "Python.h"
#include "structmember.h"
-
-/* begin 2.2 compatibility macros */
-#ifndef PyDoc_STRVAR
-/* Define macros for inline documentation. */
-#define PyDoc_VAR(name) static char name[]
-#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
-#ifdef WITH_DOC_STRINGS
-#define PyDoc_STR(str) str
-#else
-#define PyDoc_STR(str) ""
-#endif
-#endif /* ifndef PyDoc_STRVAR */
-
-#ifndef PyMODINIT_FUNC
-# if defined(__cplusplus)
-# define PyMODINIT_FUNC extern "C" void
-# else /* __cplusplus */
-# define PyMODINIT_FUNC void
-# endif /* __cplusplus */
-#endif
-
-#ifndef Py_CLEAR
-#define Py_CLEAR(op) \
- do { \
- if (op) { \
- PyObject *tmp = (PyObject *)(op); \
- (op) = NULL; \
- Py_DECREF(tmp); \
- } \
- } while (0)
-#endif
-#ifndef Py_VISIT
-#define Py_VISIT(op) \
- do { \
- if (op) { \
- int vret = visit((PyObject *)(op), arg); \
- if (vret) \
- return vret; \
- } \
- } while (0)
-#endif
-
-/* end 2.2 compatibility macros */
-
#define IS_BASESTRING(o) \
- PyObject_TypeCheck(o, &PyBaseString_Type)
+ PyUnicode_Check(o)
static PyObject *error_obj; /* CSV exception */
static PyObject *dialects; /* Dialect registry */
@@ -95,9 +47,9 @@ typedef struct {
PyObject_HEAD
int doublequote; /* is " represented by ""? */
- char delimiter; /* field separator */
- char quotechar; /* quote character */
- char escapechar; /* escape character */
+ Py_UNICODE delimiter; /* field separator */
+ Py_UNICODE quotechar; /* quote character */
+ Py_UNICODE escapechar; /* escape character */
int skipinitialspace; /* ignore spaces following delimiter? */
PyObject *lineterminator; /* string to write between records */
int quoting; /* style of quoting to write */
@@ -105,7 +57,7 @@ typedef struct {
int strict; /* raise exception on bad CSV */
} DialectObj;
-staticforward PyTypeObject Dialect_Type;
+static PyTypeObject Dialect_Type;
typedef struct {
PyObject_HEAD
@@ -116,14 +68,14 @@ typedef struct {
PyObject *fields; /* field list for current record */
ParserState state; /* current CSV parse state */
- char *field; /* build current field in here */
- int field_size; /* size of allocated buffer */
- int field_len; /* length of current field */
+ Py_UNICODE *field; /* build current field in here */
+ Py_ssize_t field_size; /* size of allocated buffer */
+ Py_ssize_t field_len; /* length of current field */
int numeric_field; /* treat field as numeric */
unsigned long line_num; /* Source-file line number */
} ReaderObj;
-staticforward PyTypeObject Reader_Type;
+static PyTypeObject Reader_Type;
#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
@@ -134,13 +86,13 @@ typedef struct {
DialectObj *dialect; /* parsing dialect */
- char *rec; /* buffer for parser.join */
- int rec_size; /* size of allocated record */
- int rec_len; /* length of record */
+ Py_UNICODE *rec; /* buffer for parser.join */
+ Py_ssize_t rec_size; /* size of allocated record */
+ Py_ssize_t rec_len; /* length of record */
int num_fields; /* number of fields in record */
} WriterObj;
-staticforward PyTypeObject Writer_Type;
+static PyTypeObject Writer_Type;
/*
* DIALECT class
@@ -169,14 +121,14 @@ get_string(PyObject *str)
}
static PyObject *
-get_nullchar_as_None(char c)
+get_nullchar_as_None(Py_UNICODE c)
{
if (c == '\0') {
Py_INCREF(Py_None);
return Py_None;
}
else
- return PyString_FromStringAndSize((char*)&c, 1);
+ return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
}
static PyObject *
@@ -186,6 +138,12 @@ Dialect_get_lineterminator(DialectObj *self)
}
static PyObject *
+Dialect_get_delimiter(DialectObj *self)
+{
+ return get_nullchar_as_None(self->delimiter);
+}
+
+static PyObject *
Dialect_get_escapechar(DialectObj *self)
{
return get_nullchar_as_None(self->escapechar);
@@ -200,7 +158,7 @@ Dialect_get_quotechar(DialectObj *self)
static PyObject *
Dialect_get_quoting(DialectObj *self)
{
- return PyInt_FromLong(self->quoting);
+ return PyLong_FromLong(self->quoting);
}
static int
@@ -223,35 +181,47 @@ _set_int(const char *name, int *target, PyObject *src, int dflt)
if (src == NULL)
*target = dflt;
else {
- if (!PyInt_Check(src)) {
+ long value;
+ if (!PyLong_CheckExact(src)) {
PyErr_Format(PyExc_TypeError,
"\"%s\" must be an integer", name);
return -1;
}
- *target = PyInt_AsLong(src);
+ value = PyLong_AsLong(src);
+ if (value == -1 && PyErr_Occurred())
+ return -1;
+#if SIZEOF_LONG > SIZEOF_INT
+ if (value > INT_MAX || value < INT_MIN) {
+ PyErr_Format(PyExc_ValueError,
+ "integer out of range for \"%s\"", name);
+ return -1;
+ }
+#endif
+ *target = (int)value;
}
return 0;
}
static int
-_set_char(const char *name, char *target, PyObject *src, char dflt)
+_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
{
if (src == NULL)
*target = dflt;
else {
- if (src == Py_None || PyString_Size(src) == 0)
- *target = '\0';
- else if (!PyString_Check(src) || PyString_Size(src) != 1) {
- PyErr_Format(PyExc_TypeError,
- "\"%s\" must be an 1-character string",
- name);
- return -1;
- }
- else {
- char *s = PyString_AsString(src);
- if (s == NULL)
+ *target = '\0';
+ if (src != Py_None) {
+ Py_UNICODE *buf;
+ Py_ssize_t len;
+ buf = PyUnicode_AsUnicode(src);
+ len = PyUnicode_GetSize(src);
+ if (buf == NULL || len > 1) {
+ PyErr_Format(PyExc_TypeError,
+ "\"%s\" must be an 1-character string",
+ name);
return -1;
- *target = s[0];
+ }
+ if (len > 0)
+ *target = buf[0];
}
}
return 0;
@@ -261,13 +231,13 @@ static int
_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
{
if (src == NULL)
- *target = PyString_FromString(dflt);
+ *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
else {
if (src == Py_None)
*target = NULL;
else if (!IS_BASESTRING(src)) {
PyErr_Format(PyExc_TypeError,
- "\"%s\" must be an string", name);
+ "\"%s\" must be a string", name);
return -1;
}
else {
@@ -282,7 +252,7 @@ _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
static int
dialect_check_quoting(int quoting)
{
- StyleDesc *qs = quote_styles;
+ StyleDesc *qs;
for (qs = quote_styles; qs->name; qs++) {
if (qs->style == quoting)
@@ -295,7 +265,6 @@ dialect_check_quoting(int quoting)
#define D_OFF(x) offsetof(DialectObj, x)
static struct PyMemberDef Dialect_memberlist[] = {
- { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
{ "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
{ "doublequote", T_INT, D_OFF(doublequote), READONLY },
{ "strict", T_INT, D_OFF(strict), READONLY },
@@ -303,6 +272,7 @@ static struct PyMemberDef Dialect_memberlist[] = {
};
static PyGetSetDef Dialect_getsetlist[] = {
+ { "delimiter", (getter)Dialect_get_delimiter},
{ "escapechar", (getter)Dialect_get_escapechar},
{ "lineterminator", (getter)Dialect_get_lineterminator},
{ "quotechar", (getter)Dialect_get_quotechar},
@@ -473,7 +443,7 @@ static PyTypeObject Dialect_Type = {
(printfunc)0, /* tp_print */
(getattrfunc)0, /* tp_getattr */
(setattrfunc)0, /* tp_setattr */
- (cmpfunc)0, /* tp_compare */
+ 0, /* tp_reserved */
(reprfunc)0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -532,7 +502,7 @@ parse_save_field(ReaderObj *self)
{
PyObject *field;
- field = PyString_FromStringAndSize(self->field, self->field_len);
+ field = PyUnicode_FromUnicode(self->field, self->field_len);
if (field == NULL)
return -1;
self->field_len = 0;
@@ -560,15 +530,16 @@ parse_grow_buff(ReaderObj *self)
self->field_size = 4096;
if (self->field != NULL)
PyMem_Free(self->field);
- self->field = PyMem_Malloc(self->field_size);
+ self->field = PyMem_New(Py_UNICODE, self->field_size);
}
else {
- if (self->field_size > INT_MAX / 2) {
+ if (self->field_size > PY_SSIZE_T_MAX / 2) {
PyErr_NoMemory();
return 0;
}
self->field_size *= 2;
- self->field = PyMem_Realloc(self->field, self->field_size);
+ self->field = PyMem_Resize(self->field, Py_UNICODE,
+ self->field_size);
}
if (self->field == NULL) {
PyErr_NoMemory();
@@ -578,7 +549,7 @@ parse_grow_buff(ReaderObj *self)
}
static int
-parse_add_char(ReaderObj *self, char c)
+parse_add_char(ReaderObj *self, Py_UNICODE c)
{
if (self->field_len >= field_limit) {
PyErr_Format(error_obj, "field larger than field limit (%ld)",
@@ -592,7 +563,7 @@ parse_add_char(ReaderObj *self, char c)
}
static int
-parse_process_char(ReaderObj *self, char c)
+parse_process_char(ReaderObj *self, Py_UNICODE c)
{
DialectObj *dialect = self->dialect;
@@ -779,8 +750,8 @@ Reader_iternext(ReaderObj *self)
{
PyObject *lineobj;
PyObject *fields = NULL;
- char *line, c;
- int linelen;
+ Py_UNICODE *line, c;
+ Py_ssize_t linelen;
if (parse_reset(self) < 0)
return NULL;
@@ -792,16 +763,24 @@ Reader_iternext(ReaderObj *self)
self->state == IN_QUOTED_FIELD)) {
if (self->dialect->strict)
PyErr_SetString(error_obj, "unexpected end of data");
- else if (parse_save_field(self) >= 0 )
+ else if (parse_save_field(self) >= 0)
break;
}
return NULL;
}
+ if (!PyUnicode_Check(lineobj)) {
+ PyErr_Format(error_obj,
+ "iterator should return strings, "
+ "not %.200s "
+ "(did you open the file in text mode?)",
+ lineobj->ob_type->tp_name
+ );
+ Py_DECREF(lineobj);
+ return NULL;
+ }
++self->line_num;
-
- line = PyString_AsString(lineobj);
- linelen = PyString_Size(lineobj);
-
+ line = PyUnicode_AsUnicode(lineobj);
+ linelen = PyUnicode_GetSize(lineobj);
if (line == NULL || linelen < 0) {
Py_DECREF(lineobj);
return NULL;
@@ -873,8 +852,8 @@ static struct PyMethodDef Reader_methods[] = {
#define R_OFF(x) offsetof(ReaderObj, x)
static struct PyMemberDef Reader_memberlist[] = {
- { "dialect", T_OBJECT, R_OFF(dialect), RO },
- { "line_num", T_ULONG, R_OFF(line_num), RO },
+ { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
+ { "line_num", T_ULONG, R_OFF(line_num), READONLY },
{ NULL }
};
@@ -889,7 +868,7 @@ static PyTypeObject Reader_Type = {
(printfunc)0, /*tp_print*/
(getattrfunc)0, /*tp_getattr*/
(setattrfunc)0, /*tp_setattr*/
- (cmpfunc)0, /*tp_compare*/
+ 0, /*tp_reserved*/
(reprfunc)0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
@@ -973,13 +952,14 @@ join_reset(WriterObj *self)
/* Calculate new record length or append field to record. Return new
* record length.
*/
-static int
-join_append_data(WriterObj *self, char *field, int quote_empty,
+static Py_ssize_t
+join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
int *quoted, int copy_phase)
{
DialectObj *dialect = self->dialect;
- int i, rec_len;
- char *lineterm;
+ int i;
+ Py_ssize_t rec_len;
+ Py_UNICODE *lineterm;
#define ADDCH(c) \
do {\
@@ -988,7 +968,7 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
rec_len++;\
} while(0)
- lineterm = PyString_AsString(dialect->lineterminator);
+ lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
if (lineterm == NULL)
return -1;
@@ -1003,8 +983,9 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
ADDCH(dialect->quotechar);
/* Copy/count field data */
- for (i = 0;; i++) {
- char c = field[i];
+ /* If field is null just pass over */
+ for (i = 0; field; i++) {
+ Py_UNICODE c = field[i];
int want_escape = 0;
if (c == '\0')
@@ -1012,8 +993,8 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
if (c == dialect->delimiter ||
c == dialect->escapechar ||
- c == dialect->quotechar ||
- strchr(lineterm, c)) {
+ c == dialect->quotechar ||
+ Py_UNICODE_strchr(lineterm, c)) {
if (dialect->quoting == QUOTE_NONE)
want_escape = 1;
else {
@@ -1045,7 +1026,7 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
if (i == 0 && quote_empty) {
if (dialect->quoting == QUOTE_NONE) {
PyErr_Format(error_obj,
- "single empty field record must be quoted");
+ "single empty field record must be quoted");
return -1;
}
else
@@ -1063,10 +1044,10 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
}
static int
-join_check_rec_size(WriterObj *self, int rec_len)
+join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
{
- if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
+ if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
PyErr_NoMemory();
return 0;
}
@@ -1076,13 +1057,14 @@ join_check_rec_size(WriterObj *self, int rec_len)
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
if (self->rec != NULL)
PyMem_Free(self->rec);
- self->rec = PyMem_Malloc(self->rec_size);
+ self->rec = PyMem_New(Py_UNICODE, self->rec_size);
}
else {
- char *old_rec = self->rec;
+ Py_UNICODE* old_rec = self->rec;
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
- self->rec = PyMem_Realloc(self->rec, self->rec_size);
+ self->rec = PyMem_Resize(self->rec, Py_UNICODE,
+ self->rec_size);
if (self->rec == NULL)
PyMem_Free(old_rec);
}
@@ -1095,9 +1077,9 @@ join_check_rec_size(WriterObj *self, int rec_len)
}
static int
-join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
+join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
{
- int rec_len;
+ Py_ssize_t rec_len;
rec_len = join_append_data(self, field, quote_empty, quoted, 0);
if (rec_len < 0)
@@ -1116,10 +1098,10 @@ join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
static int
join_append_lineterminator(WriterObj *self)
{
- int terminator_len;
- char *terminator;
+ Py_ssize_t terminator_len;
+ Py_UNICODE *terminator;
- terminator_len = PyString_Size(self->dialect->lineterminator);
+ terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
if (terminator_len == -1)
return 0;
@@ -1127,10 +1109,11 @@ join_append_lineterminator(WriterObj *self)
if (!join_check_rec_size(self, self->rec_len + terminator_len))
return 0;
- terminator = PyString_AsString(self->dialect->lineterminator);
+ terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
if (terminator == NULL)
return 0;
- memmove(self->rec + self->rec_len, terminator, terminator_len);
+ memmove(self->rec + self->rec_len, terminator,
+ sizeof(Py_UNICODE)*terminator_len);
self->rec_len += terminator_len;
return 1;
@@ -1146,7 +1129,7 @@ static PyObject *
csv_writerow(WriterObj *self, PyObject *seq)
{
DialectObj *dialect = self->dialect;
- int len, i;
+ Py_ssize_t len, i;
if (!PySequence_Check(seq))
return PyErr_Format(error_obj, "sequence expected");
@@ -1179,29 +1162,26 @@ csv_writerow(WriterObj *self, PyObject *seq)
break;
}
- if (PyString_Check(field)) {
+ if (PyUnicode_Check(field)) {
append_ok = join_append(self,
- PyString_AS_STRING(field),
+ PyUnicode_AS_UNICODE(field),
&quoted, len == 1);
Py_DECREF(field);
}
else if (field == Py_None) {
- append_ok = join_append(self, "", &quoted, len == 1);
+ append_ok = join_append(self, NULL,
+ &quoted, len == 1);
Py_DECREF(field);
}
else {
PyObject *str;
- if (PyFloat_Check(field)) {
- str = PyObject_Repr(field);
- } else {
- str = PyObject_Str(field);
- }
+ str = PyObject_Str(field);
Py_DECREF(field);
if (str == NULL)
return NULL;
-
- append_ok = join_append(self, PyString_AS_STRING(str),
+ append_ok = join_append(self,
+ PyUnicode_AS_UNICODE(str),
&quoted, len == 1);
Py_DECREF(str);
}
@@ -1215,7 +1195,8 @@ csv_writerow(WriterObj *self, PyObject *seq)
return 0;
return PyObject_CallFunction(self->writeline,
- "(s#)", self->rec, self->rec_len);
+ "(u#)", self->rec,
+ self->rec_len);
}
PyDoc_STRVAR(csv_writerows_doc,
@@ -1261,7 +1242,7 @@ static struct PyMethodDef Writer_methods[] = {
#define W_OFF(x) offsetof(WriterObj, x)
static struct PyMemberDef Writer_memberlist[] = {
- { "dialect", T_OBJECT, W_OFF(dialect), RO },
+ { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
{ NULL }
};
@@ -1309,7 +1290,7 @@ static PyTypeObject Writer_Type = {
(printfunc)0, /*tp_print*/
(getattrfunc)0, /*tp_getattr*/
(setattrfunc)0, /*tp_setattr*/
- (cmpfunc)0, /*tp_compare*/
+ 0, /*tp_reserved*/
(reprfunc)0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
@@ -1429,14 +1410,18 @@ csv_field_size_limit(PyObject *module, PyObject *args)
if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
return NULL;
if (new_limit != NULL) {
- if (!PyInt_Check(new_limit)) {
+ if (!PyLong_CheckExact(new_limit)) {
PyErr_Format(PyExc_TypeError,
"limit must be an integer");
return NULL;
}
- field_limit = PyInt_AsLong(new_limit);
+ field_limit = PyLong_AsLong(new_limit);
+ if (field_limit == -1 && PyErr_Occurred()) {
+ field_limit = old_limit;
+ return NULL;
+ }
}
- return PyInt_FromLong(old_limit);
+ return PyLong_FromLong(old_limit);
}
/*
@@ -1571,53 +1556,67 @@ static struct PyMethodDef csv_methods[] = {
{ NULL, NULL }
};
+
+static struct PyModuleDef _csvmodule = {
+ PyModuleDef_HEAD_INIT,
+ "_csv",
+ csv_module_doc,
+ -1,
+ csv_methods,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
PyMODINIT_FUNC
-init_csv(void)
+PyInit__csv(void)
{
PyObject *module;
StyleDesc *style;
if (PyType_Ready(&Dialect_Type) < 0)
- return;
+ return NULL;
if (PyType_Ready(&Reader_Type) < 0)
- return;
+ return NULL;
if (PyType_Ready(&Writer_Type) < 0)
- return;
+ return NULL;
/* Create the module and add the functions */
- module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
+ module = PyModule_Create(&_csvmodule);
if (module == NULL)
- return;
+ return NULL;
/* Add version to the module. */
if (PyModule_AddStringConstant(module, "__version__",
MODULE_VERSION) == -1)
- return;
+ return NULL;
/* Add _dialects dictionary */
dialects = PyDict_New();
if (dialects == NULL)
- return;
+ return NULL;
if (PyModule_AddObject(module, "_dialects", dialects))
- return;
+ return NULL;
/* Add quote styles into dictionary */
for (style = quote_styles; style->name; style++) {
if (PyModule_AddIntConstant(module, style->name,
style->style) == -1)
- return;
+ return NULL;
}
/* Add the Dialect type */
Py_INCREF(&Dialect_Type);
if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
- return;
+ return NULL;
/* Add the CSV exception object to the module. */
error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
if (error_obj == NULL)
- return;
+ return NULL;
PyModule_AddObject(module, "Error", error_obj);
+ return module;
}