From 297a99293785272eeb23769822a04c4e49de70bf Mon Sep 17 00:00:00 2001 From: elnuno Date: Fri, 31 Mar 2017 00:39:25 -0300 Subject: [PATCH 1/2] If importing from CSV fails with guessed dialect, try again with Excel (the default). --- rows/plugins/plugin_csv.py | 14 +++++++++++++- tests/tests_plugin_csv.py | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py index a06b1571..e38dacf6 100644 --- a/rows/plugins/plugin_csv.py +++ b/rows/plugins/plugin_csv.py @@ -58,7 +58,9 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None, filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb') + guessed = False if dialect is None: + guessed = True cursor = fobj.tell() dialect = discover_dialect(fobj.read(sample_size), encoding) fobj.seek(cursor) @@ -68,7 +70,17 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None, meta = {'imported_from': 'csv', 'filename': filename, 'encoding': encoding,} - return create_table(reader, meta=meta, *args, **kwargs) + try: + table = create_table(reader, meta=meta, *args, **kwargs) + except ValueError: + if guessed: + fobj.seek(cursor) + reader = unicodecsv.reader(fobj, encoding=encoding, + dialect=unicodecsv.excel) + table = create_table(reader, meta=meta, *args, **kwargs) + else: + raise + return table def export_to_csv(table, filename_or_fobj=None, encoding='utf-8', diff --git a/tests/tests_plugin_csv.py b/tests/tests_plugin_csv.py index 9430f3be..8f062d39 100644 --- a/tests/tests_plugin_csv.py +++ b/tests/tests_plugin_csv.py @@ -250,3 +250,25 @@ def test_export_to_csv_accepts_dialect(self): result_1 = rows.export_to_csv(utils.table, dialect=csv.excel_tab) result_2 = rows.export_to_csv(utils.table, dialect=csv.excel) self.assertEqual(result_1.replace(b'\t', b','), result_2) + + def test_issue_218_revert_guess(self): + + err = b"""problematic_text,cool_number\n,42\n"Problematic text with\ + commas, ""quotes"" and a cool number: 4,2", 84""" + + csv_input = BytesIO(err) + table = rows.import_from_csv(csv_input, dialect='excel') + as_list = list(table) + csv_input = BytesIO(err) + table = rows.import_from_csv(csv_input) + self.assertEqual(as_list, list(table)) + + def test_issue_218_better_msg(self): + err = b"""problematic_text,cool_number\n,42\n"Problematic text with\ + commas, ""quotes"" and a cool number: 4,2", 84""" + + tricky_table = err * 1000 + bytes(range(256)) + self.assertRaises(csv.Error, rows.import_from_csv, BytesIO(tricky_table)) + + with self.assertRaisesRegex(csv.Error, 'Excel'): + tricked = rows.import_from_csv(BytesIO(tricky_table), encoding='utf-8') From d20c90dddbf7143f19f33cb775f80a4841bd0343 Mon Sep 17 00:00:00 2001 From: elnuno Date: Fri, 31 Mar 2017 01:16:45 -0300 Subject: [PATCH 2/2] Add a very rough diagnostic message on CSV import errors. --- rows/plugins/plugin_csv.py | 34 ++++++++++++++++++++++++++-------- tests/tests_plugin_csv.py | 2 +- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py index e38dacf6..0e41cdde 100644 --- a/rows/plugins/plugin_csv.py +++ b/rows/plugins/plugin_csv.py @@ -56,6 +56,15 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None, `open(filename, mode='rb')`. ''' + error_msg = '''Error importing from CSV. Details: + Filename: {filename} + File object: {fobj} + Dialect (guessed?): {dialect} ({guessed}) + Encoding: {encoding} + Sample size: {sample_size} bytes + Reason: {exc} + ''' + filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb') guessed = False @@ -71,15 +80,24 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None, 'filename': filename, 'encoding': encoding,} try: - table = create_table(reader, meta=meta, *args, **kwargs) - except ValueError: - if guessed: - fobj.seek(cursor) - reader = unicodecsv.reader(fobj, encoding=encoding, - dialect=unicodecsv.excel) + try: table = create_table(reader, meta=meta, *args, **kwargs) - else: - raise + except ValueError as e: + if guessed: + dialect = unicodecsv.excel + fobj.seek(cursor) + reader = unicodecsv.reader(fobj, encoding=encoding, + dialect=dialect) + table = create_table(reader, meta=meta, *args, **kwargs) + else: + raise + except Exception as e: + name = 'Excel' if dialect is unicodecsv.excel else dialect._name + error = error_msg.format(filename=filename, fobj=fobj, + dialect=name, guessed=guessed, encoding=encoding, + sample_size=sample_size, exc=e) + raise unicodecsv.Error(error) from e + return table diff --git a/tests/tests_plugin_csv.py b/tests/tests_plugin_csv.py index 8f062d39..4bb7a112 100644 --- a/tests/tests_plugin_csv.py +++ b/tests/tests_plugin_csv.py @@ -270,5 +270,5 @@ def test_issue_218_better_msg(self): tricky_table = err * 1000 + bytes(range(256)) self.assertRaises(csv.Error, rows.import_from_csv, BytesIO(tricky_table)) - with self.assertRaisesRegex(csv.Error, 'Excel'): + with self.assertRaisesRegex(csv.Error, 'Details'): tricked = rows.import_from_csv(BytesIO(tricky_table), encoding='utf-8')