From 372007aa3466dd3d4dc2f432364ba8f220d8da32 Mon Sep 17 00:00:00 2001
From: Rhenan Bartels <rhenan.bartels@gmail.com>
Date: Wed, 16 May 2018 14:55:52 -0300
Subject: [PATCH 1/4] Remove null bytes from CSV input

---
 rows/plugins/plugin_csv.py         |  15 ++++++++++++++-
 tests/data/csv_with_null_bytes.csv | Bin 0 -> 3552 bytes
 tests/tests_plugin_csv.py          |   7 +++++++
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/csv_with_null_bytes.csv
diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index bcd1904a..cd4937f4 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -17,7 +17,7 @@
 
 from __future__ import unicode_literals
 
-from io import BytesIO
+from io import BytesIO, BufferedReader
 
 import six
 import unicodecsv
@@ -27,6 +27,18 @@
 
 sniffer = unicodecsv.Sniffer()
 
+
+class NotNullBytesWrapper(BufferedReader):
+
+    def read(self, *args, **kwargs):
+        data = super().read(*args, **kwargs)
+        return data.replace(b'\x00', b'')
+
+    def readline(self, *args, **kwargs):
+        data = super().readline(*args, **kwargs)
+        return data.replace(b'\x00', b'')
+
+
 if six.PY2:
 
     def discover_dialect(sample, encoding=None,
@@ -104,6 +116,7 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
     """
 
     filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
+    fobj = NotNullBytesWrapper(fobj)
 
     if dialect is None:
         dialect = discover_dialect(sample=read_sample(fobj, sample_size),
diff --git a/tests/data/csv_with_null_bytes.csv b/tests/data/csv_with_null_bytes.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a43b69524800eec9a08beb7ecc8f70aab5c44819
GIT binary patch
literal 3552
zcmeH}L5`a+6ozN(IfV~EH5lTMEIr0ak&=KQi7HiHxPyz30**|Wi}nav^Z?y;RhtkX
z8Ff+3EXm4#@0r*iKffRSo#vT@KUH>?FsYA1<x+u-EW~|`4l26>E<qdLm3hij0hgjW
zNyRS}_rbPysp_MIzNn6(YAbI;Z5=y^LX@Yw(l^nzdQq3Z&hqld?YPY(_4glxWIt{5
zk&>C>MZ1-5)@Dif)Mm8G%dF;^C?GW_K3@SW_^VXnB~Pzg7|j0^Lg`b;m%2F*FpxCw
zh-dr~@cD@krh9&n>d~?F4@5Cb0-tSYK%h@x%%a`%Dxd^7HnP#b(+KI+2qcWgY)fLZ
znnB{PgH4cZV?s&z6ve;=VSQ4(y1tDHQmK@NE-K*KV9ifj5&Qu4o29a>>Oz7EAZd}8
zd8OAGG{|voBU%HBAApB#K#nq(u&+PmuTpPZAFGe)XTX%hRx9@j0qcvhz|2Ea*PIEv
zfJBTkn6P*s&@h2Z7Ly=>tE~p~waI-mcu@E})AhCo5J2e3Mo%UP96WR978>~)@E^f<
zcFWhD%(3N+cyICIx#J<v=-0EsVioGo+I-JswGU&W2aR}H6g9&27e=^~i2>_%!gVpS
zyvYdm{b2DeScEJ*3brxg-Vgpuu#MBYxN+KEuy{H}mOVP{2as(%wkL~b*uo>&JFru`
zx$m`gA{QjlE{2~V4c9EDK?s9&6vkWA_(1lh8yef~dj-d7oWboZ_hEF~TesCW7)F}1
zjocZC!c_p-X2~|6n-25RfZat0t=L4jl=?=Sk9db2^sotg7hdelHD0IhbRQo70+l66
Ar~m)}

literal 0
HcmV?d00001

diff --git a/tests/tests_plugin_csv.py b/tests/tests_plugin_csv.py
index 4c3cdc49..e501a721 100644
--- a/tests/tests_plugin_csv.py
+++ b/tests/tests_plugin_csv.py
@@ -325,3 +325,10 @@ def test_export_callback(self):
             [x[0][0] for x in myfunc.call_args_list],
             [3, 6, 9, 10]
         )
+
+    def test_issue_273(self):
+        filename = 'tests/data/csv_with_null_bytes.csv'
+        # Regression check: must not raise "Error: line contains NULL byte"
+        table = rows.import_from_csv(filename, encoding='latin-1')
+
+        self.assertEqual(len(table), 9)

From 7b1a93412cd945f53495ad98d693ae273ef31cab Mon Sep 17 00:00:00 2001
From: Rhenan Bartels <rhenan.bartels@gmail.com>
Date: Fri, 25 May 2018 14:31:41 -0300
Subject: [PATCH 2/4] Replace null bytes in csv file for Python 2.x

---
 rows/plugins/plugin_csv.py | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index cd4937f4..f37412cf 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -18,6 +18,7 @@
 from __future__ import unicode_literals
 
 from io import BytesIO, BufferedReader
+from io import open as io_open
 
 import six
 import unicodecsv
@@ -28,18 +29,17 @@
 sniffer = unicodecsv.Sniffer()
 
 
-class NotNullBytesWrapper(BufferedReader):
-
-    def read(self, *args, **kwargs):
-        data = super().read(*args, **kwargs)
-        return data.replace(b'\x00', b'')
+if six.PY2:
 
-    def readline(self, *args, **kwargs):
-        data = super().readline(*args, **kwargs)
-        return data.replace(b'\x00', b'')
+    class NotNullBytesWrapper(BufferedReader):
 
+        def read(self, *args, **kwargs):
+            data = super(NotNullBytesWrapper, self).read(*args, **kwargs)
+            return data.replace(b'\x00', b'')
 
-if six.PY2:
+        def readline(self, *args, **kwargs):
+            data = super(NotNullBytesWrapper, self).readline(*args, **kwargs)
+            return data.replace(b'\x00', b'')
 
     def discover_dialect(sample, encoding=None,
                          delimiters=(b',', b';', b'\t', b'|')):
@@ -61,6 +61,16 @@ def discover_dialect(sample, encoding=None,
 
 elif six.PY3:
 
+    class NotNullBytesWrapper(BufferedReader):
+
+        def read(self, *args, **kwargs):
+            data = super().read(*args, **kwargs)
+            return data.replace(b'\x00', b'')
+
+        def readline(self, *args, **kwargs):
+            data = super().readline(*args, **kwargs)
+            return data.replace(b'\x00', b'')
+
     def discover_dialect(sample, encoding, delimiters=(',', ';', '\t', '|')):
         """Discover a CSV dialect based on a sample size
 
@@ -116,7 +126,11 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
     """
 
     filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
-    fobj = NotNullBytesWrapper(fobj)
+
+    if six.PY2:
+        fobj = NotNullBytesWrapper(io_open(filename, mode='rb'))
+    elif six.PY3:
+        fobj = NotNullBytesWrapper(fobj)
 
     if dialect is None:
         dialect = discover_dialect(sample=read_sample(fobj, sample_size),

From 4f5e836b92ac20475915b8aace377ab276fae47e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Sat, 26 May 2018 21:21:08 -0300
Subject: [PATCH 3/4] Add first tests for get_filename_and_fobj

Pair programming with @rhenanbartels
---
 rows/plugins/utils.py       | 22 ++++++++++----
 tests/tests_plugin_utils.py | 60 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/rows/plugins/utils.py b/rows/plugins/utils.py
index d5c4181d..599f1c64 100644
--- a/rows/plugins/utils.py
+++ b/rows/plugins/utils.py
@@ -17,6 +17,7 @@
 
 from __future__ import unicode_literals
 
+import io
 from collections import Iterator, OrderedDict
 from itertools import chain, islice
 from unicodedata import normalize
@@ -82,13 +83,24 @@ def ipartition(iterable, partition_size):
             yield data
 
 
-def get_filename_and_fobj(filename_or_fobj, mode='r', dont_open=False):
-    if getattr(filename_or_fobj, 'read', None) is not None:
+def get_filename_and_fobj(filename_or_fobj, mode='r', dont_open=False, **kwargs):
+
+    # TODO: what if fobj is passed, using a different mode from `mode`?
+
+    if getattr(filename_or_fobj, 'read', None) is not None:  # file-like object
+        filename = getattr(filename_or_fobj, 'name', None)
         fobj = filename_or_fobj
-        filename = getattr(fobj, 'name', None)
-    else:
-        fobj = open(filename_or_fobj, mode=mode) if not dont_open else None
+        try:
+            file_number = fobj.fileno()
+        except io.UnsupportedOperation:
+            # `fileno()` is unsupported here (e.g. `io.BytesIO`)
+            fobj = io.BufferedReader(fobj, **kwargs)  # TODO: pass mode
+        else:  # Regular file
+            fobj = io.open(file_number, mode=mode, **kwargs)
+
+    else:  # filename
         filename = filename_or_fobj
+        fobj = io.open(filename_or_fobj, mode=mode, **kwargs) if not dont_open else None
 
     return filename, fobj
 
diff --git a/tests/tests_plugin_utils.py b/tests/tests_plugin_utils.py
index b9798861..576ca19a 100644
--- a/tests/tests_plugin_utils.py
+++ b/tests/tests_plugin_utils.py
@@ -17,6 +17,7 @@
 
 from __future__ import unicode_literals
 
+import io
 import itertools
 import random
 import tempfile
@@ -33,6 +34,8 @@
 from rows import fields
 
 
+get_filename_and_fobj = plugins_utils.get_filename_and_fobj
+
 class GenericUtilsTestCase(unittest.TestCase):
 
     def test_slug(self):
@@ -339,7 +342,62 @@ def test_export_data(self):
         result = plugins_utils.export_data(filename_or_fobj, data)
         self.assertIs(result, data)
 
+
+class FilenameFObjTestCase(unittest.TestCase):
+    # TODO: test other features of this function (example: BytesIO should
+    #       return filename = None)
+
+    def setUp(self):
+        self.filename = 'tests/data/csv_with_null_bytes.csv'
+        self.encoding = 'latin1'
+        self.data = io.open(self.filename, mode='rb').read()
+        self.decoded_data = self.data.decode(self.encoding)
+
+    def test_get_filename_and_fobj_passing_filename(self):
+        mode = 'rb'
+        _, f = get_filename_and_fobj(self.filename, mode=mode)
+        self.assertTrue(hasattr(f, 'readable') and f.readable())
+        self.assertEqual(f.mode, mode)
+        self.assertEqual(f.read(), self.data)
+
+    def test_get_filename_and_fobj_passing_text_fobj(self):
+        if six.PY3:
+            mode = 'r'
+            fobj = open(self.filename, encoding=self.encoding)
+            _, f = get_filename_and_fobj(fobj, mode=mode, encoding=self.encoding)
+            self.assertTrue(hasattr(f, 'readable') and f.readable())
+            self.assertEqual(f.mode, mode)
+            self.assertEqual(f.read(), self.decoded_data)
+
+        mode = 'r'
+        fobj = io.open(self.filename, encoding=self.encoding)
+        _, f = get_filename_and_fobj(fobj, mode=mode, encoding=self.encoding)
+        self.assertTrue(hasattr(f, 'readable') and f.readable())
+        self.assertEqual(f.mode, mode)
+        self.assertEqual(f.read(), self.decoded_data)
+
+    def test_get_filename_and_fobj_passing_bytes_fobj(self):
+        mode = 'rb'
+        fobj = open(self.filename, mode=mode)
+        _, f = get_filename_and_fobj(fobj, mode=mode)
+        self.assertTrue(hasattr(f, 'readable') and f.readable())
+        self.assertEqual(f.mode, mode)
+        self.assertEqual(f.read(), self.data)
+
+        fobj = io.open(self.filename, mode=mode)
+        _, f = get_filename_and_fobj(fobj, mode=mode)
+        self.assertTrue(hasattr(f, 'readable') and f.readable())
+        self.assertEqual(f.mode, mode)
+        self.assertEqual(f.read(), self.data)
+
+    def test_get_filename_and_fobj_passing_BytesIO(self):
+        mode = 'rb'
+        fobj = io.BytesIO(self.data)
+        _, f = get_filename_and_fobj(fobj, mode=mode)
+        self.assertTrue(hasattr(f, 'readable') and f.readable())
+        self.assertEqual(f.mode, mode)
+        self.assertEqual(f.read(), self.data)
+
     # TODO: test make_header
     # TODO: test all features of create_table
     # TODO: test if error is raised if len(row) != len(fields)
-    # TODO: test get_fobj_and_filename (BytesIO should return filename = None)

From b5fa166a78f8c87e7f0fddcd39bdfe11d20763a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Sat, 26 May 2018 21:21:56 -0300
Subject: [PATCH 4/4] Fix CSV's not-null byte wrapper

---
 rows/plugins/plugin_csv.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index f37412cf..0a1d4228 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -17,8 +17,7 @@
 
 from __future__ import unicode_literals
 
-from io import BytesIO, BufferedReader
-from io import open as io_open
+from io import open as io_open, BytesIO, BufferedReader
 
 import six
 import unicodecsv
@@ -163,7 +162,7 @@ def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
     else:
         fobj = BytesIO()
 
-    # TODO: may use `io.BufferedWriter` instead of `ipartition` so user can
+    # TODO: may use `BufferedWriter` instead of `ipartition` so user can
     # choose the real size (in Bytes) when to flush to the file system, instead
     # number of rows
     writer = unicodecsv.writer(fobj, encoding=encoding, dialect=dialect)