From 0663bc2ca2900bc2d0d5a714aada9ae828b72769 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Sun, 24 Sep 2017 18:06:52 -0300
Subject: [PATCH 1/7] Create LazyTable class

---
 rows/__init__.py     |  2 +-
 rows/table.py        | 33 ++++++++++++++++++++++++++++
 tests/tests_table.py | 52 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/rows/__init__.py b/rows/__init__.py
index 8f9f9374..e5563502 100644
--- a/rows/__init__.py
+++ b/rows/__init__.py
@@ -22,7 +22,7 @@
 import rows.plugins as plugins
 
 from rows.operations import join, transform, transpose  # NOQA
-from rows.table import Table, FlexibleTable  # NOQA
+from rows.table import FlexibleTable, LazyTable, Table  # NOQA
 from rows.localization import locale_context  # NOQA
 
 
diff --git a/rows/table.py b/rows/table.py
index 156961a1..418b0ca2 100644
--- a/rows/table.py
+++ b/rows/table.py
@@ -28,6 +28,39 @@
     from collections.abc import MutableSequence, Sized
 
 
+class LazyTable(object):
+
+    def __init__(self, fields, data, meta=None):
+        self.fields = OrderedDict(fields)
+
+        self.Row = namedtuple('Row', self.field_names)
+        self.meta = dict(meta) if meta is not None else {}
+        self._rows = data
+
+    @property
+    def field_names(self):
+        return list(self.fields.keys())
+
+    @property
+    def field_types(self):
+        return list(self.fields.values())
+
+    def __repr__(self):
+        imported = ''
+        if 'imported_from' in self.meta:
+            imported = ' (from {})'.format(self.meta['imported_from'])
+
+        return '<rows.LazyTable{}, {} fields>'.format(
+                imported, len(self.fields))
+
+    def __iter__(self):
+        fields = list(self.fields.items())
+        for row in self._rows:
+            yield self.Row(*[field_type.deserialize(value)
+                             for value, (field_name, field_type) in
+                             zip(row, fields)])
+
+
 class Table(MutableSequence):
 
     def __init__(self, fields, meta=None):
diff --git a/tests/tests_table.py b/tests/tests_table.py
index a901ab31..3183601e 100644
--- a/tests/tests_table.py
+++ b/tests/tests_table.py
@@ -26,7 +26,7 @@
 
 import rows
 import rows.fields as fields
-from rows.table import FlexibleTable, Table
+from rows.table import FlexibleTable, LazyTable, Table
 
 binary_type_name = six.binary_type.__name__
 
@@ -322,6 +322,56 @@ def test_table_add_should_not_iterate_over_rows(self):
         self.assertFalse(table2._rows.__iter__.called)
 
 
+class LazyTableTestCase(unittest.TestCase):
+
+    def setUp(self):
+        fields = {'name': rows.fields.TextField,
+                  'birthdate': rows.fields.DateField, }
+        data_rows = [
+            ('Álvaro Justen', datetime.date(1987, 4, 29)),
+            ('Somebody',      datetime.date(1990, 2, 1)),
+            ('Douglas Adams', datetime.date(1952, 3, 11)), ]
+        data = (row for row in data_rows)
+        self.table = LazyTable(fields=fields, data=data)
+
+    def test_LazyTable_is_present_on_main_namespace(self):
+        self.assertIn('LazyTable', dir(rows))
+        self.assertIs(LazyTable, rows.LazyTable)
+
+    def test_table_iteration(self):
+        table_rows = list(self.table)
+
+        self.assertEqual(list(self.table), [])
+        self.assertEqual(len(table_rows), 3)
+
+        self.assertEqual(table_rows[0].name, 'Álvaro Justen')
+        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
+        self.assertEqual(table_rows[1].name, 'Somebody')
+        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
+        self.assertEqual(table_rows[2].name, 'Douglas Adams')
+        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))
+
+    def test_table_slicing_error(self):
+        with self.assertRaises(TypeError) as context_manager:
+            self.table[0]
+
+    def test_field_names_and_types(self):
+        self.assertEqual(self.table.field_names,
+                         list(self.table.fields.keys()))
+        self.assertEqual(self.table.field_types,
+                         list(self.table.fields.values()))
+
+    def test_table_repr(self):
+        expected = '<rows.LazyTable, 2 fields>'
+        self.assertEqual(expected, repr(self.table))
+
+    # TODO: may be able to setitem (column)
+    # TODO: may be able to delitem (column)
+    # TODO: may be able to getitem (column)
+    # TODO: may be able to append rows (but not insert)
+    # TODO: should it have __add__?
+
+
 class TestFlexibleTable(unittest.TestCase):
 
     def setUp(self):

From 25ad5e5a36a078edc4d3fd3066046dccb5fc8df8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Mon, 22 May 2017 11:11:18 -0700
Subject: [PATCH 2/7] Add `lazy` parameter to `create_table`

---
 rows/plugins/utils.py       | 37 +++++++++++++++++++++++++-------
 tests/tests_plugin_utils.py | 42 +++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/rows/plugins/utils.py b/rows/plugins/utils.py
index a2d4f1f1..38111db4 100644
--- a/rows/plugins/utils.py
+++ b/rows/plugins/utils.py
@@ -28,7 +28,7 @@
     from collections.abc import Iterator
 
 from rows.fields import detect_types
-from rows.table import FlexibleTable, Table
+from rows.table import FlexibleTable, Table, LazyTable
 
 SLUG_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_'
 
@@ -134,9 +134,21 @@ def make_header(field_names, permit_not=False):
     return result
 
 
+def get_row_data(full_field_names, field_names):
+
+    field_indexes = [full_field_names.index(field_name)
+                     for field_name in field_names]
+
+    def func(rows_data):
+        for row_data in rows_data:
+            yield [row_data[field_index] for field_index in field_indexes]
+
+    return func
+
+
 def create_table(data, meta=None, fields=None, skip_header=True,
                  import_fields=None, samples=None, force_types=None,
-                 *args, **kwargs):
+                 lazy=False, *args, **kwargs):
     # TODO: add auto_detect_types=True parameter
     table_rows = iter(data)
     sample_rows = []
@@ -181,11 +193,22 @@ def create_table(data, meta=None, fields=None, skip_header=True,
             new_fields[field_name] = fields[field_name]
         fields = new_fields
 
-    table = Table(fields=fields, meta=meta)
-    # TODO: put this inside Table.__init__
-    for row in chain(sample_rows, table_rows):
-        table.append({field_name: value
-                      for field_name, value in zip(header, row)})
+    if not lazy:
+        table = Table(fields=fields, meta=meta)
+        # TODO: put this inside Table.__init__
+        for row in chain(sample_rows, table_rows):
+            table.append({field_name: value
+                          for field_name, value in zip(header, row)})
+
+    else:
+        data = chain(sample_rows, table_rows)
+        field_names = fields.keys()
+
+        if header != field_names:
+            rows_data = get_row_data(header, field_names)
+            data = chain(rows_data(sample_rows), rows_data(table_rows))
+
+        table = LazyTable(fields=fields, data=data, meta=meta)
 
     return table
 
diff --git a/tests/tests_plugin_utils.py b/tests/tests_plugin_utils.py
index b9798861..80f729e2 100644
--- a/tests/tests_plugin_utils.py
+++ b/tests/tests_plugin_utils.py
@@ -29,6 +29,9 @@
 
 import rows
 import rows.plugins.utils as plugins_utils
+from rows import fields
+from rows.table import LazyTable
+
 import tests.utils as utils
 from rows import fields
 
@@ -172,6 +175,45 @@ def test_create_table_force_types(self):
         for field_name, field_type in force_types.items():
             self.assertEqual(table.fields[field_name], field_type)
 
+    def test_create_table_returns_LazyTable(self):
+        header = ['field1', 'field2', 'field3']
+        table_rows = [['1', '3.14', 'Álvaro'],
+                      ['2', '2.71', 'turicas'],
+                      ['3', '1.23', 'Justen']]
+        force_types = {'field2': rows.fields.DecimalField}
+
+        table = plugins_utils.create_table([header] + table_rows,
+                                           force_types=force_types, lazy=True)
+        self.assertTrue(isinstance(table, LazyTable))
+
+    def test_create_table_LazyTable_import_fields(self):
+        max_number = 1000
+        data = utils.LazyGenerator(max_number)
+        import_fields = ['number_double', 'number_sq']
+
+        table = plugins_utils.create_table(data,
+                                           import_fields=import_fields,
+                                           samples=None, lazy=True)
+        self.assertEqual(table.field_names, import_fields)
+
+        expected_data = [{'number_double': number * 2,
+                          'number_sq': number ** 2}
+                         for number in range(max_number)]
+        data = [dict(row._asdict()) for row in table]
+        self.assertEqual(data, expected_data)
+
+    def test_create_table_sample_size(self):
+        max_number = 1000
+        samples = 200
+
+        data = utils.LazyGenerator(max_number)
+        table = plugins_utils.create_table(data, lazy=True, samples=samples)
+        self.assertEqual(data.last, samples - 1)
+
+        data = utils.LazyGenerator(max_number)
+        table = plugins_utils.create_table(data, samples=samples)
+        self.assertEqual(data.last, max_number - 1)
+
     def test_prepare_to_export_all_fields(self):
         result = plugins_utils.prepare_to_export(utils.table,
                                                  export_fields=None)

From 8bc534195693d3d075fa8bb34a8bb3ebfca34a29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Tue, 23 May 2017 00:57:15 -0700
Subject: [PATCH 3/7] Add `LazyTable` support to `prepare_to_export`

---
 rows/plugins/utils.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/rows/plugins/utils.py b/rows/plugins/utils.py
index 38111db4..71f1467b 100644
--- a/rows/plugins/utils.py
+++ b/rows/plugins/utils.py
@@ -215,15 +215,15 @@ def create_table(data, meta=None, fields=None, skip_header=True,
 
 def prepare_to_export(table, export_fields=None, *args, **kwargs):
     # TODO: optimize for more used cases (export_fields=None)
+
+    # TODO: may create `BaseTable` and use `isinstance` instead
     table_type = type(table)
-    if table_type not in (FlexibleTable, Table):
+    if table_type not in (FlexibleTable, Table, LazyTable):
         raise ValueError('Table type not recognized')
 
-    if export_fields is None:
-        # we use already slugged-fieldnames
+    if export_fields is None:  # Table has slugged fieldnames already
         export_fields = table.field_names
-    else:
-        # we need to slug all the field names
+    else:  # Need to slug all the field names before exporting
         export_fields = make_header(export_fields)
 
     table_field_names = table.field_names
@@ -234,6 +234,7 @@ def prepare_to_export(table, export_fields=None, *args, **kwargs):
 
     yield export_fields
 
+    # TODO: create a standard API on all `Table` classes
     if table_type is Table:
         field_indexes = list(map(table_field_names.index, export_fields))
         for row in table._rows:
@@ -241,6 +242,9 @@ def prepare_to_export(table, export_fields=None, *args, **kwargs):
     elif table_type is FlexibleTable:
         for row in table._rows:
             yield [row[field_name] for field_name in export_fields]
+    elif table_type is LazyTable:
+        for row in table:
+            yield [getattr(row, field_name) for field_name in export_fields]
 
 
 def serialize(table, *args, **kwargs):

From 788e5968ab34a3c9d7f862e576c88830b41535b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Tue, 23 May 2017 14:14:36 -0700
Subject: [PATCH 4/7] Add lazyness test to CSV plugin (already lazy)

---
 rows/plugins/plugin_csv.py |  1 -
 tests/tests_plugin_csv.py  | 27 +++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index 99c19cd4..fc7d3164 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -130,7 +130,6 @@ def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
     contents.
     """
     # TODO: will work only if table.fields is OrderedDict
-    # TODO: should use fobj? What about creating a method like json.dumps?
 
     if filename_or_fobj is not None:
         _, fobj = get_filename_and_fobj(filename_or_fobj, mode='wb')
diff --git a/tests/tests_plugin_csv.py b/tests/tests_plugin_csv.py
index 4c3cdc49..5abadcb2 100644
--- a/tests/tests_plugin_csv.py
+++ b/tests/tests_plugin_csv.py
@@ -325,3 +325,30 @@ def test_export_callback(self):
             [x[0][0] for x in myfunc.call_args_list],
             [3, 6, 9, 10]
         )
+
+    def test_import_from_csv_is_lazy(self):
+        temp = tempfile.NamedTemporaryFile(delete=False)
+        filename = '{}.{}'.format(temp.name, self.file_extension)
+        self.files_to_delete.append(filename)
+        encoding = 'utf-8'
+        number_of_rows = 1000
+
+        fobj = open(filename, mode='wb+')
+        fobj.write('field1,field2\r\n'.encode(encoding))
+        for index in range(number_of_rows):
+            row_data = ','.join([str(index), str(index ** 2)]) + '\r\n'
+            fobj.write(row_data.encode(encoding))
+        fobj.flush()
+        total_bytes = fobj.tell()
+
+        fobj.seek(0)
+        table = rows.import_from_csv(fobj,
+                                     encoding=encoding,
+                                     dialect=csv.excel,
+                                     samples=1,  # pre-read only the first row
+                                     lazy=True)
+        self.assertEqual(fobj.tell(), 20)  # 20 = len(1st line) + len(2nd line)
+
+        data = list(table)
+        self.assertEqual(len(data), number_of_rows)
+        self.assertEqual(fobj.tell(), total_bytes)

From c04e95785d828893c324bdd7aa23170cfbf8b938 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Tue, 23 May 2017 15:21:37 -0700
Subject: [PATCH 5/7] Modify SQLite plugin to be lazy

---
 rows/plugins/sqlite.py       | 12 +++++++-----
 tests/tests_plugin_sqlite.py | 22 ++++++++++++++++++++--
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/rows/plugins/sqlite.py b/rows/plugins/sqlite.py
index 2d179e83..17bff1b2 100644
--- a/rows/plugins/sqlite.py
+++ b/rows/plugins/sqlite.py
@@ -21,6 +21,8 @@
 import sqlite3
 import string
 
+from itertools import chain
+
 import six
 
 import rows.fields as fields
@@ -29,6 +31,7 @@
                                 prepare_to_export)
 
 SQL_TABLE_NAMES = 'SELECT name FROM sqlite_master WHERE type="table"'
+# TODO: may use query args instead of string formatting
 SQL_CREATE_TABLE = 'CREATE TABLE IF NOT EXISTS "{table_name}" ({field_types})'
 SQL_SELECT_ALL = 'SELECT * FROM "{table_name}"'
 SQL_INSERT = 'INSERT INTO "{table_name}" ({field_names}) VALUES ({placeholders})'
@@ -122,13 +125,12 @@ def import_from_sqlite(filename_or_connection, table_name='table1', query=None,
     if query_args is None:
         query_args = tuple()
 
-    table_rows = list(cursor.execute(query, query_args)) # TODO: may be lazy
-    header = [six.text_type(info[0]) for info in cursor.description]
-    cursor.close()
-    # TODO: should close connection also?
+    cursor.execute(query, query_args)
+    data = chain([[six.text_type(info[0]) for info in cursor.description]],
+                 cursor)
 
     meta = {'imported_from': 'sqlite', 'filename': filename_or_connection, }
-    return create_table([header] + table_rows, meta=meta, *args, **kwargs)
+    return create_table(data, meta=meta, *args, **kwargs)
 
 
 def export_to_sqlite(table, filename_or_connection, table_name=None,
diff --git a/tests/tests_plugin_sqlite.py b/tests/tests_plugin_sqlite.py
index 8ed18f82..75e0cc94 100644
--- a/tests/tests_plugin_sqlite.py
+++ b/tests/tests_plugin_sqlite.py
@@ -101,6 +101,24 @@ def test_export_to_sqlite_filename(self):
         table = rows.import_from_sqlite(temp.name)
         self.assert_table_equal(table, utils.table)
 
+    def test_import_from_sqlite_is_lazy(self): #, mocked_create_table):
+        connection = mock.MagicMock()
+        cursor = connection.cursor()
+        cursor.description = [('f1', None), ('f2', None), ('f3', None)]
+        gen = utils.LazyGenerator(max_number=1000)
+        igen = iter(gen)
+        next(igen)  # get header out -- does not happen on SQLite
+        cursor.__iter__.return_value = igen
+        cursor.fetchall = lambda: list(gen)
+
+        table = rows.import_from_sqlite(connection, lazy=True, samples=50)
+        self.assertIs(gen.last, 49)
+
+        for index, _ in enumerate(table):
+            if index == 99:
+                break
+        self.assertIs(gen.last, 99)
+
     def test_export_to_sqlite_connection(self):
         # TODO: may test file contents
         temp = tempfile.NamedTemporaryFile(delete=False, mode='wb')
@@ -124,9 +142,9 @@ def test_export_to_sqlite_create_unique_table_name(self):
         rows.export_to_sqlite(second_table, temp.name)  # table2
 
         result_first_table = rows.import_from_sqlite(temp.name,
-                                                     table_name='table1')
+                table_name='table1')
         result_second_table = rows.import_from_sqlite(temp.name,
-                                                      table_name='table2')
+                table_name='table2')
         self.assert_table_equal(result_first_table, first_table)
         self.assert_table_equal(result_second_table, second_table)
 

From 4bdfc97a1681e487b45685b71cf42157cc0782e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Wed, 24 May 2017 18:45:21 -0700
Subject: [PATCH 6/7] Add lazyness metadata to plugins

---
 rows/plugins/dicts.py          | 3 +++
 rows/plugins/ods.py            | 5 +++++
 rows/plugins/plugin_csv.py     | 3 +++
 rows/plugins/plugin_html.py    | 4 ++++
 rows/plugins/plugin_json.py    | 6 ++++++
 rows/plugins/plugin_parquet.py | 6 +++++-
 rows/plugins/sqlite.py         | 3 +++
 rows/plugins/txt.py            | 3 +++
 rows/plugins/utils.py          | 7 +++++++
 rows/plugins/xls.py            | 3 +++
 rows/plugins/xlsx.py           | 4 ++++
 rows/plugins/xpath.py          | 4 ++++
 tests/tests_plugin_csv.py      | 1 +
 tests/tests_plugin_dicts.py    | 1 +
 tests/tests_plugin_html.py     | 3 ++-
 tests/tests_plugin_json.py     | 1 +
 tests/tests_plugin_ods.py      | 1 +
 tests/tests_plugin_parquet.py  | 1 +
 tests/tests_plugin_sqlite.py   | 1 +
 tests/tests_plugin_txt.py      | 1 +
 tests/tests_plugin_xls.py      | 1 +
 tests/tests_plugin_xlsx.py     | 1 +
 tests/tests_plugin_xpath.py    | 4 ++--
 23 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/rows/plugins/dicts.py b/rows/plugins/dicts.py
index 713f07e4..12d1543d 100644
--- a/rows/plugins/dicts.py
+++ b/rows/plugins/dicts.py
@@ -52,6 +52,9 @@ def import_from_dicts(data, samples=None, *args, **kwargs):
     return create_table(chain([headers], data_rows), meta=meta, *args, **kwargs)
 
 
+import_from_dicts.is_lazy = False
+
+
 def export_to_dicts(table, *args, **kwargs):
     """Export a `rows.Table` to a list of dicts"""
     field_names = table.field_names
diff --git a/rows/plugins/ods.py b/rows/plugins/ods.py
index af8e00cc..1bcf4d02 100644
--- a/rows/plugins/ods.py
+++ b/rows/plugins/ods.py
@@ -103,5 +103,10 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):
 
     max_length = max(len(row) for row in table_rows)
     full_rows = complete_with_None(table_rows, max_length)
+
     meta = {'imported_from': 'ods', 'filename': filename,}
+
     return create_table(full_rows, meta=meta, *args, **kwargs)
+
+
+import_from_ods.is_lazy = False
diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index fc7d3164..e162e9b0 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -118,6 +118,9 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
     return create_table(reader, meta=meta, *args, **kwargs)
 
 
+import_from_csv.is_lazy = True
+
+
 def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
                   dialect=unicodecsv.excel, batch_size=100, callback=None,
                   *args, **kwargs):
diff --git a/rows/plugins/plugin_html.py b/rows/plugins/plugin_html.py
index 9077c6f5..e4872ed2 100644
--- a/rows/plugins/plugin_html.py
+++ b/rows/plugins/plugin_html.py
@@ -97,6 +97,9 @@ def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
     return create_table(table_rows, meta=meta, *args, **kwargs)
 
 
+import_from_html.is_lazy = False
+
+
 def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
                    **kwargs):
     """Export and return rows.Table data to HTML file."""
@@ -106,6 +109,7 @@ def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
     header = ['      <th> {} </th>\n'.format(field) for field in fields]
     result.extend(header)
     result.extend(['    </tr>\n', '  </thead>\n', '\n', '  <tbody>\n', '\n'])
+    # TODO: could be lazy so we don't need to store the whole table into memory
     for index, row in enumerate(serialized_table, start=1):
         css_class = 'odd' if index % 2 == 1 else 'even'
         result.append('    <tr class="{}">\n'.format(css_class))
diff --git a/rows/plugins/plugin_json.py b/rows/plugins/plugin_json.py
index 22628e03..3b767a72 100644
--- a/rows/plugins/plugin_json.py
+++ b/rows/plugins/plugin_json.py
@@ -35,6 +35,7 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
     filename, fobj = get_filename_and_fobj(filename_or_fobj)
 
     json_obj = json.load(fobj, encoding=encoding)
+    # TODO: may use import_from_dicts here
     field_names = list(json_obj[0].keys())
     table_rows = [[item[key] for key in field_names] for item in json_obj]
 
@@ -44,6 +45,9 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
     return create_table([field_names] + table_rows, meta=meta, *args, **kwargs)
 
 
+import_from_json.is_lazy = False
+
+
 def _convert(value, field_type, *args, **kwargs):
     if value is None or field_type in (
                 fields.BinaryField,
@@ -74,6 +78,8 @@ def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,
     fields = table.fields
     prepared_table = prepare_to_export(table, *args, **kwargs)
     field_names = next(prepared_table)
+
+    # TODO: could be lazy so we don't need to store the whole table into memory
     data = [{field_name: _convert(value, fields[field_name], *args, **kwargs)
              for field_name, value in zip(field_names, row)}
             for row in prepared_table]
diff --git a/rows/plugins/plugin_parquet.py b/rows/plugins/plugin_parquet.py
index 13cb0155..eda7edc8 100644
--- a/rows/plugins/plugin_parquet.py
+++ b/rows/plugins/plugin_parquet.py
@@ -52,8 +52,12 @@ def import_from_parquet(filename_or_fobj, *args, **kwargs):
                          for schema in parquet._read_footer(fobj).schema
                          if schema.type is not None])
     header = list(types.keys())
-    table_rows = list(parquet.reader(fobj))  # TODO: be lazy
+    # TODO: make it lazy
+    table_rows = list(parquet.reader(fobj))
 
     meta = {'imported_from': 'parquet', 'filename': filename,}
     return create_table([header] + table_rows, meta=meta, force_types=types,
                         *args, **kwargs)
+
+
+import_from_parquet.is_lazy = False
diff --git a/rows/plugins/sqlite.py b/rows/plugins/sqlite.py
index 17bff1b2..e3e87464 100644
--- a/rows/plugins/sqlite.py
+++ b/rows/plugins/sqlite.py
@@ -133,6 +133,9 @@ def import_from_sqlite(filename_or_connection, table_name='table1', query=None,
     return create_table(data, meta=meta, *args, **kwargs)
 
 
+import_from_sqlite.is_lazy = True
+
+
 def export_to_sqlite(table, filename_or_connection, table_name=None,
                      table_name_format='table{index}', batch_size=100,
                      callback=None, *args, **kwargs):
diff --git a/rows/plugins/txt.py b/rows/plugins/txt.py
index cd4d0cd8..648f85ff 100644
--- a/rows/plugins/txt.py
+++ b/rows/plugins/txt.py
@@ -175,6 +175,9 @@ def import_from_txt(filename_or_fobj, encoding='utf-8',
     return create_table(table_rows, meta=meta, *args, **kwargs)
 
 
+import_from_txt.is_lazy = False
+
+
 def export_to_txt(table, filename_or_fobj=None, encoding=None,
                   frame_style="ASCII", safe_none_frame=True, *args, **kwargs):
     """Export a `rows.Table` to text.
diff --git a/rows/plugins/utils.py b/rows/plugins/utils.py
index 71f1467b..4ad6ed99 100644
--- a/rows/plugins/utils.py
+++ b/rows/plugins/utils.py
@@ -149,7 +149,10 @@ def func(rows_data):
 def create_table(data, meta=None, fields=None, skip_header=True,
                  import_fields=None, samples=None, force_types=None,
                  lazy=False, *args, **kwargs):
+    # TODO: change samples to be a fixed number
+    # TODO: may change samples logic (`float('inf')` or `all`)
     # TODO: add auto_detect_types=True parameter
+
     table_rows = iter(data)
     sample_rows = []
 
@@ -171,6 +174,9 @@ def create_table(data, meta=None, fields=None, skip_header=True,
         if not isinstance(fields, OrderedDict):
             raise ValueError('`fields` must be an `OrderedDict`')
 
+        # TODO: if `fields` is set, we're going to have the wrong order,
+        # compared to the first row (header).
+
         if skip_header:
             next(table_rows)
 
@@ -195,6 +201,7 @@ def create_table(data, meta=None, fields=None, skip_header=True,
 
     if not lazy:
         table = Table(fields=fields, meta=meta)
+
         # TODO: put this inside Table.__init__
         for row in chain(sample_rows, table_rows):
             table.append({field_name: value
diff --git a/rows/plugins/xls.py b/rows/plugins/xls.py
index acf7b06d..eb089dd5 100644
--- a/rows/plugins/xls.py
+++ b/rows/plugins/xls.py
@@ -163,6 +163,9 @@ def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
     return create_table(table_rows, meta=meta, *args, **kwargs)
 
 
+import_from_xls.is_lazy = False
+
+
 def export_to_xls(table, filename_or_fobj=None, sheet_name='Sheet1', *args,
                   **kwargs):
     """Export the rows.Table to XLS file and return the saved file."""
diff --git a/rows/plugins/xlsx.py b/rows/plugins/xlsx.py
index 5ca95495..7efad9d6 100644
--- a/rows/plugins/xlsx.py
+++ b/rows/plugins/xlsx.py
@@ -79,12 +79,16 @@ def import_from_xlsx(filename_or_fobj, sheet_name=None, sheet_index=0,
                   for row_index in range(start_row + 1, end_row + 2)]
 
     filename, _ = get_filename_and_fobj(filename_or_fobj, dont_open=True)
+
     metadata = {'imported_from': 'xlsx',
                 'filename': filename,
                 'sheet_name': sheet_name, }
     return create_table(table_rows, meta=metadata, *args, **kwargs)
 
 
+import_from_xlsx.is_lazy = False
+
+
 FORMATTING_STYLES = {
         fields.DateField: 'YYYY-MM-DD',
         fields.DatetimeField: 'YYYY-MM-DD HH:MM:SS',
diff --git a/rows/plugins/xpath.py b/rows/plugins/xpath.py
index 1a89ae55..825cc9e2 100644
--- a/rows/plugins/xpath.py
+++ b/rows/plugins/xpath.py
@@ -68,6 +68,7 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
 
     filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
     xml = fobj.read().decode(encoding)
+    # TODO: make it lazy (is it possible with lxml?)
     tree = tree_from_string(xml)
     row_elements = tree.xpath(rows_xpath)
 
@@ -79,3 +80,6 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
             'filename': filename,
             'encoding': encoding,}
     return create_table([header] + result_rows, meta=meta, *args, **kwargs)
+
+
+import_from_xpath.is_lazy = False
diff --git a/tests/tests_plugin_csv.py b/tests/tests_plugin_csv.py
index 5abadcb2..fe0dedeb 100644
--- a/tests/tests_plugin_csv.py
+++ b/tests/tests_plugin_csv.py
@@ -56,6 +56,7 @@ def test_imports(self):
                       rows.plugins.plugin_csv.import_from_csv)
         self.assertIs(rows.export_to_csv,
                       rows.plugins.plugin_csv.export_to_csv)
+        self.assertTrue(rows.import_from_csv.is_lazy)
 
     @mock.patch('rows.plugins.plugin_csv.create_table')
     def test_import_from_csv_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_dicts.py b/tests/tests_plugin_dicts.py
index 47d97fca..0309bd11 100644
--- a/tests/tests_plugin_dicts.py
+++ b/tests/tests_plugin_dicts.py
@@ -45,6 +45,7 @@ class PluginDictTestCase(utils.RowsTestMixIn, unittest.TestCase):
     def test_imports(self):
         self.assertIs(rows.import_from_dicts, rows.plugins.dicts.import_from_dicts)
         self.assertIs(rows.export_to_dicts, rows.plugins.dicts.export_to_dicts)
+        self.assertTrue(rows.import_from_dicts.is_lazy)
 
     @mock.patch("rows.plugins.dicts.create_table")
     def test_import_from_dicts_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_html.py b/tests/tests_plugin_html.py
index 81ede706..694a81f7 100644
--- a/tests/tests_plugin_html.py
+++ b/tests/tests_plugin_html.py
@@ -52,6 +52,7 @@ def test_imports(self):
         self.assertIs(rows.import_from_html,
                       rows.plugins.plugin_html.import_from_html)
         self.assertIs(rows.export_to_html, rows.plugins.plugin_html.export_to_html)
+        self.assertFalse(rows.import_from_html.is_lazy)
 
     def test_import_from_html_filename(self):
         table = rows.import_from_html(self.filename, encoding=self.encoding)
@@ -87,7 +88,7 @@ def test_import_from_html_uses_create_table(self, mocked_create_table):
         call = mocked_create_table.call_args
         kwargs['meta'] = {'imported_from': 'html',
                           'filename': self.filename,
-                          'encoding': 'iso-8859-1',}
+                          'encoding': 'iso-8859-1', }
         self.assertEqual(call[1], kwargs)
 
     def test_export_to_html_filename(self):
diff --git a/tests/tests_plugin_json.py b/tests/tests_plugin_json.py
index 437a2c3b..b01ab42a 100644
--- a/tests/tests_plugin_json.py
+++ b/tests/tests_plugin_json.py
@@ -42,6 +42,7 @@ def test_imports(self):
                       rows.plugins.plugin_json.import_from_json)
         self.assertIs(rows.export_to_json,
                       rows.plugins.plugin_json.export_to_json)
+        self.assertFalse(rows.import_from_json.is_lazy)
 
     @mock.patch('rows.plugins.plugin_json.create_table')
     def test_import_from_json_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_ods.py b/tests/tests_plugin_ods.py
index 66842c7a..cd60528f 100644
--- a/tests/tests_plugin_ods.py
+++ b/tests/tests_plugin_ods.py
@@ -36,6 +36,7 @@ class PluginOdsTestCase(utils.RowsTestMixIn, unittest.TestCase):
 
     def test_imports(self):
         self.assertIs(rows.import_from_ods, rows.plugins.ods.import_from_ods)
+        self.assertFalse(rows.import_from_ods.is_lazy)
 
     @mock.patch('rows.plugins.ods.create_table')
     def test_import_from_ods_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_parquet.py b/tests/tests_plugin_parquet.py
index c89b47cc..6daeea5e 100644
--- a/tests/tests_plugin_parquet.py
+++ b/tests/tests_plugin_parquet.py
@@ -61,6 +61,7 @@ class PluginParquetTestCase(unittest.TestCase):
     def test_imports(self):
         self.assertIs(rows.import_from_parquet,
                       rows.plugins.plugin_parquet.import_from_parquet)
+        self.assertFalse(rows.import_from_parquet.is_lazy)
 
     @mock.patch('rows.plugins.plugin_parquet.create_table')
     def test_import_from_parquet_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_sqlite.py b/tests/tests_plugin_sqlite.py
index 75e0cc94..ad4e0dee 100644
--- a/tests/tests_plugin_sqlite.py
+++ b/tests/tests_plugin_sqlite.py
@@ -50,6 +50,7 @@ def test_imports(self):
                       rows.plugins.sqlite.import_from_sqlite)
         self.assertIs(rows.export_to_sqlite,
                       rows.plugins.sqlite.export_to_sqlite)
+        self.assertTrue(rows.import_from_sqlite.is_lazy)
 
     @mock.patch('rows.plugins.sqlite.create_table')
     def test_import_from_sqlite_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_txt.py b/tests/tests_plugin_txt.py
index 442a0e92..9e3cbd4c 100644
--- a/tests/tests_plugin_txt.py
+++ b/tests/tests_plugin_txt.py
@@ -41,6 +41,7 @@ class PluginTxtTestCase(utils.RowsTestMixIn, unittest.TestCase):
     def test_imports(self):
         self.assertIs(rows.import_from_txt, rows.plugins.txt.import_from_txt)
         self.assertIs(rows.export_to_txt, rows.plugins.txt.export_to_txt)
+        self.assertFalse(rows.import_from_txt.is_lazy)
 
     @mock.patch('rows.plugins.txt.create_table')
     def test_import_from_txt_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_xls.py b/tests/tests_plugin_xls.py
index c40f64e0..233597b3 100644
--- a/tests/tests_plugin_xls.py
+++ b/tests/tests_plugin_xls.py
@@ -44,6 +44,7 @@ class PluginXlsTestCase(utils.RowsTestMixIn, unittest.TestCase):
     def test_imports(self):
         self.assertIs(rows.import_from_xls, rows.plugins.xls.import_from_xls)
         self.assertIs(rows.export_to_xls, rows.plugins.xls.export_to_xls)
+        self.assertFalse(rows.import_from_xls.is_lazy)
 
     @mock.patch('rows.plugins.xls.create_table')
     def test_import_from_xls_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_xlsx.py b/tests/tests_plugin_xlsx.py
index 701c72b9..08fe85fa 100644
--- a/tests/tests_plugin_xlsx.py
+++ b/tests/tests_plugin_xlsx.py
@@ -43,6 +43,7 @@ def test_imports(self):
                       rows.plugins.xlsx.import_from_xlsx)
         self.assertIs(rows.export_to_xlsx,
                       rows.plugins.xlsx.export_to_xlsx)
+        self.assertFalse(rows.import_from_xlsx.is_lazy)
 
     @mock.patch('rows.plugins.xlsx.create_table')
     def test_import_from_xlsx_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_xpath.py b/tests/tests_plugin_xpath.py
index e5680e5f..1b5fbf1f 100644
--- a/tests/tests_plugin_xpath.py
+++ b/tests/tests_plugin_xpath.py
@@ -106,9 +106,9 @@ def test_import_from_xpath_unescape_and_extract_text(self):
         fields_xpath = OrderedDict([('name', './/text()'),
                                     ('link', './/a/@href')])
         table = rows.import_from_xpath(BytesIO(html),
+                                       encoding='utf-8',
                                        rows_xpath=rows_xpath,
-                                       fields_xpath=fields_xpath,
-                                       encoding='utf-8')
+                                       fields_xpath=fields_xpath)
         self.assertEqual(table[0].name, 'Abadia de Goiás (GO)')
         self.assertEqual(table[1].name, 'Abadiânia (GO)')
 

From 9d2504a1ebc89195e1fcc294982a707c43cd1375 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Justen=20=28=40turicas=29?=
 <alvarojusten@gmail.com>
Date: Thu, 22 Jun 2017 09:52:42 -0300
Subject: [PATCH 7/7] Change `rows {convert,query}` to be lazy

---
 rows/cli.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/rows/cli.py b/rows/cli.py
index aada4425..8e1f631a 100755
--- a/rows/cli.py
+++ b/rows/cli.py
@@ -379,27 +379,30 @@ def query(input_encoding, output_encoding, input_locale, output_locale,
             if input_locale is not None:
                 with rows.locale_context(input_locale):
                     table = import_from_source(source, DEFAULT_INPUT_ENCODING,
-                            samples=samples)
+                            lazy=True, samples=samples)
             else:
                 table = import_from_source(source, DEFAULT_INPUT_ENCODING,
-                        samples=samples)
+                        lazy=True, samples=samples)
 
             sqlite_connection = sqlite3.Connection(':memory:')
             rows.export_to_sqlite(table,
                                   sqlite_connection,
                                   table_name='table1')
-            result = rows.import_from_sqlite(sqlite_connection, query=query)
+            result = rows.import_from_sqlite(sqlite_connection, query=query,
+                    lazy=True, samples=samples)
 
     else:
         # TODO: if all sources are SQLite we can also optimize the import
         if input_locale is not None:
             with rows.locale_context(input_locale):
                 tables = [_import_table(source, encoding=input_encoding,
-                                        verify_ssl=verify_ssl, samples=samples)
+                                        verify_ssl=verify_ssl, lazy=True,
+                                        samples=samples)
                           for source in sources]
         else:
             tables = [_import_table(source, encoding=input_encoding,
-                                    verify_ssl=verify_ssl, samples=samples)
+                                    verify_ssl=verify_ssl, lazy=True,
+                                    samples=samples)
                       for source in sources]
 
         sqlite_connection = sqlite3.Connection(':memory:')
@@ -408,7 +411,8 @@ def query(input_encoding, output_encoding, input_locale, output_locale,
                                   sqlite_connection,
                                   table_name='table{}'.format(index))
 
-        result = rows.import_from_sqlite(sqlite_connection, query=query)
+        result = rows.import_from_sqlite(sqlite_connection, query=query,
+                lazy=True, samples=samples)
 
     # TODO: may use sys.stdout.encoding if output_file = '-'
     output_encoding = output_encoding or sys.stdout.encoding or \