From a8707277df9eaf6e5d18682a7a7e76eb89f4f1da Mon Sep 17 00:00:00 2001
From: Michael Brown <supermathie@gmail.com>
Date: Wed, 14 Mar 2018 15:49:19 -0400
Subject: [PATCH] Be more robust in handling a user-specified list of fields

---
 es2csv.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/es2csv.py b/es2csv.py
index b948843..a8fa8f9 100755
--- a/es2csv.py
+++ b/es2csv.py
@@ -21,6 +21,22 @@
 import progressbar
 from functools import wraps
 
+def normalize_fields(fields):
+    '''Flatten the user-specified field arguments into a plain list of field names.
+
+    Space and comma are invalid in field names, so entries are split on both; empty names are dropped.
+
+    >>> normalize_fields([])
+    []
+    >>> normalize_fields(['a,b,c', 'd', 'e f g'])
+    ['a', 'b', 'c', 'd', 'e', 'f', 'g']
+    >>> normalize_fields(['a, b,,c '])
+    ['a', 'b', 'c']
+    >>> normalize_fields(['a','b','c'])
+    ['a', 'b', 'c']
+    '''
+    return [f for e in fields for f in e.replace(',', ' ').split()]
+
 FLUSH_BUFFER = 1000  # Chunk of docs to flush in temp file
 CONNECTION_TIMEOUT = 120
 TIMES_TO_TRY = 3
@@ -121,6 +137,7 @@ def next_scroll(scroll_id):
                 self.opts.query, '(%s)' % ' AND '.join(self.opts.tags))
             search_args['q'] = query
 
+        self.opts.fields = normalize_fields(self.opts.fields)
         if '_all' not in self.opts.fields:
             search_args['_source_include'] = ','.join(self.opts.fields)
             self.csv_headers.extend([field for field in self.opts.fields if '*' not in field])