# Reconstructed from a git format-patch whose line breaks had been lost.
#
#   Commit:  a8707277df9eaf6e5d18682a7a7e76eb89f4f1da (Mon Sep 17 00:00:00 2001)
#   From:    Michael Brown
#   Date:    Wed, 14 Mar 2018 15:49:19 -0400
#   Subject: [PATCH] Be more robust in handling a user-specified list of fields
#   Target:  es2csv.py (diff --git a/es2csv.py b/es2csv.py,
#            index b948843..a8fa8f9, mode 100755)
#   Stat:    as originally submitted, 1 file changed, 17 insertions(+)
#
# Hunk 1 (@@ -21,6 +21,22 @@) adds normalize_fields() between the imports
# (`import progressbar`, `from functools import wraps`) and the module
# constants that follow the function below.


def normalize_fields(fields):
    '''Normalize the different possible ways to specify the document fields.

    Each entry of ``fields`` may hold one field name or several names
    separated by commas and/or spaces. Field names are assumed to contain
    neither character, so both are treated as separators.

    Empty tokens produced by adjacent, leading or trailing separators
    (for example ``'a, b'`` or ``''``) are dropped, so the result never
    contains an empty field name.

    :param fields: iterable of strings, or ``None``.
    :return: flat list of field names in their original order; ``[]`` when
        ``fields`` is ``None`` or empty.

    >>> normalize_fields([])
    []
    >>> normalize_fields(['a,b,c', 'd', 'e f g'])
    ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    >>> normalize_fields(['a,b,c'])
    ['a', 'b', 'c']
    >>> normalize_fields(['a','b','c'])
    ['a', 'b', 'c']
    >>> normalize_fields(['a, b', ' c ', ''])
    ['a', 'b', 'c']
    '''
    if not fields:
        # Covers both None and an empty list: nothing was requested.
        return []
    return [
        name
        for entry in fields
        # Turn spaces into commas so a single split handles both separators.
        for name in entry.replace(' ', ',').split(',')
        # 'a, b' becomes 'a,,b'; skip the empty token between the commas.
        if name
    ]


FLUSH_BUFFER = 1000  # Chunk of docs to flush in temp file
CONNECTION_TIMEOUT = 120
TIMES_TO_TRY = 3


# Hunk 2 (@@ -121,6 +137,7 @@, hunk context `def next_scroll(scroll_id):`)
# wires the helper in right before the field list is used to build the search
# request. The original indentation was lost, so the lines are listed flat and
# their nesting is NOT reconstructed here; the '+' line is the only addition.
#
#     self.opts.query, '(%s)' % ' AND '.join(self.opts.tags))
#     search_args['q'] = query
#
#   + self.opts.fields = normalize_fields(self.opts.fields)
#     if '_all' not in self.opts.fields:
#     search_args['_source_include'] = ','.join(self.opts.fields)
#     self.csv_headers.extend([field for field in self.opts.fields if '*' not in field])