Skip to content

Commit c895e00

Browse files
CASSANDRA-19985: Enhance CQLSH to support machine-readable output formatting (csv, json)
1 parent 02352b7 commit c895e00

5 files changed

Lines changed: 209 additions & 17 deletions

File tree

conf/cqlshrc.sample

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
; version = None
3838

3939
[ui]
40+
;; The format of the output. Valid values are tabular, csv, and json.
41+
; mode = tabular
42+
4043
;; Whether or not to display query results with colors
4144
; color = on
4245

doc/modules/cassandra/pages/managing/tools/cqlsh.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ Options:
9898
Collect coverage data
9999
`--encoding=ENCODING`::
100100
Specify a non-default encoding for output. (Default: utf-8)
101+
`--mode=MODE`::
102+
Specify the output display format. Valid values are `tabular` (default), `csv`, and `json`.
101103
`--cqlshrc=CQLSHRC`::
102104
Specify an alternative cqlshrc file location.
103105
`--credentials=CREDENTIALS`::

pylib/cqlshlib/cqlshmain.py

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@
4646
from cqlshlib import cql3handling, pylexotron, sslhandling, cqlshhandling, authproviderhandling
4747
from cqlshlib.copyutil import ExportTask, ImportTask
4848
from cqlshlib.displaying import (ANSI_RESET, BLUE, COLUMN_NAME_COLORS, CYAN,
49-
RED, WHITE, FormattedValue, colorme)
49+
RED, WHITE, FormattedValue, colorme,
50+
TabularTablePrinter, CsvTablePrinter, JsonTablePrinter)
5051
from cqlshlib.formatting import (DEFAULT_DATE_FORMAT, DEFAULT_NANOTIME_FORMAT,
5152
DEFAULT_TIMESTAMP_FORMAT, CqlType, DateTimeFormat,
5253
format_by_type)
@@ -284,13 +285,18 @@ def __init__(self, hostname, port, config_file, color=False,
284285
connect_timeout=DEFAULT_CONNECT_TIMEOUT_SECONDS,
285286
is_subshell=False,
286287
auth_provider=None,
287-
disable_history=False):
288+
disable_history=False,
289+
mode='tabular'):
288290
cmd.Cmd.__init__(self, completekey=completekey)
289291
self.hostname = hostname
290292
self.port = port
291293
self.auth_provider = auth_provider
292294
self.username = username
293295
self.config_file = config_file
296+
self.mode = mode
297+
298+
if self.mode in ('csv', 'json'):
299+
self.color = False
294300

295301
if isinstance(auth_provider, PlainTextAuthProvider):
296302
self.username = auth_provider.username
@@ -956,32 +962,37 @@ def perform_simple_statement(self, statement):
956962
def print_result(self, result, table_meta):
957963
self.decoding_errors = []
958964

959-
self.writeresult("")
965+
if self.mode == 'csv':
966+
printer = CsvTablePrinter(self)
967+
elif self.mode == 'json':
968+
printer = JsonTablePrinter(self)
969+
else:
970+
self.writeresult("")
971+
printer = TabularTablePrinter(self, self.tty)
960972

961-
def print_all(result, table_meta, tty):
962-
# Return the number of rows in total
973+
def print_all(result, table_meta, tty, printer):
963974
num_rows = 0
964975
is_first = True
965976
while True:
966-
# Always print for the first page even it is empty
967977
if result.current_rows or is_first:
968978
with_header = is_first or tty
969-
self.print_static_result(result, table_meta, with_header, tty, num_rows)
979+
self.print_static_result(result, table_meta, with_header, tty, num_rows, printer)
970980
num_rows += len(result.current_rows)
971981
if result.has_more_pages:
972982
if self.shunted_query_out is None and tty:
973-
# Only pause when not capturing.
974983
input("---MORE---")
975984
result.fetch_next_page()
976985
else:
977-
if not tty:
986+
if not tty and self.mode not in ('csv', 'json'):
978987
self.writeresult("")
979988
break
980989
is_first = False
981990
return num_rows
982991

983-
num_rows = print_all(result, table_meta, self.tty)
984-
self.writeresult("(%d rows)" % num_rows)
992+
num_rows = print_all(result, table_meta, self.tty, printer)
993+
printer.finish()
994+
if self.mode not in ('csv', 'json'):
995+
self.writeresult("(%d rows)" % num_rows)
985996

986997
if self.decoding_errors:
987998
for err in self.decoding_errors[:2]:
@@ -990,15 +1001,18 @@ def print_all(result, table_meta, tty):
9901001
self.writeresult('%d more decoding errors suppressed.'
9911002
% (len(self.decoding_errors) - 2), color=RED)
9921003

993-
def print_static_result(self, result, table_meta, with_header, tty, row_count_offset=0):
1004+
def print_static_result(self, result, table_meta, with_header, tty, row_count_offset=0, printer=None):
9941005
if not result.column_names and not table_meta:
9951006
return
9961007

9971008
column_names = result.column_names or list(table_meta.columns.keys())
9981009
formatted_names = [self.myformat_colname(name, table_meta) for name in column_names]
1010+
9991011
if not result.current_rows:
1000-
# print header only
1001-
self.print_formatted_result(formatted_names, None, with_header=True, tty=tty)
1012+
if printer is None or isinstance(printer, TabularTablePrinter):
1013+
self.print_formatted_result(formatted_names, None, with_header=True, tty=tty)
1014+
elif with_header:
1015+
printer.print_header(formatted_names)
10021016
return
10031017

10041018
cql_types = []
@@ -1009,10 +1023,15 @@ def print_static_result(self, result, table_meta, with_header, tty, row_count_of
10091023

10101024
formatted_values = [list(map(self.myformat_value, [row[c] for c in column_names], cql_types)) for row in result.current_rows]
10111025

1012-
if self.expand_enabled:
1013-
self.print_formatted_result_vertically(formatted_names, formatted_values, row_count_offset)
1026+
if printer is None:
1027+
if self.expand_enabled:
1028+
self.print_formatted_result_vertically(formatted_names, formatted_values, row_count_offset)
1029+
else:
1030+
self.print_formatted_result(formatted_names, formatted_values, with_header, tty)
10141031
else:
1015-
self.print_formatted_result(formatted_names, formatted_values, with_header, tty)
1032+
if with_header:
1033+
printer.print_header(formatted_names)
1034+
printer.print_rows(formatted_names, formatted_values)
10161035

10171036
def print_formatted_result(self, formatted_names, formatted_values, with_header, tty):
10181037
# determine column widths
@@ -2026,6 +2045,7 @@ def read_options(cmdlineargs, parser, config_file, cql_dir, environment=os.envir
20262045
argvalues.completekey = option_with_default(configs.get, 'ui', 'completekey',
20272046
DEFAULT_COMPLETEKEY)
20282047
argvalues.color = option_with_default(configs.getboolean, 'ui', 'color')
2048+
argvalues.mode = option_with_default(configs.get, 'ui', 'mode', 'tabular')
20292049
argvalues.time_format = raw_option_with_default(configs, 'ui', 'time_format',
20302050
DEFAULT_TIMESTAMP_FORMAT)
20312051
argvalues.nanotime_format = raw_option_with_default(configs, 'ui', 'nanotime_format',
@@ -2230,6 +2250,8 @@ def main(cmdline, pkgpath):
22302250
help='Force tty mode (command prompt).')
22312251
parser.add_argument('--disable-history', default=False, action='store_true',
22322252
help='Disable saving of history (existing history will still be loaded)')
2253+
parser.add_argument('--mode', choices=['tabular', 'csv', 'json'],
2254+
help='Specify the output format (tabular, csv, json). Default is tabular.')
22332255

22342256
# This is a hidden option to suppress the warning when the -p/--password command line option is used.
22352257
# Power users may use this option if they know no other people has access to the system where cqlsh is run or don't care about security.
@@ -2357,6 +2379,7 @@ def main(cmdline, pkgpath):
23572379
display_double_precision=options.double_precision,
23582380
display_timezone=timezone,
23592381
max_trace_wait=options.max_trace_wait,
2382+
mode=options.mode,
23602383
ssl=options.ssl,
23612384
single_statement=options.execute,
23622385
request_timeout=options.request_timeout,

pylib/cqlshlib/displaying.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,86 @@ def color_ljust(self, width, fill=' '):
126126
)
127127

128128
NO_COLOR_MAP = dict()
129+
130+
class TablePrinter:
    """Base interface for rendering one query result, possibly page by page.

    Within this change the shell calls ``print_header`` (when a header is
    wanted for the current page), then ``print_rows`` for each page, and
    finally ``finish`` exactly once after the last page.

    ``formatted_names`` is a sequence of formatted column names and
    ``formatted_values`` a sequence of rows of formatted cells; the concrete
    printers below only rely on each item exposing its plain text as
    ``.strval``.
    """

    def print_header(self, formatted_names):
        """Receive the column names for the result being printed."""
        raise NotImplementedError

    def print_rows(self, formatted_names, formatted_values):
        """Render one page of rows."""
        raise NotImplementedError

    def finish(self):
        """Hook invoked once after the last page; a no-op by default."""
        pass


class TabularTablePrinter(TablePrinter):
    """Printer that delegates to the shell's existing table renderers."""

    def __init__(self, shell, tty, row_count_offset=0):
        self._shell = shell
        self._tty = tty
        # Running count of rows already printed, passed to the vertical
        # renderer as its row offset.
        self._row_count_offset = row_count_offset
        # The first page always carries a header; later pages only on a tty.
        self._with_header = True

    def print_header(self, formatted_names):
        # The shell renderers draw the header together with the rows.
        pass

    def print_rows(self, formatted_names, formatted_values, with_header=None):
        """Render a page through the shell.

        ``with_header`` overrides the printer's own header tracking when it
        is not None.
        """
        show_header = self._with_header if with_header is None else with_header
        shell = self._shell
        if shell.expand_enabled:
            shell.print_formatted_result_vertically(
                formatted_names, formatted_values, self._row_count_offset)
        else:
            shell.print_formatted_result(
                formatted_names, formatted_values, show_header, self._tty)
        if formatted_values:
            self._row_count_offset += len(formatted_values)
        # NOTE(review): the scraped source lost its indentation, so it is not
        # certain this assignment was outside the ``if`` above - confirm.
        self._with_header = self._tty


class CsvTablePrinter(TablePrinter):
    """Printer that writes the result as CSV to the shell's ``query_out``."""

    def __init__(self, shell):
        import csv
        self._writer = csv.writer(shell.query_out)
        # Initialised here so print_rows() before print_header() cannot hit
        # an AttributeError.
        self._colnames = None
        self._header_written = False

    def print_header(self, formatted_names):
        """Record the column names and emit the header row once.

        The header is written here rather than with the first data row so
        that an empty result set still produces a header line. Repeated calls
        (one per page on a tty) never duplicate it.
        """
        self._colnames = [name.strval for name in formatted_names]
        self._write_header_once()

    def print_rows(self, formatted_names, formatted_values):
        """Write one CSV record per row; ``None`` or empty pages add nothing."""
        if self._colnames is None and formatted_names is not None:
            # No header was announced: fall back to the names passed here.
            self._colnames = [name.strval for name in formatted_names]
        self._write_header_once()
        if not formatted_values:
            return
        self._writer.writerows(
            [cell.strval for cell in row] for row in formatted_values)

    def _write_header_once(self):
        # Emit the header row unless it was already written or is unknown.
        if self._header_written or self._colnames is None:
            return
        self._writer.writerow(self._colnames)
        self._header_written = True


class JsonTablePrinter(TablePrinter):
    """Printer that writes the result as a single JSON array of objects.

    Every cell is emitted as a JSON string holding the cell's ``.strval``
    text, keyed by column name.
    """

    def __init__(self, shell):
        self._shell = shell
        self._colnames = None
        self._first_row = True
        # Whether the opening '[' has been written yet.
        self._opened = False

    def print_header(self, formatted_names):
        """Record the column names and open the array on the first call only.

        The shell asks for a header on every page when attached to a tty;
        writing '[' each time would make the document invalid JSON.
        """
        self._colnames = [name.strval for name in formatted_names]
        self._open_array()

    def print_rows(self, formatted_names, formatted_values):
        """Append one JSON object per row, comma-separated across pages."""
        import json
        if formatted_values is None:
            return
        if self._colnames is None:
            # No header was announced: fall back to the names passed here.
            self._colnames = [name.strval for name in formatted_names]
        write = self._shell.writeresult
        for row in formatted_values:
            serialized = json.dumps(
                {name: cell.strval for name, cell in zip(self._colnames, row)})
            if self._first_row:
                # Only open the array once there is something to put in it;
                # finish() covers the no-rows case.
                self._open_array()
                write(' ' + serialized, newline=False)
                self._first_row = False
            else:
                write(',\n ' + serialized, newline=False)

    def finish(self):
        """Close the array, emitting ``[]`` if it was never opened."""
        if self._opened:
            self._shell.writeresult('\n]')
        else:
            self._shell.writeresult('[]')

    def _open_array(self):
        # Write the opening bracket exactly once per result.
        if not self._opened:
            self._shell.writeresult('[')
            self._opened = True

pylib/cqlshlib/test/test_cqlsh_output.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,3 +1017,84 @@ def test_quoted_output_text_in_udts(self):
10171017
tty=False, input=query)
10181018
self.assertEqual(0, result)
10191019
self.assertEqual(output.splitlines()[3].strip(), "{data: 'I''m newb'}")
1020+
1021+
def test_csv_output(self):
    """Check that ``--mode csv`` produces well-formed CSV output.

    Covers three things: a plain two-column select, a collection column
    whose rendered text may contain commas (each record must still parse
    into exactly two fields), and a large varint that must not pick up a
    thousands separator.
    """
    import csv
    import io

    def run_csv(query):
        # Run one statement through cqlsh in csv mode and require success.
        output, result = cqlsh_testcall(args=('--mode', 'csv'), prompt=None, env=self.default_env,
                                        tty=False, input=query + '\n')
        self.assertEqual(0, result)
        return output

    def parse_csv(output):
        # Parse with the csv module so quoted fields are handled properly.
        return list(csv.reader(io.StringIO(output.strip())))

    output = run_csv("SELECT a, b FROM twenty_rows_table WHERE a IN ('1', '2');")
    lines = [line.strip() for line in output.strip().splitlines()]
    self.assertTrue(lines, msg='csv mode produced no output')
    self.assertEqual(lines[0], 'a,b')
    self.assertIn('1,1', lines)
    self.assertIn('2,2', lines)

    rows = parse_csv(run_csv("SELECT num, setcol FROM has_all_types WHERE num = 0;"))
    # Without at least one data row the field-count loop would pass vacuously.
    self.assertGreater(len(rows), 1, msg='expected a header and at least one data row: %r' % rows)
    self.assertEqual(rows[0], ['num', 'setcol'])
    for row in rows[1:]:
        self.assertEqual(len(row), 2,
                         msg='CSV row has wrong field count (commas inside setcol not quoted?): %r' % row)

    rows3 = parse_csv(run_csv("SELECT num, varintcol FROM has_all_types WHERE num = 0;"))
    # Fail with a clear assertion instead of an IndexError on missing rows.
    self.assertGreater(len(rows3), 1, msg='expected a header and at least one data row: %r' % rows3)
    varint_val = rows3[1][1]
    self.assertNotIn(',', varint_val,
                     msg='Large varint should not contain thousands separator in CSV: %r' % varint_val)
1055+
1056+
def test_json_output(self):
    """Check that ``--mode json`` produces one valid JSON array per query.

    Covers a plain two-column select, collection columns (which are expected
    to arrive as JSON strings rather than nested JSON values), and a large
    varint rendered without digit grouping.
    """
    import json

    def run_json(query):
        # Run one statement in json mode, require success, and parse the
        # output; invalid JSON is reported as a test failure with the raw
        # output attached.
        output, result = cqlsh_testcall(args=('--mode', 'json'), prompt=None, env=self.default_env,
                                        tty=False, input=query + '\n')
        self.assertEqual(0, result)
        try:
            return json.loads(output)
        except ValueError as e:
            self.fail("Output is not valid JSON: %s\nOutput was:\n%s" % (e, output))

    parsed_json = run_json("SELECT a, b FROM twenty_rows_table WHERE a IN ('1', '2');")
    self.assertEqual(len(parsed_json), 2)
    results = {(item['a'], item['b']) for item in parsed_json}
    self.assertIn(('1', '1'), results)
    self.assertIn(('2', '2'), results)

    rows2 = run_json("SELECT num, setcol, listcol, mapcol FROM has_all_types WHERE num = 0;")
    self.assertEqual(len(rows2), 1)
    row = rows2[0]
    self.assertIsInstance(row['setcol'], str,
                          msg='setcol should be a JSON string, got: %r' % type(row['setcol']))
    self.assertIsInstance(row['listcol'], str,
                          msg='listcol should be a JSON string, got: %r' % type(row['listcol']))
    self.assertIsInstance(row['mapcol'], str,
                          msg='mapcol should be a JSON string, got: %r' % type(row['mapcol']))

    rows3 = run_json("SELECT num, varintcol FROM has_all_types WHERE num = 0;")
    # Fail with a clear assertion instead of an IndexError on an empty array.
    self.assertTrue(rows3, msg='expected at least one row, got: %r' % rows3)
    self.assertEqual(rows3[0]['varintcol'], '10000000000000000000000000')

0 commit comments

Comments
 (0)