2525@click .option (
2626 "--format" ,
2727 "-f" ,
28- type = click .Choice (["csv" , "json" , "parquet" , "deltalake" ]),
28+ type = click .Choice (["csv" , "json" , "parquet" , "deltalake" , "iceberg" ]),
2929 default = "csv" ,
3030 help = "Format of the output" ,
3131)
3232@click .option ("--output" , "-o" , type = click .Path (writable = True ))
3333@click .option ("--columns" , "-c" , help = "Column names" , default = None , required = False )
3434@click .option ("--template" , "-t" , help = "Template to use" , type = click .Choice (["s3access" , "cloudfront" ]), default = None )
35+ @click .option ("--catalog" , "-C" , help = "Catalog URI" , default = None , required = False )
3536@click .argument ("column_types" , required = False )
3637@click .option ("--provider" , "-p" , help = "Fake data provider" , type = click .Choice (["faker" , "mimesis" ]), default = "faker" )
37- def main (num_rows , format , output , columns , template , column_types , provider ):
38+ def main (num_rows , format , output , columns , template , catalog , column_types , provider ):
3839 """
3940 Generate fake data, easily.
4041
4142 COLUMN_TYPES is a comma-separated list of Faker property names, like
42- pyint,username ,date_this_year
43+ pyint,user_name ,date_this_year
4344
4445 You can also use --template for real-world synthetic data.
4546 """
@@ -62,10 +63,12 @@ def main(num_rows, format, output, columns, template, column_types, provider):
6263 raise click .BadArgumentUsage ('templates are only supported with the "faker" provider.' )
6364
6465 # Parquet, Delta Lake, and Iceberg output require a filename
65- if format in ["parquet" , "deltalake" ] and output is None :
66- raise click .BadArgumentUsage ("parquet | deltalake formats requires --output/-o filename parameter." )
67- if output is not None and format not in ["parquet" , "deltalake" ]:
66+ if format in ["parquet" , "deltalake" , "iceberg" ] and output is None :
67+ raise click .BadArgumentUsage (f" { format } format requires --output/-o filename parameter." )
68+ if output is not None and format not in ["parquet" , "deltalake" , "iceberg" ]:
6869 raise click .BadArgumentUsage ("output files not supported for csv/json yet." )
70+ if catalog and format not in ['iceberg' ]:
71+ raise click .BadArgumentUsage ("catalog option is only available for Iceberg formats" )
6972
7073 # Optionally load additional features
7174 if format == "parquet" :
@@ -90,6 +93,17 @@ def main(num_rows, format, output, columns, template, column_types, provider):
9093 "Make sure to install faker-cli using `pip install faker-cli[delta]`."
9194 )
9295
96+ if format == "iceberg" :
97+ try :
98+ from faker_cli .writers .iceberg import IcebergWriter
99+
100+ KLAS_MAPPER ["iceberg" ] = IcebergWriter
101+ except ImportError :
102+ raise click .ClickException (
103+ "Using Iceberg writer, but the 'iceberg' package is not installed. "
104+ "Make sure to install faker-cli using `pip install faker-cli[iceberg]`."
105+ )
106+
93107 # If the user provides a template, we use that provider and writer and exit.
94108 # We assume a template has a custom writer that may be different than CSV or JSON
95109 if template :
@@ -108,7 +122,8 @@ def main(num_rows, format, output, columns, template, column_types, provider):
108122 format_klas = KLAS_MAPPER .get (format )
109123 if format_klas is None :
110124 raise click .ClickException (f"Format { format } not supported." )
111- writer = format_klas (sys .stdout , headers , output )
125+ # TODO: catalog is passed positionally to every writer class; consider passing writer-specific options via **kwargs instead.
126+ writer = format_klas (sys .stdout , headers , output , catalog )
112127 for i in range (num_rows ):
113128 writer .write (fake .generate_row (col_types ))
114129 writer .close ()
0 commit comments