1- from faker import Faker
2- import click
31import sys
4- from faker_cli .templates import CloudFrontWriter , S3AccessLogs , S3AccessWriter , CloudTrailLogs , CloudFrontLogs
5-
6- from faker_cli .writer import CSVWriter , JSONWriter , ParquetWriter , DeltaLakeWriter
72from typing import List
83
4+ import click
5+ from faker import Faker
6+
7+ from faker_cli .templates import (
8+ CloudFrontLogs ,
9+ CloudFrontWriter ,
10+ S3AccessLogs ,
11+ S3AccessWriter ,
12+ )
13+ from faker_cli .writer import CSVWriter , JSONWriter
14+
15+
def infer_column_names(col_names, col_types: str) -> List[str]:
    """
    Infer column names from column types

    :param col_names: optional comma-separated header names. When falsy
        (``None`` or an empty string) the names are derived from
        ``col_types`` instead.
    :param col_types: comma-separated list of column types.
    :return: one name per comma-separated entry, with surrounding
        whitespace removed.
    """
    # For now, nothing special - but eventually we need to parse things out
    source = col_names if col_names else col_types
    # Strip each entry so that "a, b" does not produce a header named " b".
    return [name.strip() for name in source.split(",")]
1825
26+
# Maps the --format value to the writer class used for output.
# Only the always-available writers are listed here; the "parquet" and
# "deltalake" entries are added inside main() once their optional
# writer modules have been imported successfully.
KLAS_MAPPER = {
    "csv": CSVWriter,
    "json": JSONWriter,
}
2531
2632TEMPLATE_MAPPER = {
@@ -32,9 +38,16 @@ def infer_column_names(col_names, col_types: str) -> List[str]:
3238fake .add_provider (S3AccessLogs )
3339fake .add_provider (CloudFrontLogs )
3440
41+
3542@click .command ()
3643@click .option ("--num-rows" , "-n" , default = 1 , help = "Number of rows" )
37- @click .option ("--format" , "-f" , type = click .Choice (["csv" , "json" , "parquet" , "deltalake" ]), default = "csv" , help = "Format of the output" )
44+ @click .option (
45+ "--format" ,
46+ "-f" ,
47+ type = click .Choice (["csv" , "json" , "parquet" , "deltalake" ]),
48+ default = "csv" ,
49+ help = "Format of the output" ,
50+ )
3851@click .option ("--output" , "-o" , type = click .Path (writable = True ))
3952@click .option ("--columns" , "-c" , help = "Column names" , default = None , required = False )
4053@click .option ("--template" , "-t" , help = "Template to use" , type = click .Choice (["s3access" , "cloudfront" ]), default = None )
@@ -53,16 +66,37 @@ def main(num_rows, format, output, columns, template, column_types):
5366 ctx = click .get_current_context ()
5467 click .echo (ctx .get_help ())
5568 ctx .exit ()
56- raise click .BadArgumentUsage (
57- "either --template or a list of Faker property names must be provided."
58- )
69+ raise click .BadArgumentUsage ("either --template or a list of Faker property names must be provided." )
5970
6071 # Parquet output requires a filename
6172 if format in ["parquet" , "deltalake" ] and output is None :
6273 raise click .BadArgumentUsage ("parquet | deltalake formats requires --output/-o filename parameter." )
6374 if output is not None and format not in ["parquet" , "deltalake" ]:
6475 raise click .BadArgumentUsage ("output files not supported for csv/json yet." )
65-
76+
77+ # Optionally load additional features
78+ if format == "parquet" :
79+ try :
80+ from faker_cli .writers .parquet import ParquetWriter
81+
82+ KLAS_MAPPER ["parquet" ] = ParquetWriter
83+ except ImportError :
84+ raise click .ClickException (
85+ "Using Parquet writer, but the 'pyarrow' package is not installed. "
86+ "Make sure to install faker-cli using `pip install faker-cli[parquet]`."
87+ )
88+
89+ if format == "deltalake" :
90+ try :
91+ from faker_cli .writers .delta import DeltaLakeWriter
92+
93+ KLAS_MAPPER ["deltalake" ] = DeltaLakeWriter
94+ except ImportError :
95+ raise click .ClickException (
96+ "Using Delta writer, but the 'deltalake' package is not installed. "
97+ "Make sure to install faker-cli using `pip install faker-cli[delta]`."
98+ )
99+
66100 # If the user provides a template, we use that provider and writer and exit.
67101 # We assume a template has a custom writer that may be different than CSV or JSON
68102 if template :
@@ -72,13 +106,13 @@ def main(num_rows, format, output, columns, template, column_types):
72106 row = fake .format (log_entry )
73107 writer .write (row )
74108 return
75-
109+
76110 # Now, if a template hasn't been provided, generate some fake data!
77111 col_types = column_types .split ("," )
78112 headers = infer_column_names (columns , column_types )
79113 writer = KLAS_MAPPER .get (format )(sys .stdout , headers , output )
80114 for i in range (num_rows ):
81115 # TODO: Handle args
82- row = [ fake .format (ctype ) for ctype in col_types ]
116+ row = [fake .format (ctype ) for ctype in col_types ]
83117 writer .write (row )
84118 writer .close ()
0 commit comments