Skip to content

Commit 8305098

Browse files
authored
Add mimesis support (#10)
* Add mimesis support * Remove support for Python 3.9
1 parent ba35f27 commit 8305098

9 files changed

Lines changed: 154 additions & 87 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ jobs:
55
strategy:
66
matrix:
77
os: [ubuntu-latest, windows-latest, macos-latest]
8-
python-version: ["3.9", "3.10", "3.11"]
8+
python-version: ["3.10", "3.11", "3.12"]
99
runs-on: ${{ matrix.os }}
1010
defaults:
1111
run:

.github/workflows/pypi.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
- name: Set up Python
1313
uses: actions/setup-python@v4
1414
with:
15-
python-version: 3.9
15+
python-version: 3.12
1616
- name: cache poetry install
1717
uses: actions/cache@v3
1818
with:

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,19 @@ fake -n 10 pyint,user_name,date_this_year -f json -c id,awesome_name,last_attent
8787
{"id": 1967, "awesome_name": "jmendoza", "last_attention_at": "2023-01-23"}
8888
```
8989

90+
### Providers (beta)
91+
92+
While [Faker](https://faker.readthedocs.io) is a sweet library, we all like options, don't we? [Mimesis](https://mimesis.name/en/master/) is _also_ awesome and can be quite a bit faster than Faker. 🤫 You can use a different provider by using `-p mimesis`.
93+
94+
> [!NOTE]
95+
> Providers use their own syntax for data types, so you must change out your column names as necessary.
96+
97+
To generate the same dataset above with Mimesis for example:
98+
99+
```bash
100+
fake -p mimesis -n 10 "numeric.integer_number(0),person.username,datetime.date(2024)" -f json -c id,awesome_name,last_attention_at
101+
```
102+
90103
### Provider Arguments
91104

92105
Some [Faker providers](https://faker.readthedocs.io/en/master/providers/baseprovider.html) (like `pyint`) take arguments. You can also specify those if you like, separated by semi-colons (_because some arguments take a comma-separated string :)_)

faker_cli/cli.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,9 @@
44
from faker import Faker
55

66
from faker_cli.parser import infer_column_names, parse_column_types
7-
from faker_cli.templates import (
8-
CloudFrontLogs,
9-
CloudFrontWriter,
10-
S3AccessLogs,
11-
S3AccessWriter,
12-
)
7+
from faker_cli.providers.faker import FakerProvider
8+
from faker_cli.providers.mimesis import MimesisProvider
9+
from faker_cli.templates import CloudFrontWriter, S3AccessWriter
1310
from faker_cli.writer import CSVWriter, JSONWriter
1411

1512
KLAS_MAPPER = {
@@ -22,10 +19,6 @@
2219
"cloudfront": [CloudFrontWriter, "cloudfront_log"],
2320
}
2421

25-
fake = Faker()
26-
fake.add_provider(S3AccessLogs)
27-
fake.add_provider(CloudFrontLogs)
28-
2922

3023
@click.command()
3124
@click.option("--num-rows", "-n", default=1, help="Number of rows")
@@ -40,7 +33,8 @@
4033
@click.option("--columns", "-c", help="Column names", default=None, required=False)
4134
@click.option("--template", "-t", help="Template to use", type=click.Choice(["s3access", "cloudfront"]), default=None)
4235
@click.argument("column_types", required=False)
43-
def main(num_rows, format, output, columns, template, column_types):
36+
@click.option("--provider", "-p", help="Fake data provider", type=click.Choice(["faker", "mimesis"]), default="faker")
37+
def main(num_rows, format, output, columns, template, column_types, provider):
4438
"""
4539
Generate fake data, easily.
4640
@@ -49,13 +43,24 @@ def main(num_rows, format, output, columns, template, column_types):
4943
5044
You can also use --template for real-world synthetic data.
5145
"""
46+
if provider == "faker":
47+
fake = FakerProvider()
48+
elif provider == "mimesis":
49+
fake = MimesisProvider()
50+
else:
51+
pass
52+
5253
# Do some initial validation - we must have either template or column types
5354
if not template and not column_types:
5455
ctx = click.get_current_context()
5556
click.echo(ctx.get_help())
5657
ctx.exit()
5758
raise click.BadArgumentUsage("either --template or a list of Faker property names must be provided.")
5859

60+
# Templates are only supported with Faker at the moment
61+
if template and provider != "faker":
62+
raise click.BadArgumentUsage('templates are only supported with the "faker" provider.')
63+
5964
# Parquet output requires a filename
6065
if format in ["parquet", "deltalake"] and output is None:
6166
raise click.BadArgumentUsage("parquet | deltalake formats requires --output/-o filename parameter.")
@@ -105,13 +110,14 @@ def main(num_rows, format, output, columns, template, column_types):
105110
raise click.ClickException(f"Format {format} not supported.")
106111
writer = format_klas(sys.stdout, headers, output)
107112
for i in range(num_rows):
108-
writer.write(generate_row(fake, col_types))
113+
writer.write(fake.generate_row(col_types))
109114
writer.close()
110115

116+
111117
def generate_row(fake: Faker, column_types: list[tuple[str, list]]) -> list[str]:
    """Produce a single row of fake values, one per requested column.

    Args:
        fake: Object whose ``format(name, *args)`` generates a value, and
            whose ``unique.format(name, *args)`` is used for column types
            prefixed with ``"unique."``.
        column_types: ``(type_name, args)`` pairs; ``args`` are passed
            positionally to the formatter.

    Returns:
        The generated values, in the same order as ``column_types``.

    NOTE(review): ``main`` now calls ``fake.generate_row(...)`` on the
    provider object, so this module-level helper may be unused - confirm
    before removing.
    """
    unique_prefix = "unique."
    row = []
    for ctype, args in column_types:
        if ctype.startswith(unique_prefix):
            # Strip the marker and route through the uniqueness proxy.
            value = fake.unique.format(ctype.removeprefix(unique_prefix), *args)
        else:
            value = fake.format(ctype, *args)
        row.append(value)
    return row

faker_cli/providers/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
class BaseProvider:
    """Base class declaring the methods a fake-data provider exposes.

    It mirrors the two methods that ``FakerProvider`` and
    ``MimesisProvider`` both define. The defaults raise
    ``NotImplementedError`` so that a provider which leaves one out fails
    with a clear error instead of an ``AttributeError``.
    """

    def generate_row(self, column_types: list[tuple[str, list]]) -> list[str]:
        """Return one row of values, one per ``(type_name, args)`` pair.

        Raises:
            NotImplementedError: Always; concrete providers override this.
        """
        raise NotImplementedError

    def format(self, log_entry) -> list[str]:
        """Return generated data for the named template log entry.

        Raises:
            NotImplementedError: Always; concrete providers override this.
        """
        raise NotImplementedError

faker_cli/providers/faker.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from faker import Faker
2+
3+
from faker_cli.templates import CloudFrontLogs, S3AccessLogs
4+
5+
6+
class FakerProvider:
    """Fake-data provider that delegates to a ``Faker`` instance."""

    def __init__(self) -> None:
        """Create the ``Faker`` instance and register the log template providers."""
        self.fake = Faker()
        for template_provider in (S3AccessLogs, CloudFrontLogs):
            self.fake.add_provider(template_provider)

    def generate_row(self, column_types: list[tuple[str, list]]) -> list[str]:
        """Generate one value per ``(type_name, args)`` pair, in order.

        A type name starting with ``"unique."`` is generated through
        ``self.fake.unique`` with the prefix removed; every other name goes
        straight to ``self.fake.format``. ``args`` are passed positionally.
        """
        unique_prefix = "unique."
        row = []
        for ctype, args in column_types:
            if ctype.startswith(unique_prefix):
                value = self.fake.unique.format(ctype.removeprefix(unique_prefix), *args)
            else:
                value = self.fake.format(ctype, *args)
            row.append(value)
        return row

    def format(self, log_entry) -> list[str]:
        """Return whatever ``self.fake.format`` produces for ``log_entry``."""
        formatted = self.fake.format(log_entry)
        return formatted

faker_cli/providers/mimesis.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from mimesis import Field
2+
3+
4+
class MimesisProvider:
    """Fake-data provider that delegates to a Mimesis ``Field``."""

    def __init__(self) -> None:
        """Create the ``Field`` instance used to resolve column types."""
        self.field = Field()

    def generate_row(self, column_types: list[tuple[str, list]]) -> list[str]:
        """Generate one value per ``(type_name, args)`` pair, in order.

        Each type name is resolved to a callable through the field's
        ``_lookup_method`` and then called with ``args`` positionally.
        """
        row = []
        for ctype, args in column_types:
            # NOTE(review): relies on a private Field helper; presumably
            # chosen so positional arguments can be passed - confirm.
            method = self.field._lookup_method(ctype)
            row.append(method(*args))
        return row

    def format(self, log_entry) -> list[str]:
        """Template formatting is not implemented for this provider."""
        raise NotImplementedError
13+
14+
15+
# field("person.username", mask="U_d", drange=(100, 1000))

0 commit comments

Comments
 (0)