Skip to content

Commit 849ed1c

Browse files
authored
Merge pull request #82 from eviltester/69-add-streaming-output-for-all-export-formats
more streaming output
2 parents 9125ca6 + 063c647 commit 849ed1c

11 files changed

Lines changed: 373 additions & 53 deletions

File tree

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,13 @@ Notes:
359359
- for JSON requests, newline characters inside string literals must be escaped as `\\n`
360360
- use `/v1/generate/fromschema` when you want to paste raw multiline text directly
361361

362+
Streaming notes for CLI/core generation:
363+
364+
- stream mode supports: `csv`, `jsonl`, `dsv`, `json`, `xml`
365+
- `json` stream mode emits a valid JSON array payload
366+
- format options that are incompatible with stream mode are ignored where needed, and a warning is reported
367+
- REST API generation endpoints currently run in buffered mode
368+
362369
OpenAPI spec:
363370

364371
`GET http://localhost:3000/v1/openapi.json`

apps/api/src/openapi.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ const openApiDocument = {
3232
'/v1/generate': {
3333
post: {
3434
summary: 'Generate data from text spec with selectable response shape',
35+
description:
36+
'Generation requests are currently processed in buffered mode for all formats. Stream-mode behavior is available in the core helper and CLI, not this REST endpoint.',
3537
requestBody: {
3638
required: true,
3739
content: {

apps/cli/README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,15 @@ Streaming is currently supported for:
8383

8484
- `csv`
8585
- `jsonl`
86+
- `dsv`
87+
- `json`
88+
- `xml`
8689

87-
Other formats use buffered generation.
90+
Notes:
91+
92+
- `json` stream mode emits a valid JSON array payload.
93+
- For stream-mode option mismatches, generation continues and warnings are reported when applicable.
94+
- Other formats use buffered generation.
8895

8996
For `amend`, streaming flags are accepted for compatibility but ignored (always buffered).
9097

@@ -93,6 +100,9 @@ Examples:
93100
```bash
94101
anywaydata generate -i ./apps/cli/examples/company-literal.txt -n 100000 -f jsonl -o output.jsonl --stream
95102
anywaydata generate -i ./apps/cli/examples/company-literal.txt -n 100000 -f csv -o output.csv --stream-threshold 1000
103+
anywaydata generate -i ./apps/cli/examples/company-literal.txt -n 100000 -f dsv -o output.dsv --stream
104+
anywaydata generate -i ./apps/cli/examples/company-literal.txt -n 100000 -f json -o output.json --stream
105+
anywaydata generate -i ./apps/cli/examples/company-literal.txt -n 100000 -f xml -o output.xml --stream
96106
```
97107

98108
## Spec File Format

apps/cli/src/cli-options.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export function parseCliOptions(argvInput = process.argv) {
3737
.option('stream', {
3838
type: 'boolean',
3939
default: false,
40-
describe: 'Use streaming generation path when available (exporting CSV or JSONL)',
40+
describe: 'Use streaming generation path when available (exporting CSV, JSONL, DSV, JSON or XML)',
4141
})
4242
.option('stream-threshold', {
4343
type: 'number',

apps/cli/src/run-cli.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ export async function runCliCommand({ options, platform }) {
106106
}
107107
}
108108

109-
if (useStreamMode && (outputFormat === 'csv' || outputFormat === 'jsonl')) {
109+
if (useStreamMode && ['csv', 'jsonl', 'dsv', 'json', 'xml'].includes(outputFormat)) {
110110
const streamedLines = [];
111111
const writer = options.outputFile ? platform.createLineWriter(options.outputFile) : null;
112112
let writerClosed = false;
@@ -133,6 +133,11 @@ export async function runCliCommand({ options, platform }) {
133133
}
134134

135135
progress(streamResult.diagnostics.report);
136+
if (Array.isArray(streamResult.diagnostics?.warnings)) {
137+
for (const warning of streamResult.diagnostics.warnings) {
138+
progress(`WARNING: ${warning}`);
139+
}
140+
}
136141
if (options.testMode && streamResult.diagnostics.firstRow) {
137142
progress('e.g.');
138143
progress(JSON.stringify(streamResult.diagnostics.firstRow));

apps/cli/src/tests/integration.cli-params.test.js

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -102,28 +102,30 @@ test('param --show-progress false suppresses progress logs', () => {
102102
expect(result.stdout).toContain('"Company"');
103103
});
104104

105-
test('params --stream and --show-progress true use streaming path for csv/jsonl', async () => {
105+
test('params --stream and --show-progress true use streaming path for csv/jsonl/dsv/json/xml', async () => {
106106
const inputPath = path.join(repoRoot, 'apps', 'cli', 'examples', 'company-literal.txt');
107-
const outputPath = tempFile('stream');
108-
const result = runCli([
109-
'generate',
110-
'-i',
111-
inputPath,
112-
'-n',
113-
'2',
114-
'-f',
115-
'csv',
116-
'-o',
117-
outputPath,
118-
'--stream',
119-
'--show-progress',
120-
'true',
121-
]);
122-
expect(result.status).toBe(0);
123-
expect(result.stdout).toContain('using stream mode');
124-
125-
const written = await fs.readFile(outputPath, 'utf8');
126-
expect(written).toContain('"Company"');
107+
const formats = ['csv', 'jsonl', 'dsv', 'json', 'xml'];
108+
for (const format of formats) {
109+
const outputPath = tempFile(`stream-${format}`);
110+
const result = runCli([
111+
'generate',
112+
'-i',
113+
inputPath,
114+
'-n',
115+
'2',
116+
'-f',
117+
format,
118+
'-o',
119+
outputPath,
120+
'--stream',
121+
'--show-progress',
122+
'true',
123+
]);
124+
expect(result.status).toBe(0);
125+
expect(result.stdout).toContain('using stream mode');
126+
const written = await fs.readFile(outputPath, 'utf8');
127+
expect(written.length).toBeGreaterThan(0);
128+
}
127129
});
128130

129131
test('param --stream-threshold auto-enables stream mode when threshold reached', () => {

docs-src/docs/070-interfaces-and-deployment/030-rest-api.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ Both endpoints generate data from the same schema language and output formats. T
148148
- `stream` is accepted for compatibility and ignored
149149
- `inputFormat` is normalized (trimmed and lower-cased), so values like `" csv "` are accepted
150150

151+
Generation mode behavior:
152+
153+
- REST generation endpoints currently run in buffered mode.
154+
- Stream-mode generation behavior is available via the core helper/CLI paths, not via `/v1/generate`.
155+
151156
## Schema Formatting
152157

153158
Schema text supports:

docs-src/docs/070-interfaces-and-deployment/050-cli-node-and-bun.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ Parameter guide for the examples:
4444
- `-o, --outputfile`: optional output file path. If omitted, output is written to stdout.
4545
- `-t, --testMode`: generate one row and print diagnostics for troubleshooting.
4646
- `--show-progress`: explicitly control progress logs (for example `--show-progress true` or `--show-progress false`).
47-
- `--stream`: enable streaming generation when supported (currently `csv` and `jsonl`).
47+
- `--stream`: enable streaming generation when supported (`csv`, `jsonl`, `dsv`, `json`, `xml`).
4848
- `--stream-threshold`: auto-enable streaming when `rowCount >= threshold` and `--outputfile` is set (default `5000`).
4949
- `--unsafe-faker-expressions`: opt-in to expression-style faker arguments (unsafe for untrusted input).
5050
- `--help`: show CLI usage and options.
@@ -65,7 +65,9 @@ Schema text supports:
6565
- on for stdout mode (no `--outputfile`)
6666
- on for `--testMode`
6767
- off for file output unless `--show-progress true` is provided
68-
- Streaming is currently implemented for `csv` and `jsonl` exports. Other formats use buffered generation.
68+
- Streaming is currently implemented for `csv`, `jsonl`, `dsv`, `json`, and `xml` exports. Other formats use buffered generation.
69+
- JSON stream mode emits a valid JSON array payload.
70+
- If stream mode cannot honor some format options, generation continues and warnings are reported.
6971
- `amend` always uses buffered generation; stream flags are ignored for this command.
7072
- Auto-streaming (`--stream-threshold`) applies only when writing to a file. For stdout workflows, use `--stream` explicitly.
7173
- For `amend`, if `-n/--numberOfLines` is omitted, all imported rows are amended.

package-lock.json

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)