Skip to content

Commit 786375a

Browse files
tomvdw authored and The TensorFlow Datasets Authors committed
Do not first store all generated examples to memory before writing them
PiperOrigin-RevId: 820349120
1 parent 74ea045 commit 786375a

1 file changed

Lines changed: 9 additions & 3 deletions

File tree

tensorflow_datasets/core/writer.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -291,10 +291,16 @@ def write(
291291
path: epath.Path,
292292
) -> int:
293293
"""Returns the number of examples written to the given path."""
294-
serialized_examples = [(k, self._serialize_example(v)) for k, v in examples]
295-
self._example_writer.write(path=path, examples=serialized_examples)
294+
(for_writing, for_counting) = itertools.tee(examples, 2)
296295

297-
return len(serialized_examples)
296+
def serialize_examples() -> Iterator[type_utils.KeySerializedExample]:
297+
for k, v in for_writing:
298+
yield k, self._serialize_example(v)
299+
300+
self._example_writer.write(path=path, examples=serialize_examples())
301+
num_examples = sum(1 for _ in for_counting)
302+
303+
return num_examples
298304

299305
def write_with_beam(
300306
self,

0 commit comments

Comments (0)