Skip to content

Commit 36f6846

Browse files
author
The TensorFlow Datasets Authors
committed
Conditionally pass run_post_process to hf_builder.as_dataset.
PiperOrigin-RevId: 911883251
1 parent 510d73d commit 36f6846

1 file changed

Lines changed: 13 additions & 3 deletions

File tree

tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from collections.abc import Mapping, Sequence
3030
import dataclasses
3131
import functools
32+
import inspect
3233
import itertools
3334
import multiprocessing
3435
import os
@@ -136,9 +137,18 @@ def _write_shard(
136137
def get_serialized_examples_iter():
137138
nonlocal num_bytes
138139
nonlocal num_exceptions
139-
dataset = hf_builder.as_dataset(
140-
split=shard_spec.shard_split, run_post_process=False
141-
)
140+
as_dataset_kwargs = dict(split=shard_spec.shard_split)
141+
# We dynamically construct the arguments because the 'run_post_process'
142+
# parameter was only added in Hugging Face 'datasets' 2.9.1. In some
143+
# environments, such as GitHub CI, an older version of the library
144+
# may be installed due to dependency resolution conflicts with
145+
# apache-beam.
146+
if (
147+
'run_post_process'
148+
in inspect.signature(hf_builder.as_dataset).parameters
149+
):
150+
as_dataset_kwargs['run_post_process'] = False
151+
dataset = hf_builder.as_dataset(**as_dataset_kwargs)
142152
for i in range(shard_spec.num_examples):
143153
try:
144154
hf_value = dataset[i]

0 commit comments

Comments (0)