Skip to content

Commit 20a232c

Browse files
style: improve code style
1 parent 155d896 commit 20a232c

2 files changed

Lines changed: 2 additions & 4 deletions

File tree

graphgen/models/reader/huggingface_reader.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -191,9 +191,7 @@ def _process_batch(batch: dict[str, "np.ndarray"]) -> dict[str, "np.ndarray"]:
191191

192192
# Rename text_column to 'content' if different
193193
if self.text_column != "content" and self.text_column in batch:
194-
batch["content"] = batch[self.text_column]
195-
# Optional: delete old key to avoid duplication
196-
# del batch[self.text_column]
194+
batch["content"] = batch.pop(self.text_column)
197195

198196
return batch
199197

graphgen/operators/read/read.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,11 +55,11 @@ def _build_reader(suffix: str, cache_dir: str | None, **reader_kwargs):
5555
def _process_huggingface_datasets(hf_uris: List[str], reader_kwargs: dict) -> list:
5656
"""Process HuggingFace datasets and return list of Ray datasets."""
5757
logger.info("[READ] Processing HuggingFace datasets: %s", hf_uris)
58+
hf_reader = HuggingFaceReader(**reader_kwargs)
5859
read_tasks = []
5960
for hf_uri in hf_uris:
6061
# Parse URI format: "huggingface://dataset_name:subset:split"
6162
uri_part = hf_uri.replace("huggingface://", "")
62-
hf_reader = HuggingFaceReader(**reader_kwargs)
6363
ds = hf_reader.read(uri_part)
6464
read_tasks.append(ds)
6565
logger.info("[READ] Successfully loaded %d HuggingFace dataset(s)", len(hf_uris))

0 commit comments

Comments
 (0)