Skip to content

Commit cd09e6c

Browse files
committed
fix: add path traversal validation to save/load datapoints endpoints
User-supplied paths in _save_datapoints and _load_datapoints are passed directly to file I/O without validation. An attacker can use path traversal sequences to read or write arbitrary files on the server. Add _validate_data_path() that resolves the path and ensures it stays within the configured data_dir.
1 parent 3debb60 commit cd09e6c

1 file changed

Lines changed: 14 additions & 0 deletions

File tree

lit_nlp/app.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,18 @@ def _reconstitute_inputs(
215215
)
216216
return [index[ex] if isinstance(ex, str) else ex for ex in inputs]
217217

218+
def _validate_data_path(self, path: str) -> str:
  """Resolve a user-supplied path and reject it if it escapes the data dir.

  When `self._data_dir` is set, the fully resolved path (symlinks followed,
  relative paths resolved against the current working directory) must be the
  data directory itself or a location beneath it.

  When `self._data_dir` is not set there is no directory to confine to, so
  only paths that still contain a `..` component after normalization are
  rejected. Absolute paths are accepted unchanged in that mode.

  Args:
    path: the path supplied by the caller of the save/load endpoint.

  Returns:
    The resolved form of `path` as returned by `os.path.realpath`.

  Raises:
    ValueError: if the path falls outside `self._data_dir`, or, when no data
      directory is configured, if it contains a parent-directory traversal.
  """
  resolved = os.path.realpath(path)
  if self._data_dir:
    base = os.path.realpath(self._data_dir)
    # Compare whole path components rather than string prefixes. A prefix test
    # against `base + os.sep` wrongly rejects everything when `base` already
    # ends in a separator (e.g. the filesystem root).
    try:
      inside = os.path.commonpath([base, resolved]) == base
    except ValueError:
      # commonpath() raises when the two paths cannot share an ancestor
      # (e.g. different drives on Windows); that is by definition outside.
      inside = False
    if not inside:
      raise ValueError(
          f'Path must be within data_dir ({self._data_dir})')
  elif '..' in os.path.normpath(path).split(os.sep):
    # normpath() collapses interior `a/../b` segments, so any `..` that
    # survives climbs above the starting directory.
    raise ValueError('Path traversal is not allowed')
  return resolved
229+
218230
def _save_datapoints(
219231
self,
220232
data,
@@ -231,6 +243,7 @@ def _save_datapoints(
231243
if self._demo_mode:
232244
logging.warning('Attempted to save datapoints in demo mode.')
233245
return None
246+
path = self._validate_data_path(path)
234247
return self._datasets[dataset_name].save(data['inputs'], path)
235248

236249
def _load_datapoints(
@@ -249,6 +262,7 @@ def _load_datapoints(
249262
if self._demo_mode:
250263
logging.warning('Attempted to load datapoints in demo mode.')
251264
return None
265+
path = self._validate_data_path(path)
252266
dataset = self._datasets[dataset_name].load(path)
253267
return dataset.indexed_examples
254268

0 commit comments

Comments
 (0)