Skip to content

Commit b76edc8

Browse files
authored
Merge pull request #114 from forcedotcom/library-path
@W-22314458 align file finding functionality
2 parents ef67865 + 39f7c1a commit b76edc8

4 files changed

Lines changed: 214 additions & 114 deletions

File tree

README.md

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ The SDK automatically handles all dependency packaging for Data Cloud deployment
124124
├── payload
125125
│ ├── config.json
126126
│ ├── entrypoint.py
127-
├── files
128-
│ ├── data.csv
127+
├── files
128+
├── data.csv
129129
```
130130

131131
## py-files directory
@@ -137,18 +137,18 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing
137137
├── payload
138138
│ ├── config.json
139139
│ ├── entrypoint.py
140-
├── py-files
141-
│ ├── moduleA
142-
│ │ ├── __init__.py
143-
│ │ ├── moduleA.py
140+
├── py-files
141+
├── moduleA
142+
│ │ ├── __init__.py
143+
│ │ ├── moduleA.py
144144
```
145145

146146
## API
147147

148148
Your entry point script will define logic using the `Client` object which wraps data access layers.
149149

150150
You should only need the following methods:
151-
* `find_file_path(file_name)` - Returns a file path
151+
* `find_file_path(file_name)` – Resolve a bundled file (placed under `payload/files/`) to a `pathlib.Path` that exists. Works the same locally and inside Data Cloud — see [Bundled file resolution](#bundled-file-resolution) below for the full lookup order. Raises `FileNotFoundError` if the file isn't found.
152152
* `read_dlo(name)` – Read from a Data Lake Object by name
153153
* `read_dmo(name)` – Read from a Data Model Object by name
154154
* `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Lake Object by name with a Spark dataframe
@@ -169,6 +169,24 @@ client.write_to_dlo('output_DLO')
169169
> [!WARNING]
170170
> Currently we only support reading from DMOs and writing to DMOs or reading from DLOs and writing to DLOs, but they cannot mix.
171171
172+
### Bundled file resolution
173+
174+
Place bundled files (CSVs, prompt files, etc.) under `payload/files/`. The same `client.find_file_path("data.csv")` call resolves consistently across all three runtimes:
175+
176+
- `datacustomcode run` (local) → `<cwd>/payload/files/data.csv`
177+
- Data Cloud script package → `$LIBRARY_PATH/files/data.csv`
178+
- Data Cloud function package → `$LIBRARY_PATH/files/data.csv`
179+
180+
Resolution order (first existing path wins):
181+
182+
1. `$LIBRARY_PATH/files/<file_name>`, then `$LIBRARY_PATH/<file_name>` — when `LIBRARY_PATH` is set. Data Cloud sets this for you to the package root.
183+
2. `payload/files/<file_name>` relative to the current working directory.
184+
3. `<config_dir>/files/<file_name>` where `<config_dir>` is the directory of the nearest `config.json` discoverable by walking down from cwd.
185+
186+
If none of these exist, `find_file_path` raises `FileNotFoundError` with the list of paths it tried.
187+
188+
`$LIBRARY_PATH` is set automatically to the root of the package at runtime inside Data Cloud.
189+
172190

173191
## CLI
174192

src/datacustomcode/client.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,38 @@ def write_to_dmo(
289289
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]
290290

291291
def find_file_path(self, file_name: str) -> Path:
292-
"""Return a file path"""
292+
"""Resolve a bundled file shipped in the package to a path that exists.
293+
294+
Resolution order (first existing path wins):
295+
296+
1. ``$LIBRARY_PATH/<file_folder>/<file_name>`` then
297+
``$LIBRARY_PATH/<file_name>`` — when the ``LIBRARY_PATH`` environment
298+
variable is set. The Data Cloud runtime sets this to the directory
299+
containing the extracted package.
300+
2. ``<code_package>/<file_folder>/<file_name>`` relative to the current
301+
working directory — the default ``payload/files/<file_name>`` layout
302+
used by ``datacustomcode run`` from a project root.
303+
3. ``<config_dir>/<file_folder>/<file_name>`` where ``<config_dir>`` is
304+
the directory containing the nearest ``config.json`` discoverable
305+
by walking the cwd subtree.
306+
307+
``LIBRARY_PATH`` must point to the directory that *contains*
308+
``files/`` — i.e., the package root, the same directory that holds
309+
``config.json`` and ``entrypoint.py``. See ``Bundled file resolution``
310+
in ``README.md`` for more details.
293311
312+
Args:
313+
file_name: A file under the package's ``files/`` folder. Relative
314+
subpaths (e.g., ``"file/data2.csv"``) are supported.
315+
316+
Returns:
317+
A ``pathlib.Path`` that exists.
318+
319+
Raises:
320+
FileNotFoundError: If the file does not exist at any of the
321+
resolution-order locations. The message lists every candidate
322+
path that was tried.
323+
"""
294324
return self._file.find_file_path(file_name) # type: ignore[no-any-return]
295325

296326
def llm_gateway_generate_text(

src/datacustomcode/file/path/default.py

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import os
1818
from pathlib import Path
19-
from typing import Optional
19+
from typing import Iterator, Optional
2020

2121
from datacustomcode.file.base import BaseDataAccessLayer
2222

@@ -66,54 +66,48 @@ def find_file_path(self, file_name: str) -> Path:
6666
file_name: The name of the file to locate
6767
6868
Returns:
69-
A file path
69+
A file path that exists
7070
7171
Raises:
7272
FileNotFoundError: If the file cannot be found
7373
"""
7474
if not file_name:
7575
raise ValueError("file_name cannot be empty")
7676

77-
file_path = self._resolve_file_path(file_name)
77+
tried: list[Path] = []
78+
for candidate in self._candidate_paths(file_name):
79+
tried.append(candidate)
80+
if candidate.exists():
81+
return candidate
7882

79-
if not file_path.exists():
80-
raise FileNotFoundError(
81-
f"File '{file_name}' not found in any search location"
82-
)
83+
raise FileNotFoundError(
84+
f"File '{file_name}' not found in any search location. "
85+
f"Tried: {[str(p) for p in tried]}"
86+
)
8387

84-
return file_path
85-
86-
def _resolve_file_path(self, file_name: str) -> Path:
87-
"""Resolve the full path to a file.
88+
def _candidate_paths(self, file_name: str) -> Iterator[Path]:
89+
"""Yield candidate paths for ``file_name`` in resolution order.
8890
8991
Args:
9092
file_name: The name of the file to resolve
9193
9294
Returns:
93-
The full path to the file
95+
An iterator of candidate paths
9496
"""
95-
# First check if environment variable is set
97+
# 1. $LIBRARY_PATH/<file_folder>/<file_name>, then $LIBRARY_PATH/<file_name>
9698
env_path = os.getenv(self.DEFAULT_ENV_VAR)
9799
if env_path:
98-
file_path = Path(env_path) / file_name
99-
if file_path.exists():
100-
return file_path
100+
yield Path(env_path) / self.file_folder / file_name
101+
yield Path(env_path) / file_name
101102

102-
# First try the default code package location
103+
# 2. <code_package>/<file_folder>/<file_name> relative to cwd
103104
if self._code_package_exists():
104-
file_path = self._get_code_package_file_path(file_name)
105-
if file_path.exists():
106-
return file_path
105+
yield self._get_code_package_file_path(file_name)
107106

108-
# Fall back to config.json-based location
107+
# 3. <config_dir>/<file_folder>/<file_name> via config.json discovery
109108
config_path = self._find_config_file()
110-
if config_path:
111-
file_path = self._get_config_based_file_path(file_name, config_path)
112-
if file_path.exists():
113-
return file_path
114-
115-
# Return the file name as a Path if not found in any location
116-
return Path(file_name)
109+
if config_path is not None:
110+
yield self._get_config_based_file_path(file_name, config_path)
117111

118112
def _code_package_exists(self) -> bool:
119113
"""Check if the default code package directory exists.
@@ -146,15 +140,18 @@ def _find_config_file(self) -> Optional[Path]:
146140
def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path:
147141
"""Get the file path relative to the config file location.
148142
143+
Anchors on the directory containing the discovered ``config.json`` so a
144+
package found by searching the cwd subtree resolves files relative to its own
145+
root, not the caller's cwd.
146+
149147
Args:
150148
file_name: The name of the file
151149
config_path: The path to the config file
152150
153151
Returns:
154152
The full path to the file
155153
"""
156-
relative_path = f"{self.file_folder}/{file_name}"
157-
return Path(relative_path)
154+
return config_path.parent / self.file_folder / file_name
158155

159156
def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]:
160157
"""Find a file within a directory tree.

0 commit comments

Comments
 (0)