22"""
33sparse-bootstrap.py
44
5- Given a seed module directory, walks its parent-chain and BOM-import graph,
6- adds the discovered dependency directories to the sparse checkout, and
7- installs them into ~/.m2 in dependency order.
5+ Given a seed module directory, walks its parent chain, BOM-import graph, and
6+ regular <dependency> edges that resolve to other poms in this monorepo, adds
7+ the discovered directories to the sparse checkout, and installs them into
8+ ~/.m2 in dependency order.
89
910Prerequisites:
1011 git clone --sparse git@github.com:googleapis/google-cloud-java.git
@@ -46,6 +47,7 @@ class GitBlob:
@dataclass(frozen=True)
class ParentRef:
    # Immutable record of the <parent> declaration read from a pom.
    coord: Coord  # groupId/artifactId named inside <parent>
    version: str  # text of <parent><version>; '' when that element yields nothing
    relative_path: str  # '' means <relativePath/> was explicit → fetch from ~/.m2
5052
5153
@@ -68,7 +70,9 @@ def ls_tree_poms() -> list[GitBlob]:
6870 results : list [GitBlob ] = []
6971 for line in out .splitlines ():
7072 meta , path = line .split ('\t ' , 1 )
71- if not path .endswith ('pom.xml' ):
73+ # Match the basename exactly — endswith('pom.xml') would also accept
74+ # things like src/test/resources/gax-example-pom.xml.
75+ if Path (path ).name != 'pom.xml' :
7276 continue
7377 _ , _ , sha = meta .split ()
7478 results .append (GitBlob (sha = sha , path = Path (path )))
@@ -123,6 +127,11 @@ def get_coordinates(root: ET.Element) -> Coord:
123127 return Coord (group_id = g , artifact_id = a )
124128
125129
def get_version(root: ET.Element) -> Optional[str]:
    """Return the pom's own version, or the one inherited from its parent.

    The project's own <version> wins. When it is missing or empty, the
    <version> declared inside <parent> is used instead.

    The <parent> element is located first and its <version> child is read
    from it directly, instead of passing the multi-segment name
    'parent/version' to child_text. Other lookups in this file qualify each
    path segment separately with t(), so a two-segment name is not
    guaranteed to resolve.
    NOTE(review): child_text is defined elsewhere; whether it accepts
    multi-segment paths is not visible here — confirm.

    Returns None when there is no own version and no <parent> element;
    otherwise whatever child_text yields for the chosen <version>.
    """
    own_version = child_text(root, 'version')
    if own_version:
        return own_version
    parent = root.find(t('parent'))
    if parent is None:
        return None
    return child_text(parent, 'version')
133+
134+
126135def get_parent (root : ET .Element ) -> Optional [ParentRef ]:
127136 p = root .find (t ('parent' ))
128137 if p is None :
@@ -137,21 +146,41 @@ def get_parent(root: ET.Element) -> Optional[ParentRef]:
137146 group_id = child_text (p , 'groupId' ),
138147 artifact_id = child_text (p , 'artifactId' ),
139148 ),
149+ version = child_text (p , 'version' ) or '' ,
140150 relative_path = rel ,
141151 )
142152
143153
def get_bom_imports(root: ET.Element) -> list[tuple[Coord, str]]:
    """Collect the BOM imports declared in a pom.

    Looks at every <dependencyManagement>/<dependencies>/<dependency>
    element below root and keeps the ones whose <scope> is 'import'.

    Returns a list of (Coord, version) pairs. Entries missing a groupId,
    artifactId or version are skipped.
    """
    managed_path = '/'.join(
        t(tag) for tag in ('dependencyManagement', 'dependencies', 'dependency')
    )
    imports: list[tuple[Coord, str]] = []
    for entry in root.findall(f'.//{managed_path}'):
        if child_text(entry, 'scope') != 'import':
            continue
        group = child_text(entry, 'groupId')
        artifact = child_text(entry, 'artifactId')
        version = child_text(entry, 'version')
        if not (group and artifact and version):
            # Without all three parts the import cannot be matched later.
            continue
        imports.append((Coord(group_id=group, artifact_id=artifact), version))
    return imports
167+
168+
def get_regular_deps(root: ET.Element) -> list[tuple[Coord, Optional[str]]]:
    """List the ordinary dependencies declared directly on the project.

    Only <dependency> children of root's own <dependencies> element are
    considered. Each result is a (Coord, version) pair, where version is
    whatever child_text yields for <version>. It is None when the
    dependency leaves its version to dependencyManagement.

    Entries lacking a groupId or an artifactId are dropped. A pom without a
    <dependencies> element produces an empty list.
    """
    container = root.find(t('dependencies'))
    if container is None:
        return []
    declared = (
        (
            child_text(entry, 'groupId'),
            child_text(entry, 'artifactId'),
            child_text(entry, 'version'),
        )
        for entry in container.findall(t('dependency'))
    )
    return [
        (Coord(group_id=group, artifact_id=artifact), version)
        for group, artifact, version in declared
        if group and artifact
    ]
156185
157186
@@ -182,6 +211,27 @@ def enqueue(pom_path: Path, required_by: Optional[Path] = None) -> None:
182211 needed .add (pom_path )
183212 queue .append (pom_path )
184213
def resolve_local(coord: Coord, declared_version: Optional[str]) -> Optional[Path]:
    """Map a declared dependency onto a pom in this repo, if versions agree.

    Returns None when coord has no entry in coord_to_pom. Otherwise the
    local pom is returned when any of these holds:

    - declared_version is None (a regular dep whose version comes from
      dependencyManagement). Knowing the resolved version would take full
      Maven evaluation, so the local pom is followed optimistically.
    - The declared or the local version contains a Maven property ('${'),
      which is not evaluated here, so the local pom is again followed
      optimistically.
    - The local pom's version equals declared_version exactly.

    In every other case None is returned.
    """
    candidate = coord_to_pom.get(coord)
    if candidate is None or declared_version is None:
        # Either nothing local to follow, or nothing to compare against.
        return candidate
    candidate_version = get_version(ET.fromstring(pom_contents[candidate])) or ''
    has_property = any(
        '${' in text for text in (declared_version, candidate_version)
    )
    if has_property or candidate_version == declared_version:
        return candidate
    return None
234+
185235 # Seed: every pom under seed_dir — pre-visited so they won't enter `needed`
186236 for path in pom_contents :
187237 if path .is_relative_to (seed_dir ):
@@ -203,17 +253,41 @@ def enqueue(pom_path: Path, required_by: Optional[Path] = None) -> None:
203253 if local_parent .name != 'pom.xml' :
204254 local_parent = local_parent / 'pom.xml'
205255 if local_parent in pom_contents :
206- resolved = local_parent
207- # fall back to coordinate lookup if relativePath missing or not found locally
208- if resolved is None and parent .coord in coord_to_pom :
209- resolved = coord_to_pom [parent .coord ]
256+ # Maven only uses the pom at <relativePath> if its coords AND
257+ # version match <parent>; otherwise it falls back to the repo.
258+ # The default ../pom.xml routinely resolves to an unrelated
259+ # repo-root pom.
260+ local_root = ET .fromstring (pom_contents [local_parent ])
261+ if (get_coordinates (local_root ) == parent .coord
262+ and (get_version (local_root ) or '' ) == parent .version ):
263+ resolved = local_parent
264+ # fall back to coordinate lookup if relativePath missing, not found
265+ # locally, or the local pom didn't match the declared parent
266+ if resolved is None :
267+ resolved = resolve_local (parent .coord , parent .version )
210268 if resolved is not None :
211269 enqueue (resolved , required_by = pom_path )
212270
213271 # Follow BOM imports
214- for coord in get_bom_imports (root ):
215- if coord in coord_to_pom :
216- enqueue (coord_to_pom [coord ], required_by = pom_path )
272+ for coord , version in get_bom_imports (root ):
273+ local = resolve_local (coord , version )
274+ if local is not None :
275+ enqueue (local , required_by = pom_path )
276+
277+ # Follow regular <dependency> edges that resolve to another pom in this repo
278+ for coord , version in get_regular_deps (root ):
279+ local = resolve_local (coord , version )
280+ if local is not None :
281+ enqueue (local , required_by = pom_path )
282+
283+ # Operational dep on the project's aggregator pom: we'll `cd <project>`
284+ # to run mvn, which forces Maven to load <project>/pom.xml regardless
285+ # of -pl, so its parent chain must already be installed. The aggregator
286+ # is often unrelated to the module's own parent chain (e.g. pubsub-bom
287+ # parents to a shared pom, not java-pubsub/pom.xml).
288+ project_root = Path (pom_path .parts [0 ]) / 'pom.xml'
289+ if project_root != pom_path and project_root in pom_contents :
290+ enqueue (project_root , required_by = pom_path )
217291
218292 return needed , dep_edges
219293
@@ -244,25 +318,29 @@ def visit(n: Path) -> None:
244318# ── install command generation ─────────────────────────────────────────────────
245319
246320def make_install_commands (sorted_poms : list [Path ]) -> list [InstallCommand ]:
247- """One mvn install command per top-level project, in dependency order."""
248- by_project : dict [str , list [str ]] = defaultdict (list )
249- project_order : list [str ] = []
250- seen_projects : set [str ] = set ()
321+ """One mvn install per consecutive run of poms in the same top-level project.
322+
323+ We can't collapse all poms under a project into a single mvn invocation:
324+ the topo order may interleave projects (e.g. A1, A2, B1, A3) when A3 has
325+ a cross-project dep on B1. Flushing on project change preserves the order.
326+ """
327+ groups : list [tuple [str , list [str ]]] = [] # [(project, [module_rel_paths])]
251328
252329 for pom_path in sorted_poms :
253330 project = pom_path .parts [0 ]
254- if project not in seen_projects :
255- seen_projects .add (project )
256- project_order .append (project )
257-
258331 pom_dir = pom_path .parent
259332 rel = str (pom_dir .relative_to (project )) if pom_dir != Path (project ) else '.'
260- if rel not in by_project [project ]:
261- by_project [project ].append (rel )
333+
334+ if groups and groups [- 1 ][0 ] == project :
335+ modules = groups [- 1 ][1 ]
336+ if rel not in modules :
337+ modules .append (rel )
338+ else :
339+ groups .append ((project , [rel ]))
262340
263341 commands : list [InstallCommand ] = []
264- for project in project_order :
265- sub_modules = [m for m in by_project [ project ] if m != '.' ]
342+ for project , modules in groups :
343+ sub_modules = [m for m in modules if m != '.' ]
266344 cmd = ['mvn' , 'install' , '-T' , '1C' , '-DskipTests' , '-P' , 'quick-build' ]
267345 if sub_modules :
268346 for m in sub_modules :
0 commit comments