@@ -8,18 +8,34 @@ In the future the same will be used to:
88- Store PyPI index query results as facts in the MODULE.bazel.lock file
99"""
1010
11- def pypi_cache (store = None ):
11+ load (":version_from_filename.bzl" , "version_from_filename" )
12+
13+ _FACT_VERSION = "v1"
14+
15+ def pypi_cache (module_ctx = None , store = None ):
1216 """The cache for PyPI index queries.
1317
1418 Currently the key is of the following structure:
15- (url, real_url)
19+ (url, real_url, versions)
20+
21+ Args:
22+ module_ctx: The module context
23+ store: The in-memory store, should implement dict interface for get and setdefault
24+
25+ Returns:
26+ A cache struct
1627 """
28+ mcache = memory_cache (store )
29+ facts = {}
30+ fcache = facts_cache (getattr (module_ctx , "facts" , None ), facts )
1731
1832 # buildifier: disable=uninitialized
1933 self = struct (
20- _store = store or {},
34+ _mcache = mcache ,
35+ _facts = fcache ,
2136 setdefault = lambda key , parsed_result : _pypi_cache_setdefault (self , key , parsed_result ),
2237 get = lambda key : _pypi_cache_get (self , key ),
38+ get_facts = lambda : _get_facts (facts ),
2339 )
2440
2541 # buildifier: enable=uninitialized
@@ -40,7 +56,13 @@ def _pypi_cache_setdefault(self, key, parsed_result):
4056 Returns:
4157 The `parse_result`.
4258 """
43- return self ._store .setdefault (key , parsed_result )
59+ index_url , real_url , versions = key
60+ self ._mcache .setdefault (real_url , None , parsed_result )
61+ if not versions or not self ._facts :
62+ return parsed_result
63+
64+ filtered = _filter_packages (parsed_result , versions )
65+ return self ._facts .setdefault (index_url , filtered )
4466
4567def _pypi_cache_get (self , key ):
4668 """Return the parsed result from the cache.
@@ -52,4 +74,163 @@ def _pypi_cache_get(self, key):
5274 Returns:
5375 The {type}`struct` or `None` based on if the result is in the cache or not.
5476 """
55- return self ._store .get (key )
77+ index_url , real_url , versions = key
78+ cached = self ._mcache .get (real_url , versions )
79+ if not self ._facts :
80+ return cached
81+
82+ if not cached and versions :
83+ # Could not get from in-memory, read from lockfile facts
84+ cached = self ._facts .get (index_url , versions )
85+
86+ return cached
87+
def _get_facts(facts):
    """Expose the facts collected so far.

    Args:
        facts: {type}`dict` the facts gathered during this evaluation.

    Returns:
        The very same `facts` object; no copy is made.
    """
    collected = facts
    return collected
90+
def memory_cache(cache = None):
    """SimpleAPI cache for making fewer calls.

    Args:
        cache: the storage to store things in memory. It has to provide `get`
            and `setdefault` like a dict does. A fresh dict is used when `None`
            is passed.

    Returns:
        struct with 2 methods, `get` and `setdefault`. Both take the `real_url`
        as the key and return the stored value filtered down to the requested
        `versions` (unfiltered when no versions are given, `None` when nothing
        is stored or nothing matches).
    """
    if cache == None:
        cache = {}

    return struct(
        get = lambda real_url, versions: _filter_packages(cache.get(real_url), versions),
        # `setdefault` must actually write `value` into the storage, otherwise
        # every later `get` for the same URL is a cache miss.
        setdefault = lambda real_url, versions, value: _filter_packages(cache.setdefault(real_url, value), versions),
    )
107+
def _filter_packages(dists, requested_versions):
    """Narrow a parsed index result down to the requested versions.

    Args:
        dists: the parsed result exposing `sdists` and `whls` dicts (iterated
            here as sha256 -> dist with a `version` attribute), or `None`.
        requested_versions: the versions to keep. When empty or `None` no
            filtering is done.

    Returns:
        `None` if `dists` is `None` or if no dist matches, `dists` itself if no
        versions were requested, otherwise a new struct with the `whls`,
        `sdists` and `sha256s_by_version` attributes.
    """
    if dists == None:
        return None
    if not requested_versions:
        return dists

    kept_sdists = {}
    kept_whls = {}
    by_version = {}

    # sdists are visited before wheels so that the per-version hash lists keep
    # that order.
    for available, kept in ((dists.sdists, kept_sdists), (dists.whls, kept_whls)):
        for digest, dist in available.items():
            if dist.version in requested_versions:
                kept[digest] = dist
                by_version.setdefault(dist.version, []).append(digest)

    if not (kept_whls or kept_sdists):
        # TODO @aignas 2026-03-08: add logging
        return None

    return struct(
        whls = kept_whls,
        sdists = kept_sdists,
        sha256s_by_version = by_version,
    )
143+
def facts_cache(known_facts, facts, facts_version = _FACT_VERSION):
    """Cache that is backed by facts.

    Args:
        known_facts: the facts that are already known, or `None` if there are
            none available.
        facts: {type}`dict` the storage that newly stored or reused facts are
            written to.
        facts_version: the version of the facts layout that is read and written.

    Returns:
        `None` if `known_facts` is `None`, otherwise a struct with the `get`
        and `setdefault` methods and the `known_facts` and `facts` attributes.
    """
    if known_facts == None:
        return None

    def _get(index_url, versions):
        return _get_from_facts(facts, known_facts, index_url, versions, facts_version)

    def _setdefault(url, value):
        return _store_facts(facts, facts_version, url, value)

    return struct(
        get = _get,
        setdefault = _setdefault,
        known_facts = known_facts,
        facts = facts,
    )
160+
def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_version):
    """Rebuild the parsed index result of a distribution from known facts.

    Args:
        facts: {type}`dict` the facts storage for this run; whatever is used
            from `known_facts` is stored here again.
        known_facts: {type}`dict` the previously stored facts.
        index_url: the index URL of the distribution; the last path segment is
            treated as the distribution name and the rest as the index root.
        requested_versions: the versions that the caller is interested in.
        facts_version: the facts layout version that this code can parse.

    Returns:
        `None` if the known facts have a different version or contain nothing
        for the requested versions, otherwise a struct with the `whls`,
        `sdists` and `sha256s_by_version` attributes.
    """
    if known_facts.get("fact_version") != facts_version:
        # cannot trust known facts, different version than we know how to parse
        return None

    # Needs to be the same normalization as the one done when storing facts.
    root_url, _, distribution = index_url.rstrip("/").rpartition("/")
    distribution = distribution.rstrip("/")
    root_url = root_url.rstrip("/")

    # These lookups do not depend on the individual dist, do them only once.
    hashes = known_facts.get("dist_hashes", {}).get(root_url, {}).get(distribution, {})
    filenames = known_facts.get("dist_filenames", {}).get(root_url, {}).get(distribution, {})
    yanked = known_facts.get("dist_yanked", {}).get(root_url, {}).get(distribution, {})

    whls = {}
    sdists = {}
    sha256s_by_version = {}

    for url, sha256 in hashes.items():
        # The filename overrides are stored keyed by the dist URL; the lookup
        # by sha256 is kept as a fallback for facts that use that key.
        filename = filenames.get(url) or filenames.get(sha256)
        if not filename:
            filename = url.rpartition("/")[2]

        version = version_from_filename(filename)
        if version not in requested_versions:
            # TODO @aignas 2026-01-21: do the check by requested shas at some point
            # We don't have sufficient info in the lock file, need to call the API
            continue

        dists = whls if filename.endswith(".whl") else sdists
        if sha256 in dists:
            # The same file is listed under another URL, the first one wins and
            # the hash is not listed twice.
            continue

        sha256s_by_version.setdefault(version, []).append(sha256)
        dists[sha256] = struct(
            sha256 = sha256,
            filename = filename,
            version = version,
            url = url,
            yanked = yanked.get(sha256, ""),
        )

    if not whls and not sdists:
        # We found nothing in facts
        return None

    output = struct(
        whls = whls,
        sdists = sdists,
        sha256s_by_version = sha256s_by_version,
    )

    # Persist these facts for the next run because we have used them.
    return _store_facts(facts, facts_version, index_url, output)
211+
def _store_facts(facts, fact_version, index_url, value):
    """Store values as facts in the lock file.

    The lock file should stay small, hence only what would otherwise need to be
    fetched from the internet is stored. Anything that can be derived from the
    stored data with pure Starlark functions should be derived in Starlark.

    Args:
        facts: {type}`dict` the facts storage that is updated in place.
        fact_version: the version of the facts layout that gets recorded.
        index_url: the index URL of the distribution; the last path segment is
            used as the distribution name and the rest as the index root.
        value: the parsed result with the `sdists` and `whls` dicts.

    Returns:
        The `value` as it was passed in.
    """
    if not value:
        return value

    facts["fact_version"] = fact_version

    base, _, name = index_url.rstrip("/").rpartition("/")
    name = name.rstrip("/")
    base = base.rstrip("/")

    merged = value.sdists | value.whls
    for digest, dist in merged.items():
        hashes = facts.setdefault("dist_hashes", {}).setdefault(base, {}).setdefault(name, {})
        hashes.setdefault(dist.url, digest)

        # The filename is only stored if the URL does not end with it.
        if not dist.url.endswith(dist.filename):
            filenames = facts.setdefault("dist_filenames", {}).setdefault(base, {}).setdefault(name, {})
            filenames.setdefault(dist.url, dist.filename)

        if dist.yanked:
            yanked = facts.setdefault("dist_yanked", {}).setdefault(base, {}).setdefault(name, {})
            yanked.setdefault(digest, dist.yanked)

    return value
0 commit comments