Skip to content

Commit 62a2863

Browse files
committed
tugger-debian: refactor HTTP client path handling
Our assumption that the root URL was 2 directories up from the distribution URL was incorrect. In fact, the distribution path can have as many path/subdirectory components as wanted. This commit refactors the HTTP client to support arbitrary path layouts.
1 parent 0a65803 commit 62a2863

1 file changed

Lines changed: 139 additions & 93 deletions

File tree

  • tugger-debian/src/repository

tugger-debian/src/repository/http.rs

Lines changed: 139 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,28 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5+
/*! Debian repository HTTP client.
6+
7+
This module provides functionality for interfacing with HTTP based Debian
8+
repositories.
9+
10+
See <https://wiki.debian.org/DebianRepository/Format> for a definition of a
11+
Debian repository layout. Essentially, there's a root URL. Under that root URL
12+
are `dists/<distribution>/` directories. Each of these directories (which can
13+
have multiple path separators) has an `InRelease` and/or `Release` file. These
14+
files define the contents of a given *distribution*. This includes which
15+
architectures are supported, what *components* are available, etc.
16+
17+
Our [HttpRepositoryClient] models a client bound to a root URL.
18+
19+
Our [HttpDistributionClient] models a client bound to a virtual sub-directory
20+
under the root URL. You can obtain instances by calling [HttpRepositoryClient::distribution_client()].
21+
22+
The `InRelease`/`Release` files define the contents of a given *distribution*. Our
23+
[HttpReleaseClient] models a client bound to a parsed file. You can obtain instances
24+
by calling [HttpDistributionClient::fetch_inrelease()].
25+
*/
26+
527
use {
628
crate::{
729
binary_package_control::BinaryPackageControlFile,
@@ -46,24 +68,38 @@ pub enum HttpError {
4668
PackagesIndicesEntryNotFound,
4769
}
4870

71+
async fn transform_http_response(
72+
res: Response,
73+
compression: IndexFileCompression,
74+
) -> Result<Pin<Box<dyn AsyncRead>>, HttpError> {
75+
let stream = res
76+
.bytes_stream()
77+
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)));
78+
79+
Ok(match compression {
80+
IndexFileCompression::None => Box::pin(stream.into_async_read()),
81+
IndexFileCompression::Gzip => Box::pin(GzipDecoder::new(stream.into_async_read())),
82+
IndexFileCompression::Xz => Box::pin(XzDecoder::new(stream.into_async_read())),
83+
IndexFileCompression::Bzip2 => Box::pin(BzDecoder::new(stream.into_async_read())),
84+
IndexFileCompression::Lzma => Box::pin(LzmaDecoder::new(stream.into_async_read())),
85+
})
86+
}
87+
4988
/// Client for a Debian repository served via HTTP.
5089
///
5190
/// Instances are bound to a base URL, which represents the base directory.
52-
/// That URL should have an `InRelease` or `Release` file under it. From
53-
/// that main entrypoint, all other repository state can be discovered and
54-
/// retrieved.
91+
///
92+
/// Distributions (typically) exist in a `dists/<distribution>` directory.
93+
/// Each distribution has an `InRelease` and/or `Release` file under it.
5594
#[derive(Debug)]
5695
pub struct HttpRepositoryClient {
5796
/// HTTP client to use.
5897
client: Client,
5998

60-
/// Base URL for Debian projects.
99+
/// Base URL for this Debian archive.
61100
///
62-
/// Pool paths are relative to this.
63-
debian_base_url: Url,
64-
65-
/// Base URL for this repository (where the `InRelease` file is).
66-
repository_url: Url,
101+
/// Contains both distributions and the files pool.
102+
root_url: Url,
67103
}
68104

69105
impl HttpRepositoryClient {
@@ -74,110 +110,99 @@ impl HttpRepositoryClient {
74110

75111
/// Construct an instance using the given [Client] and URL.
76112
///
77-
/// The URL should have an `InRelease` or `Release` file under it.
113+
/// The given URL should be the value that follows the
114+
/// `deb` line in apt sources files. e.g. for
115+
/// `deb https://deb.debian.org/debian stable main`, the value would be
116+
/// `https://deb.debian.org/debian`. The URL typically has a `dists/` directory
117+
/// underneath.
78118
pub fn new_client(client: Client, url: impl IntoUrl) -> Result<Self, HttpError> {
79-
let repository_url = url.into_url()?;
80-
81-
// Pool paths are relative to what's known as the Debian base URL, which is
82-
// typically 2 path components up from where we are.
83-
let debian_base_url = repository_url
84-
.join("../..")
85-
.unwrap_or_else(|_| repository_url.clone());
86-
87-
Ok(Self {
88-
client,
89-
debian_base_url,
90-
repository_url,
91-
})
92-
}
93-
94-
/// Debian base URL for this fetcher.
95-
pub fn debian_base_url(&self) -> &Url {
96-
&self.debian_base_url
97-
}
119+
let mut root_url = url.into_url()?;
98120

99-
/// Set the Debian base URL for this fetcher.
100-
///
101-
/// This is the directory from which package/pool paths are relative to. It is
102-
/// typically 2 directory levels up from where the `InRelease` file is located.
103-
pub fn set_debian_base_url(&mut self, url: impl IntoUrl) -> Result<(), HttpError> {
104-
self.debian_base_url = url.into_url()?;
121+
// Trailing slashes are significant to the Url type when we .join(). So ensure
122+
// the URL path ends with a trailing slash.
123+
if !root_url.path().ends_with('/') {
124+
root_url.set_path(&format!("{}/", root_url.path()));
125+
}
105126

106-
Ok(())
127+
Ok(Self { client, root_url })
107128
}
108129

109-
/// Repository URL for this fetcher.
110-
pub fn repository_url(&self) -> &Url {
111-
&self.repository_url
130+
/// Base URL for this fetcher.
131+
pub fn root_url(&self) -> &Url {
132+
&self.root_url
112133
}
113134

114-
async fn transform_http_response(
115-
res: Response,
116-
compression: IndexFileCompression,
117-
) -> Result<Pin<Box<dyn AsyncRead>>, HttpError> {
118-
let stream = res
119-
.bytes_stream()
120-
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)));
121-
122-
Ok(match compression {
123-
IndexFileCompression::None => Box::pin(stream.into_async_read()),
124-
IndexFileCompression::Gzip => Box::pin(GzipDecoder::new(stream.into_async_read())),
125-
IndexFileCompression::Xz => Box::pin(XzDecoder::new(stream.into_async_read())),
126-
IndexFileCompression::Bzip2 => Box::pin(BzDecoder::new(stream.into_async_read())),
127-
IndexFileCompression::Lzma => Box::pin(LzmaDecoder::new(stream.into_async_read())),
128-
})
129-
}
130-
131-
/// Perform an HTTP GET for a path relative to the Debian root directory.
132-
///
133-
/// This is typically called to retrieve non-index files (e.g. .deb packages).
134-
pub async fn get_debian_path(&self, path: &str) -> Result<Response, HttpError> {
135-
let url = self.debian_base_url.join(path)?;
136-
let res = self.client.get(url).send().await?;
135+
/// Perform an HTTP GET for a path relative to the root directory/URL.
136+
pub async fn get_path(&self, path: &str) -> Result<Response, HttpError> {
137+
let res = self.client.get(self.root_url.join(path)?).send().await?;
137138

138139
Ok(res.error_for_status()?)
139140
}
140141

141-
/// Perform an HTTP GET for a path relative to the Debian root directory.
142+
/// Perform an HTTP GET for a path relative to the root directory/URL.
142143
///
143144
/// This transforms the response to an async reader for reading the HTTP response body.
144-
pub async fn get_debian_path_reader(
145+
pub async fn get_path_reader(
145146
&self,
146147
path: &str,
147148
compression: IndexFileCompression,
148149
) -> Result<Pin<Box<dyn AsyncRead>>, HttpError> {
149-
let res = self.get_debian_path(path).await?;
150+
let res = self.get_path(path).await?;
150151

151-
Self::transform_http_response(res, compression).await
152+
transform_http_response(res, compression).await
152153
}
153154

154-
/// Perform an HTTP GET for a path relative to the repository root directory.
155-
pub async fn get_repository_path(&self, path: &str) -> Result<Response, HttpError> {
156-
let url = self.repository_url.join(path)?;
157-
let res = self.client.get(url).send().await?;
158-
159-
Ok(res.error_for_status()?)
155+
/// Obtain a [HttpDistributionClient] for a given distribution name/path.
156+
///
157+
/// The returned client has its root URL set to `self.root_url().join("dists/{distribution}")`.
158+
pub fn distribution_client(&self, distribution: &str) -> HttpDistributionClient<'_> {
159+
HttpDistributionClient {
160+
root_client: self,
161+
distribution_path: format!("dists/{}", distribution.trim_matches('/')),
162+
}
160163
}
161164

162-
/// Perform an HTTP GET for a path relative to the repository root directory.
165+
/// Obtain a [HttpDistributionClient] for a given sub-directory.
163166
///
164-
/// Returns a reader that can be used to read HTTP response body payload, with an
165-
/// optional decompression content transformation transparently applied.
166-
pub async fn get_repository_path_reader(
167-
&self,
168-
path: &str,
169-
compression: IndexFileCompression,
170-
) -> Result<Pin<Box<dyn AsyncRead>>, HttpError> {
171-
let res = self.get_repository_path(path).await?;
167+
/// The root URL of the returned client is `self.root_url().join(path)`, without
168+
/// `dists/` prepended. This allows specifying non-standard paths to the distribution.
169+
pub fn distribution_client_raw_path(&self, path: &str) -> HttpDistributionClient<'_> {
170+
HttpDistributionClient {
171+
root_client: self,
172+
distribution_path: path.trim_matches('/').to_string(),
173+
}
174+
}
175+
}
176+
177+
fn join_path(a: &str, b: &str) -> String {
178+
format!("{}/{}", a.trim_matches('/'), b.trim_start_matches('/'))
179+
}
180+
181+
/// An HTTP client bound to a specific distribution.
182+
///
183+
/// Debian repositories have the form `<root>/dists/<distribution>/` where the
184+
/// *distribution* directory contains an `InRelease` and/or `Release` file.
185+
///
186+
/// This type models a client interface to a specific distribution path under a root
187+
/// directory.
188+
pub struct HttpDistributionClient<'client> {
189+
root_client: &'client HttpRepositoryClient,
190+
distribution_path: String,
191+
}
172192

173-
Self::transform_http_response(res, compression).await
193+
impl<'client> HttpDistributionClient<'client> {
194+
/// Perform an HTTP GET for a path relative to the distribution's root directory.
195+
pub async fn get_path(&self, path: &str) -> Result<Response, HttpError> {
196+
self.root_client
197+
.get_path(&join_path(&self.distribution_path, path))
198+
.await
174199
}
175200

176201
/// Fetch and parse the `InRelease` file from the repository.
177202
///
178203
/// Returns a new object bound to the parsed `InRelease` file.
179-
pub async fn fetch_inrelease(&self) -> Result<HttpReleaseClient<'_>, HttpError> {
180-
let res = self.get_repository_path("InRelease").await?;
204+
pub async fn fetch_inrelease(&self) -> Result<HttpReleaseClient<'client>, HttpError> {
205+
let res = self.get_path("InRelease").await?;
181206

182207
let data = res.bytes().await?;
183208

@@ -192,7 +217,8 @@ impl HttpRepositoryClient {
192217
let fetch_compression = IndexFileCompression::Xz;
193218

194219
Ok(HttpReleaseClient {
195-
base_client: self,
220+
root_client: self.root_client,
221+
distribution_path: self.distribution_path.clone(),
196222
release,
197223
fetch_checksum: **fetch_checksum,
198224
fetch_compression,
@@ -202,7 +228,8 @@ impl HttpRepositoryClient {
202228

203229
/// Repository HTTP client bound to a parsed `Release` or `InRelease` file.
204230
pub struct HttpReleaseClient<'client> {
205-
base_client: &'client HttpRepositoryClient,
231+
root_client: &'client HttpRepositoryClient,
232+
distribution_path: String,
206233
release: ReleaseFile<'static>,
207234
/// Which checksum flavor to fetch and verify.
208235
fetch_checksum: ChecksumType,
@@ -216,6 +243,26 @@ impl<'client> AsRef<ReleaseFile<'static>> for HttpReleaseClient<'client> {
216243
}
217244

218245
impl<'client> HttpReleaseClient<'client> {
246+
/// Perform an HTTP GET for a path relative to the distribution's root directory.
247+
pub async fn get_path(&self, path: &str) -> Result<Response, HttpError> {
248+
self.root_client
249+
.get_path(&join_path(&self.distribution_path, path))
250+
.await
251+
}
252+
253+
/// Perform an HTTP GET for a path relative to the distribution's root directory.
254+
///
255+
/// This transforms the response to an async reader for reading the HTTP response body.
256+
pub async fn get_path_reader(
257+
&self,
258+
path: &str,
259+
compression: IndexFileCompression,
260+
) -> Result<Pin<Box<dyn AsyncRead>>, HttpError> {
261+
let res = self.get_path(path).await?;
262+
263+
transform_http_response(res, compression).await
264+
}
265+
219266
/// Fetch a `Packages` file and convert it to a stream of [BinaryPackageControlFile] instances.
220267
pub async fn fetch_packages(
221268
&self,
@@ -239,9 +286,7 @@ impl<'client> HttpReleaseClient<'client> {
239286
// TODO make this stream output.
240287

241288
let mut reader = ControlParagraphAsyncReader::new(futures::io::BufReader::new(
242-
self.base_client
243-
.get_repository_path_reader(path, entry.compression)
244-
.await?,
289+
self.get_path_reader(path, entry.compression).await?,
245290
));
246291

247292
let mut res = BinaryPackageList::default();
@@ -278,14 +323,15 @@ mod test {
278323
},
279324
};
280325

281-
const BULLSEYE_URL: &str =
282-
"http://snapshot.debian.org/archive/debian/20211120T085721Z/dists/bullseye/";
326+
const BULLSEYE_URL: &str = "http://snapshot.debian.org/archive/debian/20211120T085721Z";
283327

284328
#[tokio::test]
285329
async fn bullseye_release() -> Result<()> {
286-
let repo = HttpRepositoryClient::new(BULLSEYE_URL)?;
330+
let root = HttpRepositoryClient::new(BULLSEYE_URL)?;
331+
332+
let dist = root.distribution_client("bullseye");
287333

288-
let release = repo.fetch_inrelease().await?;
334+
let release = dist.fetch_inrelease().await?;
289335

290336
let packages = release.fetch_packages("main", "amd64", false).await?;
291337
assert_eq!(packages.len(), 58606);

0 commit comments

Comments
 (0)