Skip to content

Commit ea17a77

Browse files
johnbattyclaude
andauthored
Fix security and correctness issues in artifacts_download (#780)
- Use path_segments_mut() for URL construction to prevent path injection - Reject manifest entries with .. path components (path traversal) - Normalize blob ID map keys to uppercase for consistent matching - Batch-resolve all file root blob URLs in one request - Simplify nibble_pos type in decompressor (bool was unused) - Cap deserialization error previews at 512 bytes - Guard Vec::with_capacity against i64->usize overflow Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8d1423d commit ea17a77

3 files changed

Lines changed: 80 additions & 44 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Fixed
11+
12+
- `artifacts_download`: use `Url::path_segments_mut` for URL construction to prevent path injection via user-controlled segments.
13+
- `artifacts_download`: reject manifest entries containing `..` path components to prevent path traversal.
14+
- `artifacts_download`: normalize blob ID map keys to uppercase for consistent matching against locally-generated IDs.
15+
- `artifacts_download`: batch-resolve all file root blob URLs in a single request instead of one per file.
16+
- `artifacts_download`: cap deserialization error message previews at 512 bytes.
17+
- `artifacts_download`: guard `Vec::with_capacity` against `i64`-to-`usize` overflow on 32-bit targets.
18+
- `artifacts_download`: simplify `nibble_pos` type in decompressor (unused boolean removed).
19+
1020
### Added
1121

1222
- All generated enum types now implement `std::fmt::Display`.

azure_devops_rust_api/src/artifacts_download/decompress.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pub fn decompress_chunk(compressed: &[u8]) -> Result<Vec<u8>> {
2727
let mut output = Vec::with_capacity(compressed.len() * 4);
2828
let mut ci = 0usize;
2929
let mut indicator: i32;
30-
let mut nibble_pos: Option<(usize, bool)> = None;
30+
let mut nibble_pos: Option<usize> = None;
3131

3232
// When true, the current indicator MSB is a literal-encoding bit (not a
3333
// decision bit). This happens after reading a fresh indicator whose raw
@@ -152,16 +152,17 @@ fn process_match(
152152
compressed: &[u8],
153153
ci: &mut usize,
154154
output: &mut Vec<u8>,
155-
nibble_pos: &mut Option<(usize, bool)>,
155+
nibble_pos: &mut Option<usize>,
156156
) -> Result<()> {
157+
// Caller ensures ci + 1 < compressed.len(), so this 2-byte read is in bounds.
157158
let v = u16::from_le_bytes(compressed[*ci..*ci + 2].try_into().unwrap());
158159
*ci += 2;
159160

160161
let mut match_len = (v & 7) as usize;
161162
let offset = ((v >> 3) as usize) + 1;
162163

163164
if match_len == 7 {
164-
let nibble_val = if let Some((nib_idx, _)) = nibble_pos.take() {
165+
let nibble_val = if let Some(nib_idx) = nibble_pos.take() {
165166
(compressed[nib_idx] >> 4) as usize
166167
} else {
167168
if *ci >= compressed.len() {
@@ -172,7 +173,7 @@ fn process_match(
172173
}
173174
let nib_idx = *ci;
174175
*ci += 1;
175-
*nibble_pos = Some((nib_idx, true));
176+
*nibble_pos = Some(nib_idx);
176177
(compressed[nib_idx] & 0x0F) as usize
177178
};
178179

azure_devops_rust_api/src/artifacts_download/mod.rs

Lines changed: 65 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,16 @@ impl Client {
189189
let resp = self.send(&mut req).await?;
190190
let body = resp.into_body();
191191
serde_json::from_slice(&body).map_err(|e| {
192+
const MAX_PREVIEW: usize = 512;
193+
let truncated = body.len() > MAX_PREVIEW;
194+
let preview = String::from_utf8_lossy(&body[..body.len().min(MAX_PREVIEW)]);
192195
Error::with_error(
193196
ErrorKind::DataConversion,
194197
e,
195198
format!(
196-
"Failed to deserialize response:\n{}",
197-
String::from_utf8_lossy(&body)
199+
"Failed to deserialize response:\n{}{}",
200+
preview,
201+
if truncated { "…" } else { "" }
198202
),
199203
)
200204
})
@@ -214,11 +218,10 @@ impl Client {
214218
/// Discover Azure DevOps service URLs via the ResourceAreas API.
215219
/// Returns a map of service name -> location URL.
216220
pub async fn discover_services(&self, organization: &str) -> Result<HashMap<String, String>> {
217-
let url = Url::parse(&format!(
218-
"https://dev.azure.com/{}/_apis/ResourceAreas",
219-
organization
220-
))
221-
.with_context(ErrorKind::DataConversion, "invalid organization URL")?;
221+
let mut url = Url::parse("https://dev.azure.com").expect("hardcoded base URL is valid");
222+
url.path_segments_mut()
223+
.expect("https URL is always a base")
224+
.extend(&[organization, "_apis", "ResourceAreas"]);
222225

223226
let areas: ResourceAreasResponse = self.get_json(url).await?;
224227
let map: HashMap<String, String> = areas
@@ -232,9 +235,9 @@ impl Client {
232235
/// Find the packages service URL from discovered services.
233236
pub fn find_packages_url(services: &HashMap<String, String>, organization: &str) -> String {
234237
services
235-
.values()
236-
.find(|url| url.contains("pkgs."))
238+
.get("packaging")
237239
.cloned()
240+
.or_else(|| services.values().find(|url| url.contains("pkgs.")).cloned())
238241
.unwrap_or_else(|| format!("https://pkgs.dev.azure.com/{}", organization))
239242
}
240243

@@ -259,15 +262,25 @@ impl Client {
259262
name: &str,
260263
version: &str,
261264
) -> Result<PackageMetadata> {
262-
let mut url = Url::parse(&format!(
263-
"{}/{}/_packaging/{}/upack/packages/{}/versions/{}",
264-
packages_url.trim_end_matches('/'),
265-
project,
266-
feed,
267-
name,
268-
version,
269-
))
270-
.with_context(ErrorKind::DataConversion, "invalid package metadata URL")?;
265+
let mut url = Url::parse(packages_url.trim_end_matches('/'))
266+
.with_context(ErrorKind::DataConversion, "invalid packages URL")?;
267+
url.path_segments_mut()
268+
.map_err(|()| {
269+
Error::with_message(
270+
ErrorKind::DataConversion,
271+
"packages URL is not a valid base URL",
272+
)
273+
})?
274+
.extend(&[
275+
project,
276+
"_packaging",
277+
feed,
278+
"upack",
279+
"packages",
280+
name,
281+
"versions",
282+
version,
283+
]);
271284

272285
url.query_pairs_mut().append_pair("intent", "Download");
273286
self.get_json(url).await
@@ -281,11 +294,16 @@ impl Client {
281294
blob_service_url: &str,
282295
blob_ids: &[String],
283296
) -> Result<HashMap<String, String>> {
284-
let mut url = Url::parse(&format!(
285-
"{}/_apis/dedup/urls",
286-
blob_service_url.trim_end_matches('/')
287-
))
288-
.with_context(ErrorKind::DataConversion, "invalid dedup URL")?;
297+
let mut url = Url::parse(blob_service_url.trim_end_matches('/'))
298+
.with_context(ErrorKind::DataConversion, "invalid dedup URL")?;
299+
url.path_segments_mut()
300+
.map_err(|()| {
301+
Error::with_message(
302+
ErrorKind::DataConversion,
303+
"dedup service URL is not a valid base URL",
304+
)
305+
})?
306+
.extend(&["_apis", "dedup", "urls"]);
289307

290308
url.query_pairs_mut().append_pair("allowEdge", "true");
291309

@@ -306,13 +324,18 @@ impl Client {
306324

307325
let resp = self.send(&mut req).await?;
308326
let body = resp.into_body();
309-
serde_json::from_slice(&body).map_err(|e| {
327+
let map: HashMap<String, String> = serde_json::from_slice(&body).map_err(|e| {
310328
Error::with_error(
311329
ErrorKind::DataConversion,
312330
e,
313331
"Failed to parse blob URL response",
314332
)
315-
})
333+
})?;
334+
// Normalize keys to uppercase so they always match locally-generated blob IDs.
335+
Ok(map
336+
.into_iter()
337+
.map(|(k, v)| (k.to_uppercase(), v))
338+
.collect())
316339
}
317340

318341
/// Download a blob from a SAS URL (no auth required).
@@ -385,15 +408,6 @@ impl Client {
385408
chunk_ids.push(format!("{}01", hex_hash));
386409
}
387410

388-
if chunk_ids.is_empty() {
389-
return Err(Error::with_message(
390-
ErrorKind::DataConversion,
391-
format!(
392-
"No chunk references found in dedup node blob ({} bytes)",
393-
data.len()
394-
),
395-
));
396-
}
397411
Ok(chunk_ids)
398412
}
399413

@@ -444,12 +458,15 @@ impl Client {
444458
)
445459
})?;
446460

447-
// Step 5: Download each file
461+
// Step 5: Batch-resolve all file root blob URLs, then download each file
462+
let all_file_blob_ids: Vec<String> =
463+
manifest.items.iter().map(|i| i.blob.id.clone()).collect();
464+
let all_root_urls = self
465+
.resolve_blob_urls(&blob_service_url, &all_file_blob_ids)
466+
.await?;
467+
448468
for item in &manifest.items {
449-
let file_root_urls = self
450-
.resolve_blob_urls(&blob_service_url, std::slice::from_ref(&item.blob.id))
451-
.await?;
452-
let file_root_url = file_root_urls
469+
let file_root_url = all_root_urls
453470
.get(&item.blob.id)
454471
.ok_or_else(|| Error::with_message(ErrorKind::Other, "File root URL not found"))?;
455472
let file_root_data = self.download_blob(file_root_url).await?;
@@ -462,7 +479,9 @@ impl Client {
462479
.resolve_blob_urls(&blob_service_url, &chunk_ids)
463480
.await?;
464481

465-
let mut file_data = Vec::with_capacity(item.blob.size as usize);
482+
let mut file_data = usize::try_from(item.blob.size)
483+
.map(Vec::with_capacity)
484+
.unwrap_or_default();
466485
for chunk_id in &chunk_ids {
467486
let chunk_url = chunk_urls.get(chunk_id).ok_or_else(|| {
468487
Error::with_message(
@@ -480,6 +499,12 @@ impl Client {
480499
};
481500

482501
let relative_path = item.path.trim_start_matches('/');
502+
if relative_path.split('/').any(|c| c == "..") {
503+
return Err(Error::with_message(
504+
ErrorKind::DataConversion,
505+
format!("Invalid path in manifest: {}", item.path),
506+
));
507+
}
483508
let file_path = output_path.join(relative_path);
484509
if let Some(parent) = file_path.parent() {
485510
std::fs::create_dir_all(parent).map_err(|e| {

0 commit comments

Comments
 (0)