Skip to content

Commit 504ba2b

Browse files
axpnetaeroftp[bot]claude
committed
feat(hash): add server-side Tier D checksums for WebDAV and FTP
WebDAV/Nextcloud now expose oc:checksums via a metadata-only Depth:0 PROPFIND in a new supports_checksum()/checksum(), parsed into the canonical lowercase keys every hashsum/lsjson/provider_checksum consumer expects (empty map, never a content download, on servers that do not advertise it). The Depth:1 listing PROPFIND is left unchanged: carrying the prop there 400s some Apache mod_dav configs, so correctness wins over the zero-round-trip optimisation. FTP already issued the FEAT-detected HASH/XMD5/XCRC/XSHA1 but emitted non-canonical labels (SHA-256, MD5, ...) that no consumer matched; they now map through canonical_hash_key, digests lowercased. 5 deterministic unit tests; full lib suite 1603/0, clippy clean. Live-validated byte-identical on a Nextcloud lab box; plain mod_dav cleanly omits. Co-Authored-By: aeroftp[bot] <aeroftp[bot]@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 00c354f commit 504ba2b

2 files changed

Lines changed: 212 additions & 5 deletions

File tree

src-tauri/src/providers/ftp.rs

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,33 @@ impl StorageProvider for FtpProvider {
15651565
// FTP Hash/Checksum Commands (B3)
15661566
// =============================================================================
15671567

1568+
/// Map an FTP server's hash-algorithm label to the canonical lowercase key
/// shared by every `StorageProvider::checksum()` impl and the `hashsum` /
/// `lsjson --hash` consumers.
///
/// The label is the leading token of an RFC-draft `HASH` reply, or the
/// algorithm implied by `XMD5`/`XCRC`/`XSHA1`. Servers disagree on case and
/// separators (`SHA-256`, `sha256`, `SHA256`), so every character that is not
/// an ASCII letter or digit is dropped and the remainder is lowercased. The
/// usual labels therefore become `sha256`, `sha512`, `sha384`, `sha1`, `md5`,
/// `crc32` and `adler32`; an unrecognised label degrades to the same
/// normalised form (`Whirlpool` -> `whirlpool`) rather than being dropped.
///
/// Returns an empty string when `server_algo` contains no ASCII alphanumeric
/// character at all.
fn canonical_hash_key(server_algo: &str) -> String {
    // No lookup table is needed: each canonical key is exactly the
    // separator-stripped, lowercased label, so one pass covers both the
    // known and the exotic algorithms.
    server_algo
        .chars()
        .filter(|c| c.is_ascii_alphanumeric())
        .map(|c| c.to_ascii_lowercase())
        .collect()
}
1594+
15681595
impl FtpProvider {
15691596
/// Compute a remote file checksum using the best available command.
15701597
/// Returns a map keyed by canonical lowercase algorithm, like {"md5": "abc123..."} or {"crc32": "..."} etc.
@@ -1607,15 +1634,22 @@ impl FtpProvider {
16071634
// e.g. "SHA-256 0-EOF abc123def456 /path/to/file.txt"
16081635
let parts: Vec<&str> = body.splitn(4, ' ').collect();
16091636
if parts.len() >= 3 {
1610-
let algo = parts[0]; // actual algorithm from server
1611-
let hash = parts[2];
1612-
result.insert(algo.to_string(), hash.to_string());
1637+
result.insert(
1638+
canonical_hash_key(parts[0]),
1639+
parts[2].trim().to_ascii_lowercase(),
1640+
);
16131641
} else {
1614-
result.insert(default_algo.to_string(), body.trim().to_string());
1642+
result.insert(
1643+
canonical_hash_key(default_algo),
1644+
body.trim().to_ascii_lowercase(),
1645+
);
16151646
}
16161647
} else {
16171648
// XMD5/XCRC/XSHA1: response is just the hex hash
1618-
result.insert(default_algo.to_string(), body.trim().to_string());
1649+
result.insert(
1650+
canonical_hash_key(default_algo),
1651+
body.trim().to_ascii_lowercase(),
1652+
);
16191653
}
16201654

16211655
Ok(result)
@@ -1846,6 +1880,20 @@ mod tests {
18461880
assert_eq!(entry.name, "Projects");
18471881
assert!(entry.is_dir);
18481882
}
1883+
1884+
#[test]
fn ftp_hash_keys_canonicalised() {
    // RFC-draft HASH labels and the X* family must land on the lowercase
    // keys every checksum() consumer looks up, whatever case or separator
    // the server used.
    let cases = [
        ("SHA-256", "sha256"),
        ("sha256", "sha256"),
        ("SHA-512", "sha512"),
        ("SHA-384", "sha384"),
        ("SHA-1", "sha1"),
        ("MD5", "md5"),
        ("CRC32", "crc32"),
        ("ADLER32", "adler32"),
        // Separators other than '-' are stripped as well.
        ("sha_256", "sha256"),
        ("Adler 32", "adler32"),
        // Unknown algo degrades, never dropped.
        ("Whirlpool", "whirlpool"),
        // A label with nothing alphanumeric normalises to the empty key.
        ("--", ""),
    ];
    for (label, expected) in cases {
        assert_eq!(canonical_hash_key(label), expected, "label {label:?}");
    }
}
18491897
}
18501898

18511899
/// Dangerous TLS certificate verifier that accepts all certificates.

src-tauri/src/providers/webdav.rs

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,6 +1551,56 @@ fn local_name(raw: &[u8]) -> String {
15511551
}
15521552
}
15531553

1554+
/// Canonical lowercase key for an ownCloud/Nextcloud `oc:checksums`
/// algorithm label, matching every `StorageProvider::checksum()` impl and the
/// `hashsum` / `lsjson --hash` consumers.
///
/// Every character that is not an ASCII letter or digit is dropped and the
/// remainder is lowercased, so `SHA1`, `SHA-256`, `MD5` and `ADLER32` become
/// `sha1`, `sha256`, `md5` and `adler32`. Unknown labels degrade to the same
/// normalised form (still a real server-side digest, just an exotic algo)
/// rather than being dropped.
///
/// Returns an empty string when `algo` contains no ASCII alphanumeric
/// character; callers are expected to skip such a key.
fn canonical_checksum_key(algo: &str) -> String {
    // No lookup table is needed: each canonical key is exactly the
    // separator-stripped, lowercased label.
    algo.chars()
        .filter(|c| c.is_ascii_alphanumeric())
        .map(|c| c.to_ascii_lowercase())
        .collect()
}
1577+
1578+
/// Parse an `<oc:checksums><oc:checksum>` payload into canonical
1579+
/// `{key: hexdigest}` pairs. The element is a single string of
1580+
/// whitespace-separated `ALGO:HEXDIGEST` tokens, e.g.
1581+
/// `"SHA1:f1d2d2... MD5:900150... ADLER32:024d0127"`. Tokens without a
1582+
/// `:`, with an empty digest, or with a non-hex digest are skipped so a
1583+
/// malformed entry can never poison the map. Returns an empty map for
1584+
/// every WebDAV server that does not emit `oc:checksums` (the
1585+
/// server-side-or-omit contract: no content is ever downloaded).
1586+
fn parse_oc_checksums(raw: &str) -> HashMap<String, String> {
1587+
let mut out = HashMap::new();
1588+
for token in raw.split_whitespace() {
1589+
let Some((algo, digest)) = token.split_once(':') else {
1590+
continue;
1591+
};
1592+
let digest = digest.trim().to_ascii_lowercase();
1593+
if digest.is_empty() || !digest.bytes().all(|b| b.is_ascii_hexdigit()) {
1594+
continue;
1595+
}
1596+
let key = canonical_checksum_key(algo);
1597+
if !key.is_empty() {
1598+
out.entry(key).or_insert(digest);
1599+
}
1600+
}
1601+
out
1602+
}
1603+
15541604
#[async_trait]
15551605
impl StorageProvider for WebDavProvider {
15561606
fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
@@ -2372,6 +2422,71 @@ impl StorageProvider for WebDavProvider {
23722422
}
23732423
}
23742424

2425+
fn supports_checksum(&self) -> bool {
2426+
// ownCloud/Nextcloud expose server-side digests via the
2427+
// `oc:checksums` PROPFIND prop; every other WebDAV server simply
2428+
// omits it, in which case `checksum()` returns an empty map and
2429+
// consumers omit / fall back. The probe is a metadata PROPFIND,
2430+
// never a content download: the server-side-or-omit contract.
2431+
true
2432+
}
2433+
2434+
async fn checksum(&mut self, path: &str) -> Result<HashMap<String, String>, ProviderError> {
2435+
if !self.connected {
2436+
return Err(ProviderError::NotConnected);
2437+
}
2438+
2439+
const PROPFIND_BODY: &str = r#"<?xml version="1.0" encoding="utf-8"?>
2440+
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
2441+
<d:prop>
2442+
<oc:checksums/>
2443+
</d:prop>
2444+
</d:propfind>"#;
2445+
2446+
// Mirror `stat()`'s file-first / collection-retry: a file must be
2447+
// requested without a trailing slash, while a slash-less
2448+
// collection can be 301'd by Apache to a scheme-downgraded URL
2449+
// that strips auth (see `collection_path`).
2450+
let collection_form = Self::collection_path(path);
2451+
let mut attempts: Vec<&str> = vec![path];
2452+
if collection_form != path {
2453+
attempts.push(collection_form.as_str());
2454+
}
2455+
2456+
for attempt in attempts {
2457+
let response = self
2458+
.request(webdav_methods::propfind(), attempt)
2459+
.header("Depth", "0")
2460+
.header("Content-Type", "application/xml")
2461+
.body(PROPFIND_BODY)
2462+
.send()
2463+
.await
2464+
.map_err(|e| ProviderError::NetworkError(e.to_string()))?;
2465+
2466+
match response.status() {
2467+
StatusCode::OK | StatusCode::MULTI_STATUS => {
2468+
let xml = response
2469+
.text()
2470+
.await
2471+
.map_err(|e| ProviderError::ParseError(e.to_string()))?;
2472+
let props = self.extract_xml_properties(&xml);
2473+
return Ok(props
2474+
.get("checksum")
2475+
.map(|s| parse_oc_checksums(s))
2476+
.unwrap_or_default());
2477+
}
2478+
// Ambiguous path type: retry in collection form.
2479+
StatusCode::NOT_FOUND | StatusCode::UNAUTHORIZED => continue,
2480+
// Any other status: the server cannot answer the prop.
2481+
// Treat as "no server-side hash" (omit) rather than an
2482+
// error that would fail a listing or trigger a download.
2483+
_ => return Ok(HashMap::new()),
2484+
}
2485+
}
2486+
2487+
Ok(HashMap::new())
2488+
}
2489+
23752490
async fn size(&mut self, path: &str) -> Result<u64, ProviderError> {
23762491
let entry = self.stat(path).await?;
23772492
Ok(entry.size)
@@ -3211,4 +3326,48 @@ mod tests {
32113326
t.server_root = Some("/".to_string());
32123327
assert_eq!(t.resolve_root("/aeroftp-utest"), "/aeroftp-utest");
32133328
}
3329+
3330+
#[test]
fn oc_checksums_parsed_to_canonical_lowercase_keys() {
    // Payload in the shape Nextcloud/ownCloud send: ALGO:HEX tokens
    // separated by single spaces.
    let payload = "SHA1:f1d2d2f924e986ac86fdf7b36c94bcdf32beec15 \
                   MD5:900150983cd24fb0d6963f7d28e17f72 ADLER32:024d0127";
    let parsed = parse_oc_checksums(payload);

    let expected = [
        ("sha1", "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15"),
        ("md5", "900150983cd24fb0d6963f7d28e17f72"),
        ("adler32", "024d0127"),
    ];
    for (key, digest) in expected {
        assert_eq!(parsed.get(key).map(String::as_str), Some(digest));
    }

    // Every key that made it into the map is already lowercase.
    assert!(parsed.keys().all(|k| k == &k.to_ascii_lowercase()));
}
3349+
3350+
#[test]
fn oc_checksums_canonicalises_separators_and_case() {
    // A dashed upper-case label and a lower-case label, both carrying
    // upper-case hex, must come out with canonical keys and lowercase digests.
    let parsed = parse_oc_checksums("SHA-256:ABCDEF01 sha512:00FF");
    let lookup = |key: &str| parsed.get(key).map(String::as_str);

    assert_eq!(lookup("sha256"), Some("abcdef01"));
    assert_eq!(lookup("sha512"), Some("00ff"));
}
3356+
3357+
#[test]
fn oc_checksums_skips_malformed_and_empty() {
    // The empty string, and a payload made only of a colon-less token, an
    // empty digest and a non-hex digest, both produce nothing.
    for unusable in ["", "SHA1 MD5: SHA256:zz_not_hex"] {
        assert!(parse_oc_checksums(unusable).is_empty());
    }

    // One well-formed token surrounded by malformed ones is still kept.
    let parsed = parse_oc_checksums("garbage MD5:0a1b BAD:");
    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed.get("md5").map(String::as_str), Some("0a1b"));
}
3367+
3368+
#[test]
fn unknown_algo_degrades_not_dropped() {
    // A label outside the usual set is kept under its lowercased name.
    let parsed = parse_oc_checksums("WHIRLPOOL:dead");
    let digest = parsed.get("whirlpool").map(String::as_str);
    assert_eq!(digest, Some("dead"));
}
32143373
}

0 commit comments

Comments
 (0)