|
6 | 6 | from hashlib import md5 |
7 | 7 | from http import HTTPStatus |
8 | 8 | from http.server import BaseHTTPRequestHandler |
9 | | -from typing import Protocol |
| 9 | +from typing import Literal, Protocol |
10 | 10 |
|
11 | 11 | from .auth import InvalidSignature, S3Auth |
12 | 12 | from .state import S3State |
@@ -112,6 +112,37 @@ def _handle_write(self): |
112 | 112 |
|
113 | 113 | qs = _up.parse_qs(parsed.query, keep_blank_values=True) |
114 | 114 |
|
| 115 | + # S3 CopyObject: PUT to destination key with x-amz-copy-source and empty body. |
| 116 | + copy_src = ( |
| 117 | + self.headers.get("x-amz-copy-source") or self.headers.get("X-Amz-Copy-Source") or "" |
| 118 | + ).strip() |
| 119 | + if copy_src: |
| 120 | + if not bucket: |
| 121 | + self._send_error(HTTPStatus.BAD_REQUEST, "Bucket must be specified") |
| 122 | + return |
| 123 | + if key == "": |
| 124 | + self._send_error(HTTPStatus.BAD_REQUEST, "CopyObject requires an object key") |
| 125 | + return |
| 126 | + try: |
| 127 | + src_bucket, src_key = _parse_copy_source(copy_src) |
| 128 | + except ValueError as err: |
| 129 | + self._send_error(HTTPStatus.BAD_REQUEST, str(err)) |
| 130 | + return |
| 131 | + try: |
| 132 | + data = self.server.state.copy_object(bucket, key, src_bucket, src_key) |
| 133 | + except FileNotFoundError: |
| 134 | + self._send_error(HTTPStatus.NOT_FOUND, "NoSuchKey") |
| 135 | + return |
| 136 | + xml = ( |
| 137 | + '<?xml version="1.0" encoding="UTF-8"?>' |
| 138 | + "<CopyObjectResult>" |
| 139 | + f"<LastModified>{_escape_xml(formatdate(usegmt=True))}</LastModified>" |
| 140 | + f"<ETag>&quot;{_escape_xml(_etag(data))}&quot;</ETag>" |
| 141 | + "</CopyObjectResult>" |
| 142 | + ).encode() |
| 143 | + self._send_bytes(xml, status=HTTPStatus.OK, content_type="application/xml") |
| 144 | + return |
| 145 | + |
115 | 146 | # Multipart: upload part |
116 | 147 | if "uploadId" in qs and "partNumber" in qs: |
117 | 148 | upload_id = qs["uploadId"][0] |
@@ -160,15 +191,15 @@ def _handle_read(self, listing: bool, only_headers: bool = False): |
160 | 191 | self._send_error(HTTPStatus.BAD_REQUEST, "Bucket must be specified") |
161 | 192 | return |
162 | 193 |
|
163 | | - if key == "": # List bucket contents |
| 194 | + if key == "": # List bucket contents (ListObjects / ListObjectsV2) |
164 | 195 | if not listing: |
165 | | - # We treat listing with GET only |
166 | 196 | try: |
167 | 197 | objects = self.server.state.list_objects(bucket) |
168 | 198 | except KeyError: |
169 | 199 | self._send_error(HTTPStatus.NOT_FOUND, "Bucket not found") |
170 | 200 | return |
171 | | - xml_body = self._render_bucket_list(bucket, objects) |
| 201 | + qs = _up.parse_qs(parsed.query, keep_blank_values=True) |
| 202 | + xml_body = self._render_list_bucket_result(bucket, objects, qs) |
172 | 203 | self._send_bytes(xml_body, content_type="application/xml") |
173 | 204 | else: |
174 | 205 | self._send_error(HTTPStatus.NOT_IMPLEMENTED, "Listing not implemented") |
@@ -359,51 +390,135 @@ def _send_bytes( |
359 | 390 | if self.command != "HEAD": |
360 | 391 | self.wfile.write(data) |
361 | 392 |
|
362 | | - @staticmethod |
363 | | - def _render_bucket_list(bucket: str, objects: list[str]) -> bytes: |
364 | | - """Generate an XML listing of objects in a bucket. |
| 393 | + def _render_list_bucket_result( |
| 394 | + self, |
| 395 | + bucket: str, |
| 396 | + all_keys: list[str], |
| 397 | + qs: dict[str, list[str]], |
| 398 | + ) -> bytes: |
| 399 | + """Build ListBucketResult XML (ListObjectsV2-compatible). |
| 400 | +
|
| 401 | + Clients (e.g. MSC) send ``delimiter=/`` and ``prefix=`` and expect |
| 402 | + ``CommonPrefixes`` for nested keys such as ``parts/data-0.tar``, not |
| 403 | + only flat ``Contents``. |
| 404 | + """ |
| 405 | + prefix = (qs.get("prefix") or [""])[0] |
| 406 | + delimiter = (qs.get("delimiter") or [None])[0] |
| 407 | + max_keys_s = (qs.get("max-keys") or qs.get("maxkeys") or ["1000"])[0] |
| 408 | + try: |
| 409 | + max_keys = max(1, min(int(max_keys_s), 1000)) |
| 410 | + except ValueError: |
| 411 | + max_keys = 1000 |
365 | 412 |
|
366 | | - Args: |
367 | | - bucket: The bucket name. |
368 | | - objects: List of object keys in the bucket. |
| 413 | + continuation = (qs.get("continuation-token") or [""])[0] |
| 414 | + start_after = (qs.get("start-after") or [""])[0] |
| 415 | + exclusive_after = continuation or start_after |
369 | 416 |
|
370 | | - Returns: |
371 | | - The XML document as bytes. |
372 | | - """ |
373 | | - entries = [] |
374 | 417 | now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z") |
375 | | - for key in objects: |
376 | | - try: |
377 | | - data = S3RequestHandler.server.state.get_object(bucket, key) # type: ignore[attr-defined] |
378 | | - size = len(data) |
379 | | - etag = _etag(data) |
380 | | - except Exception: # noqa: BLE001 |
381 | | - size = 0 |
382 | | - etag = '""' |
383 | | - entries.append( |
384 | | - "<Contents>" |
385 | | - f"<Key>{_escape_xml(key)}</Key>" |
386 | | - f"<LastModified>{now}</LastModified>" |
387 | | - f"<ETag>{etag}</ETag>" |
388 | | - f"<Size>{size}</Size>" |
389 | | - "</Contents>" |
390 | | - ) |
391 | | - obj_elems = "".join(entries) |
392 | | - xml = ( |
393 | | - '<?xml version="1.0" encoding="UTF-8"?>' |
394 | | - "<ListBucketResult>" |
395 | | - f"<Name>{_escape_xml(bucket)}</Name>" |
396 | | - f"{obj_elems}" |
397 | | - "</ListBucketResult>" |
398 | | - ) |
399 | | - return xml.encode() |
| 418 | + state = self.server.state |
| 419 | + |
| 420 | + items: list[tuple[Literal["cp", "key"], str]] = [] |
| 421 | + if not delimiter: |
| 422 | + for k in sorted(all_keys): |
| 423 | + if k.startswith(prefix): |
| 424 | + items.append(("key", k)) |
| 425 | + else: |
| 426 | + common: set[str] = set() |
| 427 | + contents: list[str] = [] |
| 428 | + for k in sorted(all_keys): |
| 429 | + if not k.startswith(prefix): |
| 430 | + continue |
| 431 | + relative = k[len(prefix) :] |
| 432 | + if delimiter in relative: |
| 433 | + idx = relative.index(delimiter) |
| 434 | + common.add(prefix + relative[: idx + len(delimiter)]) |
| 435 | + else: |
| 436 | + contents.append(k) |
| 437 | + for cp in sorted(common): |
| 438 | + items.append(("cp", cp)) |
| 439 | + for ck in sorted(contents): |
| 440 | + items.append(("key", ck)) |
| 441 | + items.sort(key=lambda x: x[1]) |
| 442 | + |
| 443 | + if exclusive_after: |
| 444 | + items = [it for it in items if it[1] > exclusive_after] |
| 445 | + |
| 446 | + page = items[:max_keys] |
| 447 | + truncated = len(items) > max_keys |
| 448 | + next_token = page[-1][1] if truncated and page else "" |
| 449 | + |
| 450 | + fragments: list[str] = [ |
| 451 | + '<?xml version="1.0" encoding="UTF-8"?>', |
| 452 | + "<ListBucketResult>", |
| 453 | + f"<Name>{_escape_xml(bucket)}</Name>", |
| 454 | + f"<Prefix>{_escape_xml(prefix)}</Prefix>", |
| 455 | + f"<KeyCount>{len(page)}</KeyCount>", |
| 456 | + f"<MaxKeys>{max_keys}</MaxKeys>", |
| 457 | + f"<IsTruncated>{str(truncated).lower()}</IsTruncated>", |
| 458 | + ] |
| 459 | + if delimiter: |
| 460 | + fragments.append(f"<Delimiter>{_escape_xml(delimiter)}</Delimiter>") |
| 461 | + if truncated and next_token: |
| 462 | + fragments.append(f"<NextContinuationToken>{_escape_xml(next_token)}</NextContinuationToken>") |
| 463 | + |
| 464 | + for kind, path in page: |
| 465 | + if kind == "cp": |
| 466 | + fragments.append(f"<CommonPrefixes><Prefix>{_escape_xml(path)}</Prefix></CommonPrefixes>") |
| 467 | + else: |
| 468 | + try: |
| 469 | + data = state.get_object(bucket, path) |
| 470 | + size = len(data) |
| 471 | + etag = _etag(data) |
| 472 | + except Exception: # noqa: BLE001 |
| 473 | + size = 0 |
| 474 | + etag = '""' |
| 475 | + fragments.append( |
| 476 | + "<Contents>" |
| 477 | + f"<Key>{_escape_xml(path)}</Key>" |
| 478 | + f"<LastModified>{now}</LastModified>" |
| 479 | + f"<ETag>{etag}</ETag>" |
| 480 | + f"<Size>{size}</Size>" |
| 481 | + "</Contents>" |
| 482 | + ) |
| 483 | + |
| 484 | + fragments.append("</ListBucketResult>") |
| 485 | + return "".join(fragments).encode() |
400 | 486 |
|
401 | 487 |
|
class S3ServerProtocol(Protocol):
    """Structural type for the server object that request handlers run on.

    Any server object exposing these two attributes satisfies the protocol.
    """

    # Object store used for bucket/key operations (copy, list, get).
    state: S3State
    # Request authenticator; presumably verifies request signatures (confirm against S3Auth).
    auth: S3Auth
405 | 491 |
|
406 | 492 |
|
| 493 | +def _parse_copy_source(raw: str) -> tuple[str, str]: |
| 494 | + """Parse ``x-amz-copy-source`` into ``(bucket, key)``. |
| 495 | +
|
| 496 | + Accepts ``/bucket/key``, ``bucket/key``, URL-encoded keys, and strips ``?versionId=``. |
| 497 | +
|
| 498 | + Args: |
| 499 | + raw: Raw header value. |
| 500 | +
|
| 501 | + Returns: |
| 502 | + Source bucket and object key. |
| 503 | +
|
| 504 | + Raises: |
| 505 | + ValueError: If the value cannot be parsed. |
| 506 | + """ |
| 507 | + s = raw.strip() |
| 508 | + if not s: |
| 509 | + raise ValueError("Empty x-amz-copy-source") |
| 510 | + s = s.split("?", 1)[0] |
| 511 | + s = _up.unquote(s) |
| 512 | + if s.startswith("/"): |
| 513 | + s = s[1:] |
| 514 | + if "/" not in s: |
| 515 | + raise ValueError("x-amz-copy-source must be /bucket/key") |
| 516 | + src_bucket, src_key = s.split("/", 1) |
| 517 | + if not src_bucket or not src_key: |
| 518 | + raise ValueError("Invalid x-amz-copy-source") |
| 519 | + return src_bucket, src_key |
| 520 | + |
| 521 | + |
407 | 522 | def _escape_xml(text: str) -> str: # noqa: D401 |
408 | 523 | """Escape special characters for XML. |
409 | 524 |
|
|
0 commit comments