|
| 1 | +# Copyright (c) Microsoft Corporation. |
| 2 | +# Licensed under the MIT license. |
| 3 | + |
| 4 | +""" |
| 5 | +Presentation-layer formatter for ``ConversationStats.last_message_preview``. |
| 6 | +
|
| 7 | +Lives in the backend mapper package because the formatting it produces |
| 8 | +(``[Image: <basename>]`` etc.) is purely a display concern for the GUI API |
| 9 | +responses — the memory layer stays data-agnostic and just stores the raw |
| 10 | +value + data type. |
| 11 | +
|
| 12 | +The motivating bug: ``converted_value`` for media-path data types |
| 13 | +(``image_path`` / ``audio_path`` / ``video_path`` / ``binary_path``) is a |
| 14 | +filesystem path or blob URL. Rendering it raw in the Attack History preview |
| 15 | +leaks the absolute on-disk location of memory artifacts |
| 16 | +(e.g. ``C:\\Users\\<name>\\git\\PyRIT\\dbdata\\...\\1780.mp3``). |
| 17 | +""" |
| 18 | + |
| 19 | +from pathlib import PureWindowsPath |
| 20 | +from urllib.parse import urlparse |
| 21 | + |
| 22 | +from pyrit.models import MEDIA_PATH_DATA_TYPES, ConversationStats |
| 23 | + |
| 24 | +# Friendly label per media-path data type. Kept here next to the formatter |
| 25 | +# so adding a new media type only requires updating one place. |
| 26 | +_MEDIA_LABEL: dict[str, str] = { |
| 27 | + "image_path": "Image", |
| 28 | + "audio_path": "Audio", |
| 29 | + "video_path": "Video", |
| 30 | + "binary_path": "File", |
| 31 | +} |
| 32 | + |
| 33 | + |
| 34 | +def _derive_basename(value: str) -> str | None: |
| 35 | + """ |
| 36 | + Return a display-safe basename for *value*. |
| 37 | +
|
| 38 | + Args: |
| 39 | + value: A filesystem path, URL, or other reference. |
| 40 | +
|
| 41 | + Returns: |
| 42 | + The basename (filename portion) of *value*, or ``None`` if one can't |
| 43 | + be derived (e.g. data URI, empty value). |
| 44 | + """ |
| 45 | + if not value or value.startswith("data:"): |
| 46 | + return None |
| 47 | + if value.startswith(("http://", "https://")): |
| 48 | + # Strip query string (e.g. SAS tokens) before taking the basename. |
| 49 | + parsed = urlparse(value) |
| 50 | + name = PureWindowsPath(parsed.path).name |
| 51 | + return name or None |
| 52 | + # Local path — PureWindowsPath treats both ``/`` and ``\`` as separators, |
| 53 | + # so Windows-style paths stored from a Windows host are split correctly |
| 54 | + # even when this code runs on a POSIX host (CI, Linux deployments). |
| 55 | + return PureWindowsPath(value).name or None |
| 56 | + |
| 57 | + |
def format_last_message_preview(
    *,
    value: str | None,
    data_type: str | None,
    max_len: int = ConversationStats.PREVIEW_MAX_LEN,
) -> str | None:
    """
    Build a display string for ``ConversationStats.last_message_preview``.

    Media-path data types are rendered as ``[Image: <basename>]`` (and
    variants) so the absolute filesystem path of memory artifacts is never
    exposed through API responses or UI previews. Text-like data types pass
    through with truncation and an ellipsis suffix when they exceed
    *max_len*.

    Args:
        value: Raw ``converted_value`` for the last piece (or ``None``).
        data_type: ``converted_value_data_type`` for that piece. ``None``
            falls back to the text path.
        max_len: Maximum length for text previews before truncation. The
            ``...`` suffix is appended after the cut, so a truncated preview
            is ``max_len + 3`` characters long.

    Returns:
        For media-path data types, ``[<label>: <basename>]``, or just
        ``[<label>]`` when no basename can be derived (this branch never
        returns ``None``). For everything else, the possibly truncated text,
        or ``None`` when *value* is empty or ``None``.
    """
    if data_type in MEDIA_PATH_DATA_TYPES:
        # ``_MEDIA_LABEL`` is a hand-maintained literal, so nothing enforces
        # that every member of ``MEDIA_PATH_DATA_TYPES`` has an entry. Fall
        # back to the generic label rather than letting a KeyError escape
        # from a display helper and break the whole API response.
        label = _MEDIA_LABEL.get(data_type, "File")
        basename = _derive_basename(value or "")
        return f"[{label}: {basename}]" if basename else f"[{label}]"

    if not value:
        return None

    if len(value) > max_len:
        return value[:max_len] + "..."
    return value
0 commit comments