|
| 1 | +use std::{cmp::max, collections::HashSet, str::from_utf8, sync::LazyLock}; |
| 2 | + |
1 | 3 | use regex::bytes::Regex; |
2 | | -use std::{cmp::max, str::from_utf8}; |
3 | 4 |
|
4 | 5 | use crate::constants::cli::patterns::ANSI_COLOR_PATTERN; |
5 | 6 |
|
/// Characters disallowed in a filename
static FILENAME_DISALLOWED_CHARS: LazyLock<HashSet<char>> =
    LazyLock::new(|| HashSet::from(['*', '"', '/', '\\', '<', '>', ':', '|', '?', '.']));
/// The character to replace disallowed chars with
const FILENAME_REPLACEMENT_CHAR: char = '_';

/// Remove unsafe chars in [this list](FILENAME_DISALLOWED_CHARS).
///
/// The input is trimmed of surrounding whitespace, then every control character and every
/// disallowed character is replaced with [`FILENAME_REPLACEMENT_CHAR`].
///
/// The result is capped at 255 UTF-8 **bytes** (not characters), because many filesystems
/// (e.g. ext4) limit a single name to 255 bytes. Truncation only happens on a character
/// boundary, and whitespace left dangling at the end by the cut is removed as well.
///
/// An input that is empty (or only whitespace) yields an empty string; callers that need a
/// usable name must handle that case themselves.
///
/// Does not need to use a `Cow` for optimization because the source is always generated based on chat data
/// so there is no opportunity for the original input to be passed in from another borrow.
pub fn sanitize_filename(filename: &str) -> String {
    /// Upper bound on the sanitized name, measured in UTF-8 bytes.
    const MAX_FILENAME_BYTES: usize = 255;

    let mut sanitized = String::with_capacity(filename.len().min(MAX_FILENAME_BYTES));

    for letter in filename.trim().chars() {
        let letter = if letter.is_control() || FILENAME_DISALLOWED_CHARS.contains(&letter) {
            FILENAME_REPLACEMENT_CHAR
        } else {
            letter
        };

        // Budget in bytes: a multi-byte character that does not fit is dropped whole, so the
        // output never ends in the middle of a code point.
        if sanitized.len() + letter.len_utf8() > MAX_FILENAME_BYTES {
            break;
        }
        sanitized.push(letter);
    }

    // The cut may land right after interior whitespace; keep the "no surrounding whitespace"
    // guarantee that the initial `trim` established.
    let trimmed_len = sanitized.trim_end().len();
    sanitized.truncate(trimmed_len);

    sanitized
}
| 31 | + |
/// Holds a compiled `regex::bytes::Regex` in `color_pattern`.
///
/// NOTE(review): presumably built from `ANSI_COLOR_PATTERN` and used to ignore ANSI color
/// codes when measuring text — confirm against the `impl` block, which is not visible here.
6 | 32 | pub struct LengthFinder {
7 | 33 | color_pattern: Regex,
8 | 34 | }
@@ -34,7 +60,7 @@ impl LengthFinder { |
34 | 60 |
|
35 | 61 | #[cfg(test)] |
36 | 62 | mod tests { |
37 | | - use crate::util::sanitizers::LengthFinder; |
| 63 | + use crate::util::sanitizers::{LengthFinder, sanitize_filename}; |
38 | 64 |
|
39 | 65 | #[test] |
40 | 66 | fn test_length_clean() { |
@@ -81,4 +107,44 @@ mod tests { |
81 | 107 | assert_eq!(rows, 1); |
82 | 108 | assert_eq!(length, 6); |
83 | 109 | } |
| 110 | + |
| 111 | + #[test] |
| 112 | + fn test_sanitize_filename_clean() { |
| 113 | + assert_eq!(sanitize_filename("normal_filename"), "normal_filename"); |
| 114 | + assert_eq!(sanitize_filename("file.txt"), "file_txt"); |
| 115 | + assert_eq!(sanitize_filename("file_123"), "file_123"); |
| 116 | + } |
| 117 | + |
| 118 | + #[test] |
| 119 | + fn test_sanitize_filename_invalid_chars() { |
| 120 | + assert_eq!(sanitize_filename("file<>name"), "file__name"); |
| 121 | + assert_eq!(sanitize_filename("file:name"), "file_name"); |
| 122 | + assert_eq!(sanitize_filename("file\"name"), "file_name"); |
| 123 | + assert_eq!(sanitize_filename("file|name"), "file_name"); |
| 124 | + assert_eq!(sanitize_filename("file?name"), "file_name"); |
| 125 | + assert_eq!(sanitize_filename("file*name"), "file_name"); |
| 126 | + assert_eq!(sanitize_filename("file\\name"), "file_name"); |
| 127 | + assert_eq!(sanitize_filename("file/name"), "file_name"); |
| 128 | + } |
| 129 | + |
| 130 | + #[test] |
| 131 | + fn test_sanitize_filename_control_chars() { |
| 132 | + assert_eq!(sanitize_filename("file\x00name"), "file_name"); |
| 133 | + assert_eq!(sanitize_filename("file\x1fname"), "file_name"); |
| 134 | + assert_eq!(sanitize_filename("file\x7fname"), "file_name"); |
| 135 | + } |
| 136 | + |
| 137 | + #[test] |
| 138 | + fn test_sanitize_filename_trim() { |
| 139 | + assert_eq!(sanitize_filename(" filename "), "filename"); |
| 140 | + assert_eq!(sanitize_filename("..filename.."), "__filename__"); |
| 141 | + assert_eq!(sanitize_filename("\tfilename\t"), "filename"); |
| 142 | + } |
| 143 | + |
| 144 | + #[test] |
| 145 | + fn test_sanitize_filename_long() { |
| 146 | + let long_name = "a".repeat(300); |
| 147 | + let sanitized = sanitize_filename(&long_name); |
| 148 | + assert_eq!(sanitized.len(), 255); |
| 149 | + } |
84 | 150 | } |
0 commit comments