Skip to content

Commit 7e7edb1

Browse files
committed
fix(date): remove UTF-8 validation error for non-UTF-8 format strings
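This commit removes the strict UTF-8 validation that was previously enforced on Unix systems when a custom format string was passed to the date command. Previously, if a format string contained invalid UTF-8 bytes and the locale was set to UTF-8, the command failed with the error "invalid UTF-8 was detected in one or more arguments". Now the command always decodes format strings in a byte-preserving manner, so non-UTF-8 format strings work regardless of the locale setting. The change simplifies the behavior by removing the conditional UTF-8 validation logic (the locale_output_encoding helper) and the associated error handling (the CustomFormatError::InvalidUtf8 variant), making the date command more permissive with format strings that contain invalid UTF-8 sequences. Two related adjustments are included: the "lacks a leading '+'" error now renders the offending argument with escape_invalid_bytes (for example, byte 0xB0 is shown as \260) instead of a lossy UTF-8 conversion, and trailing_var_arg(true) is dropped from the format argument definition.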
1 parent 69b763e commit 7e7edb1

2 files changed

Lines changed: 29 additions & 41 deletions

File tree

src/uu/date/src/date.rs

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,13 @@ use jiff::tz::{TimeZone, TimeZoneDatabase};
1414
use jiff::{Timestamp, Zoned};
1515
use std::borrow::Cow;
1616
use std::collections::HashMap;
17-
use std::env;
1817
use std::ffi::{OsStr, OsString};
1918
use std::fs::File;
2019
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
2120
use std::path::PathBuf;
2221
use std::sync::OnceLock;
2322
use uucore::display::Quotable;
2423
use uucore::error::FromIo;
25-
#[cfg(unix)]
26-
use uucore::error::UUsageError;
2724
use uucore::error::{UResult, USimpleError};
2825
#[cfg(feature = "i18n-datetime")]
2926
use uucore::i18n::datetime::{localize_format_string, should_use_icu_locale};
@@ -128,8 +125,6 @@ struct CustomFormat {
128125

129126
enum CustomFormatError {
130127
MissingPlus(String),
131-
#[cfg(unix)]
132-
InvalidUtf8,
133128
}
134129

135130
enum Iso8601Format {
@@ -373,13 +368,6 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
373368
translate!("date-error-format-missing-plus", "arg" => raw),
374369
));
375370
}
376-
#[cfg(unix)]
377-
Err(CustomFormatError::InvalidUtf8) => {
378-
return Err(UUsageError::new(
379-
1,
380-
"invalid UTF-8 was detected in one or more arguments",
381-
));
382-
}
383371
};
384372
output_encoding = custom.output_encoding;
385373
Format::Custom(custom.format)
@@ -748,7 +736,6 @@ pub fn uu_app() -> Command {
748736
.arg(
749737
Arg::new(OPT_FORMAT)
750738
.num_args(0..)
751-
.trailing_var_arg(true)
752739
.value_parser(clap::builder::ValueParser::os_string()),
753740
)
754741
}
@@ -757,16 +744,14 @@ pub fn uu_app() -> Command {
757744
///
758745
/// - Requires the leading '+' and returns `MissingPlus` otherwise.
759746
/// - On Unix, treats the payload as raw bytes: if UTF-8, use as-is; if not,
760-
/// then either error under UTF-8 locales or decode in a byte-preserving way.
747+
/// decode in a byte-preserving way.
761748
/// - On non-Unix, falls back to a lossy string conversion and strips the '+'.
762749
fn parse_custom_format(raw: &OsStr) -> Result<CustomFormat, CustomFormatError> {
763750
#[cfg(unix)]
764751
{
765752
let bytes = raw.as_bytes();
766753
if bytes.first() != Some(&b'+') {
767-
return Err(CustomFormatError::MissingPlus(
768-
raw.to_string_lossy().into_owned(),
769-
));
754+
return Err(CustomFormatError::MissingPlus(escape_invalid_bytes(bytes)));
770755
}
771756
let payload = &bytes[1..];
772757
if let Ok(utf8) = std::str::from_utf8(payload) {
@@ -775,9 +760,6 @@ fn parse_custom_format(raw: &OsStr) -> Result<CustomFormat, CustomFormatError> {
775760
output_encoding: OutputEncoding::Utf8,
776761
});
777762
}
778-
if locale_output_encoding() == OutputEncoding::Utf8 {
779-
return Err(CustomFormatError::InvalidUtf8);
780-
}
781763
Ok(CustomFormat {
782764
format: decode_byte_preserving(payload),
783765
output_encoding: OutputEncoding::BytePreserving,
@@ -797,27 +779,6 @@ fn parse_custom_format(raw: &OsStr) -> Result<CustomFormat, CustomFormatError> {
797779
}
798780
}
799781

800-
#[cfg(unix)]
801-
/// Determine whether the active locale expects UTF-8 output.
802-
fn locale_output_encoding() -> OutputEncoding {
803-
let locale_var = ["LC_ALL", "LC_TIME", "LANG"]
804-
.iter()
805-
.find_map(|key| env::var(key).ok());
806-
807-
if let Some(locale) = locale_var {
808-
let mut split = locale.split(&['.', '@']);
809-
let _ = split.next();
810-
if let Some(encoding) = split.next() {
811-
let encoding = encoding.to_ascii_lowercase();
812-
if encoding == "utf-8" || encoding == "utf8" {
813-
return OutputEncoding::Utf8;
814-
}
815-
}
816-
}
817-
818-
OutputEncoding::BytePreserving
819-
}
820-
821782
#[cfg(unix)]
822783
/// Losslessly map each byte to the same Unicode code point (0x00..=0xFF).
823784
fn decode_byte_preserving(bytes: &[u8]) -> String {

tests/by-util/test_date.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,33 @@ fn test_date_format_non_utf8_locale_bytes() {
432432
.stdout_is_bytes(b"\xC2\n");
433433
}
434434

435+
#[test]
436+
#[cfg(unix)]
437+
fn test_date_format_utf8_locale_bytes() {
438+
use std::ffi::OsStr;
439+
use std::os::unix::ffi::OsStrExt;
440+
441+
new_ucmd!()
442+
.env("LC_ALL", "en_US.UTF-8")
443+
.arg(OsStr::from_bytes(b"+\xC2"))
444+
.succeeds()
445+
.stdout_is_bytes(b"\xC2\n");
446+
}
447+
448+
#[test]
449+
#[cfg(unix)]
450+
fn test_bad_format_option_missing_leading_plus_after_d_flag_invalid_utf8() {
451+
use std::ffi::OsStr;
452+
use std::os::unix::ffi::OsStrExt;
453+
454+
new_ucmd!()
455+
.arg("--date")
456+
.arg("now")
457+
.arg(OsStr::from_bytes(b"\xB0"))
458+
.fails_with_code(1)
459+
.stderr_contains("the argument \\260 lacks a leading '+';");
460+
}
461+
435462
#[test]
436463
#[cfg(all(unix, not(target_os = "macos")))]
437464
fn test_date_set_valid() {

0 commit comments

Comments
 (0)