Skip to content

Commit 69b763e

Browse files
committed
feat(date): add byte-preserving format output and --resolution support
1 parent 289d701 commit 69b763e

2 files changed

Lines changed: 196 additions & 24 deletions

File tree

src/uu/date/src/date.rs

Lines changed: 183 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@ use jiff::tz::{TimeZone, TimeZoneDatabase};
1414
use jiff::{Timestamp, Zoned};
1515
use std::borrow::Cow;
1616
use std::collections::HashMap;
17+
use std::env;
18+
use std::ffi::{OsStr, OsString};
1719
use std::fs::File;
1820
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
1921
use std::path::PathBuf;
2022
use std::sync::OnceLock;
2123
use uucore::display::Quotable;
2224
use uucore::error::FromIo;
25+
#[cfg(unix)]
26+
use uucore::error::UUsageError;
2327
use uucore::error::{UResult, USimpleError};
2428
#[cfg(feature = "i18n-datetime")]
2529
use uucore::i18n::datetime::{localize_format_string, should_use_icu_locale};
@@ -28,6 +32,8 @@ use uucore::{format_usage, show};
2832
#[cfg(windows)]
2933
use windows_sys::Win32::{Foundation::SYSTEMTIME, System::SystemInformation::SetSystemTime};
3034

35+
#[cfg(unix)]
36+
use std::os::unix::ffi::OsStrExt;
3137
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
3238

3339
// Options
@@ -58,6 +64,7 @@ struct Settings {
5864
format: Format,
5965
date_source: DateSource,
6066
set_to: Option<Zoned>,
67+
output_encoding: OutputEncoding,
6168
debug: bool,
6269
}
6370

@@ -84,6 +91,7 @@ enum Format {
8491
Iso8601(Iso8601Format),
8592
Rfc5322,
8693
Rfc3339(Rfc3339Format),
94+
// Used by --resolution to emit the clock resolution as "seconds.nanoseconds".
8795
Resolution,
8896
Custom(String),
8997
Default,
@@ -96,9 +104,34 @@ enum DateSource {
96104
FileMtime(PathBuf),
97105
Stdin,
98106
Human(String),
107+
// Used by --resolution to source a Timestamp that represents clock resolution.
99108
Resolution,
100109
}
101110

111+
/// How the formatted date must be encoded when it is written to stdout.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum OutputEncoding {
    /// Write the formatted string as regular UTF-8 text.
    Utf8,
    /// Write every character in `0x00..=0xFF` back as the single byte it was
    /// decoded from. Only available on Unix, where arguments are raw bytes.
    #[cfg(unix)]
    BytePreserving,
}
123+
124+
struct CustomFormat {
125+
format: String,
126+
output_encoding: OutputEncoding,
127+
}
128+
129+
/// Reasons a format operand cannot be turned into a [`CustomFormat`].
#[derive(Clone, Debug, Eq, PartialEq)]
enum CustomFormatError {
    /// The operand does not start with `+`; carries the operand (lossily
    /// converted to a `String`) for use in the error message.
    MissingPlus(String),
    /// The operand is not valid UTF-8 although the locale expects UTF-8.
    #[cfg(unix)]
    InvalidUtf8,
}
134+
102135
enum Iso8601Format {
103136
Date,
104137
Hours,
@@ -285,7 +318,7 @@ fn parse_military_timezone_with_offset(s: &str) -> Option<(i32, DayDelta)> {
285318
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
286319
let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
287320

288-
let date_source = if let Some(date_os) = matches.get_one::<std::ffi::OsString>(OPT_DATE) {
321+
let date_source = if let Some(date_os) = matches.get_one::<OsString>(OPT_DATE) {
289322
// Convert OsString to String, handling invalid UTF-8 with GNU-compatible error
290323
let date = date_os.to_str().ok_or_else(|| {
291324
let bytes = date_os.as_encoded_bytes();
@@ -307,35 +340,49 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
307340
};
308341

309342
// Check for extra operands (multiple positional arguments)
310-
if let Some(formats) = matches.get_many::<String>(OPT_FORMAT) {
311-
let format_args: Vec<&String> = formats.collect();
343+
if let Some(formats) = matches.get_many::<OsString>(OPT_FORMAT) {
344+
let format_args: Vec<&OsString> = formats.collect();
312345
if format_args.len() > 1 {
313346
return Err(USimpleError::new(
314347
1,
315-
translate!("date-error-extra-operand", "operand" => format_args[1]),
348+
translate!(
349+
"date-error-extra-operand",
350+
"operand" => format_args[1].to_string_lossy()
351+
),
316352
));
317353
}
318354
}
319355

320-
let format = if let Some(form) = matches.get_one::<String>(OPT_FORMAT) {
321-
if !form.starts_with('+') {
322-
// if an optional Format String was found but the user has not provided an input date
323-
// GNU prints an invalid date Error
324-
if !matches!(date_source, DateSource::Human(_)) {
356+
let mut output_encoding = OutputEncoding::Utf8;
357+
let format = if let Some(form) = matches.get_one::<OsString>(OPT_FORMAT) {
358+
let custom = match parse_custom_format(form) {
359+
Ok(custom) => custom,
360+
Err(CustomFormatError::MissingPlus(raw)) => {
361+
// If a format operand without a leading '+' was given and the user did not supply an input date,
362+
// GNU reports it as an invalid date.
363+
if !matches!(date_source, DateSource::Human(_)) {
364+
return Err(USimpleError::new(
365+
1,
366+
translate!("date-error-invalid-date", "date" => raw),
367+
));
368+
}
369+
// If the user did provide an input date with the --date flag and the format string does
370+
// not start with '+', GNU prints the missing '+' error message.
325371
return Err(USimpleError::new(
326372
1,
327-
translate!("date-error-invalid-date", "date" => form),
373+
translate!("date-error-format-missing-plus", "arg" => raw),
328374
));
329375
}
330-
// If the user did provide an input date with the --date flag and the Format String is
331-
// not starting with '+' GNU prints the missing '+' error message
332-
return Err(USimpleError::new(
333-
1,
334-
translate!("date-error-format-missing-plus", "arg" => form),
335-
));
336-
}
337-
let form = form[1..].to_string();
338-
Format::Custom(form)
376+
#[cfg(unix)]
377+
Err(CustomFormatError::InvalidUtf8) => {
378+
return Err(UUsageError::new(
379+
1,
380+
"invalid UTF-8 was detected in one or more arguments",
381+
));
382+
}
383+
};
384+
output_encoding = custom.output_encoding;
385+
Format::Custom(custom.format)
339386
} else if let Some(fmt) = matches
340387
.get_many::<String>(OPT_ISO_8601)
341388
.map(|mut iter| iter.next().unwrap_or(&DATE.to_string()).as_str().into())
@@ -383,6 +430,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
383430
format,
384431
date_source,
385432
set_to,
433+
output_encoding,
386434
debug: debug_mode,
387435
};
388436

@@ -559,9 +607,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
559607
&config,
560608
skip_localization,
561609
) {
562-
Ok(s) => writeln!(stdout, "{s}").map_err(|e| {
563-
USimpleError::new(1, translate!("date-error-write", "error" => e))
564-
})?,
610+
Ok(s) => write_formatted_output(settings.output_encoding, &s, &mut stdout)
611+
.map_err(|e| {
612+
USimpleError::new(1, translate!("date-error-write", "error" => e))
613+
})?,
565614
Err(e) => {
566615
let _ = stdout.flush();
567616
return Err(USimpleError::new(
@@ -601,7 +650,7 @@ pub fn uu_app() -> Command {
601650
.value_name("STRING")
602651
.allow_hyphen_values(true)
603652
.overrides_with(OPT_DATE)
604-
.value_parser(clap::value_parser!(std::ffi::OsString))
653+
.value_parser(clap::value_parser!(OsString))
605654
.help(translate!("date-help-date")),
606655
)
607656
.arg(
@@ -696,7 +745,117 @@ pub fn uu_app() -> Command {
696745
.help(translate!("date-help-universal"))
697746
.action(ArgAction::SetTrue),
698747
)
699-
.arg(Arg::new(OPT_FORMAT).num_args(0..))
748+
.arg(
749+
Arg::new(OPT_FORMAT)
750+
.num_args(0..)
751+
.trailing_var_arg(true)
752+
.value_parser(clap::builder::ValueParser::os_string()),
753+
)
754+
}
755+
756+
/// Parse a user-supplied `+FORMAT` argument into a `CustomFormat`.
757+
///
758+
/// - Requires the leading '+' and returns `MissingPlus` otherwise.
759+
/// - On Unix, treats the payload as raw bytes: if UTF-8, use as-is; if not,
760+
/// then either error under UTF-8 locales or decode in a byte-preserving way.
761+
/// - On non-Unix, falls back to a lossy string conversion and strips the '+'.
762+
fn parse_custom_format(raw: &OsStr) -> Result<CustomFormat, CustomFormatError> {
763+
#[cfg(unix)]
764+
{
765+
let bytes = raw.as_bytes();
766+
if bytes.first() != Some(&b'+') {
767+
return Err(CustomFormatError::MissingPlus(
768+
raw.to_string_lossy().into_owned(),
769+
));
770+
}
771+
let payload = &bytes[1..];
772+
if let Ok(utf8) = std::str::from_utf8(payload) {
773+
return Ok(CustomFormat {
774+
format: utf8.to_string(),
775+
output_encoding: OutputEncoding::Utf8,
776+
});
777+
}
778+
if locale_output_encoding() == OutputEncoding::Utf8 {
779+
return Err(CustomFormatError::InvalidUtf8);
780+
}
781+
Ok(CustomFormat {
782+
format: decode_byte_preserving(payload),
783+
output_encoding: OutputEncoding::BytePreserving,
784+
})
785+
}
786+
787+
#[cfg(not(unix))]
788+
{
789+
let s = raw.to_string_lossy();
790+
if !s.starts_with('+') {
791+
return Err(CustomFormatError::MissingPlus(s.into_owned()));
792+
}
793+
Ok(CustomFormat {
794+
format: s[1..].to_string(),
795+
output_encoding: OutputEncoding::Utf8,
796+
})
797+
}
798+
}
799+
800+
#[cfg(unix)]
801+
/// Determine whether the active locale expects UTF-8 output.
802+
fn locale_output_encoding() -> OutputEncoding {
803+
let locale_var = ["LC_ALL", "LC_TIME", "LANG"]
804+
.iter()
805+
.find_map(|key| env::var(key).ok());
806+
807+
if let Some(locale) = locale_var {
808+
let mut split = locale.split(&['.', '@']);
809+
let _ = split.next();
810+
if let Some(encoding) = split.next() {
811+
let encoding = encoding.to_ascii_lowercase();
812+
if encoding == "utf-8" || encoding == "utf8" {
813+
return OutputEncoding::Utf8;
814+
}
815+
}
816+
}
817+
818+
OutputEncoding::BytePreserving
819+
}
820+
821+
/// Decode raw bytes into a `String` by turning each byte into the Unicode
/// code point with the same numeric value (`0x00..=0xFF`), so no byte is lost.
#[cfg(unix)]
fn decode_byte_preserving(bytes: &[u8]) -> String {
    let mut decoded = String::with_capacity(bytes.len());
    decoded.extend(bytes.iter().copied().map(char::from));
    decoded
}
826+
827+
/// Map every character of `s` back to a single byte.
///
/// Returns `None` as soon as a character above `U+00FF` is found, since such
/// a character cannot be represented in one byte.
#[cfg(unix)]
fn encode_byte_preserving(s: &str) -> Option<Vec<u8>> {
    // Collecting into `Option<Vec<_>>` stops at the first char that does not fit.
    s.chars().map(|ch| u8::try_from(ch).ok()).collect()
}
840+
841+
/// Write the formatted string using the requested output encoding.
842+
fn write_formatted_output(
843+
output_encoding: OutputEncoding,
844+
s: &str,
845+
stdout: &mut impl Write,
846+
) -> std::io::Result<()> {
847+
match output_encoding {
848+
OutputEncoding::Utf8 => writeln!(stdout, "{s}"),
849+
#[cfg(unix)]
850+
OutputEncoding::BytePreserving => {
851+
if let Some(mut bytes) = encode_byte_preserving(s) {
852+
bytes.push(b'\n');
853+
stdout.write_all(&bytes)
854+
} else {
855+
writeln!(stdout, "{s}")
856+
}
857+
}
858+
}
700859
}
701860

702861
fn format_date_with_locale_aware_months(

tests/by-util/test_date.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,19 @@ fn test_date_format_literal() {
419419
new_ucmd!().arg("+%%N").succeeds().stdout_is("%N\n");
420420
}
421421

422+
/// In a non-UTF-8 locale, a format operand containing a byte that is not
/// valid UTF-8 must come back on stdout byte-for-byte.
#[test]
#[cfg(unix)]
fn test_date_format_non_utf8_locale_bytes() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // A lone 0xC2 is an incomplete UTF-8 sequence.
    let format_operand = OsStr::from_bytes(b"+\xC2");

    new_ucmd!()
        .env("LC_ALL", "en_US.ISO-8859-1")
        .arg(format_operand)
        .succeeds()
        .stdout_is_bytes(b"\xC2\n");
}
434+
422435
#[test]
423436
#[cfg(all(unix, not(target_os = "macos")))]
424437
fn test_date_set_valid() {

0 commit comments

Comments
 (0)