Skip to content

Commit 5ccce19

Browse files
authored
Merge pull request #9930 from sylvestre/expand-cognitive_complexity
expand: address a cognitive_complexity warning
2 parents 25befcd + 676363a commit 5ccce19

1 file changed

Lines changed: 76 additions & 51 deletions

File tree

src/uu/expand/src/expand.rs

Lines changed: 76 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore (ToDO) ctype cwidth iflag nbytes nspaces nums tspaces uflag Preprocess
6+
// spell-checker:ignore (ToDO) ctype cwidth iflag nbytes nspaces nums tspaces Preprocess
77

88
use clap::{Arg, ArgAction, ArgMatches, Command};
99
use std::ffi::OsString;
@@ -174,7 +174,7 @@ struct Options {
174174
tabstops: Vec<usize>,
175175
tspaces: String,
176176
iflag: bool,
177-
uflag: bool,
177+
utf8: bool,
178178

179179
/// Strategy for expanding tabs for columns beyond those specified
180180
/// in `tabstops`.
@@ -189,7 +189,7 @@ impl Options {
189189
};
190190

191191
let iflag = matches.get_flag(options::INITIAL);
192-
let uflag = !matches.get_flag(options::NO_UTF8);
192+
let utf8 = !matches.get_flag(options::NO_UTF8);
193193

194194
// avoid allocations when dumping out long sequences of spaces
195195
// by precomputing the longest string of spaces we will ever need
@@ -214,7 +214,7 @@ impl Options {
214214
tabstops,
215215
tspaces,
216216
iflag,
217-
uflag,
217+
utf8,
218218
remaining_mode,
219219
})
220220
}
@@ -349,7 +349,62 @@ enum CharType {
349349
Other,
350350
}
351351

352-
#[allow(clippy::cognitive_complexity)]
352+
/// Classify a character and determine its width and byte length.
353+
///
354+
/// Returns `(CharType, display_width, byte_length)`.
355+
#[inline]
356+
fn classify_char(buf: &[u8], byte: usize, utf8: bool) -> (CharType, usize, usize) {
357+
use self::CharType::{Backspace, Other, Tab};
358+
359+
if utf8 {
360+
let nbytes = char::from(buf[byte]).len_utf8();
361+
362+
if byte + nbytes > buf.len() {
363+
// don't overrun buffer because of invalid UTF-8
364+
return (Other, 1, 1);
365+
}
366+
367+
if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
368+
match t.chars().next() {
369+
Some('\t') => (Tab, 0, 1),
370+
Some('\x08') => (Backspace, 0, 1),
371+
Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
372+
None => {
373+
// no valid char at start of t, so take 1 byte
374+
(Other, 1, 1)
375+
}
376+
}
377+
} else {
378+
(Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide
379+
}
380+
} else {
381+
(
382+
match buf.get(byte) {
383+
// always take exactly 1 byte in strict ASCII mode
384+
Some(0x09) => Tab,
385+
Some(0x08) => Backspace,
386+
_ => Other,
387+
},
388+
0,
389+
1,
390+
)
391+
}
392+
}
393+
394+
/// Emit `nts` space characters to `output` as the expansion of one tab.
///
/// `tspaces` is a precomputed run of spaces. When it holds at least `nts`
/// bytes, a prefix of it is written directly, which avoids an allocation.
/// Otherwise a fresh string of `nts` spaces is built and written.
///
/// # Errors
///
/// Returns any error reported by the underlying writer.
#[inline]
fn write_tab_spaces(
    output: &mut BufWriter<std::io::Stdout>,
    nts: usize,
    tspaces: &str,
) -> std::io::Result<()> {
    // `get(..nts)` is `Some` exactly when `nts <= tspaces.len()`.
    match tspaces.as_bytes().get(..nts) {
        Some(padding) => output.write_all(padding),
        None => output.write_all(" ".repeat(nts).as_bytes()),
    }
}
407+
353408
fn expand_line(
354409
buf: &mut Vec<u8>,
355410
output: &mut BufWriter<std::io::Stdout>,
@@ -360,8 +415,7 @@ fn expand_line(
360415

361416
// Fast path: if there are no tabs or backspaces, and (we are in UTF-8 mode or there are no carriage returns),
362417
// we can write the buffer directly without character-by-character processing
363-
if !buf.contains(&b'\t') && !buf.contains(&b'\x08') && (options.uflag || !buf.contains(&b'\r'))
364-
{
418+
if !buf.contains(&b'\t') && !buf.contains(&b'\x08') && (options.utf8 || !buf.contains(&b'\r')) {
365419
output.write_all(buf)?;
366420
buf.truncate(0);
367421
return Ok(());
@@ -372,37 +426,7 @@ fn expand_line(
372426
let mut init = true;
373427

374428
while byte < buf.len() {
375-
let (ctype, cwidth, nbytes) = if options.uflag {
376-
let nbytes = char::from(buf[byte]).len_utf8();
377-
378-
if byte + nbytes > buf.len() {
379-
// don't overrun buffer because of invalid UTF-8
380-
(Other, 1, 1)
381-
} else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
382-
match t.chars().next() {
383-
Some('\t') => (Tab, 0, nbytes),
384-
Some('\x08') => (Backspace, 0, nbytes),
385-
Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
386-
None => {
387-
// no valid char at start of t, so take 1 byte
388-
(Other, 1, 1)
389-
}
390-
}
391-
} else {
392-
(Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide
393-
}
394-
} else {
395-
(
396-
match buf.get(byte) {
397-
// always take exactly 1 byte in strict ASCII mode
398-
Some(0x09) => Tab,
399-
Some(0x08) => Backspace,
400-
_ => Other,
401-
},
402-
1,
403-
1,
404-
)
405-
};
429+
let (ctype, cwidth, nbytes) = classify_char(buf, byte, options.utf8);
406430

407431
// figure out how many columns this char takes up
408432
match ctype {
@@ -413,23 +437,24 @@ fn expand_line(
413437

414438
// now dump out either spaces if we're expanding, or a literal tab if we're not
415439
if init || !options.iflag {
416-
if nts <= options.tspaces.len() {
417-
output.write_all(&options.tspaces.as_bytes()[..nts])?;
418-
} else {
419-
output.write_all(" ".repeat(nts).as_bytes())?;
420-
}
440+
write_tab_spaces(output, nts, &options.tspaces)?;
421441
} else {
422442
output.write_all(&buf[byte..byte + nbytes])?;
423443
}
424444
}
425-
_ => {
426-
col = if ctype == Other {
427-
col + cwidth
428-
} else if col > 0 {
429-
col - 1
430-
} else {
431-
0
432-
};
445+
Backspace => {
446+
col = col.saturating_sub(1);
447+
448+
// if we're writing anything other than a space, then we're
449+
// done with the line's leading spaces
450+
if buf[byte] != 0x20 {
451+
init = false;
452+
}
453+
454+
output.write_all(&buf[byte..byte + nbytes])?;
455+
}
456+
Other => {
457+
col += cwidth;
433458

434459
// if we're writing anything other than a space, then we're
435460
// done with the line's leading spaces

0 commit comments

Comments
 (0)