Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 79 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ fn format_string(value: &str, dst: &mut [u8]) -> usize {
{
let has_neon = cfg!(target_os = "macos") || std::arch::is_aarch64_feature_detected!("neon");
if has_neon {
unsafe { simd::neon::format_string(value, dst) }
simd::neon::format_string(value, dst)
} else {
simd::v128::format_string(value, dst)
}
Expand All @@ -302,13 +302,13 @@ fn format_string(value: &str, dst: &mut [u8]) -> usize {
#[cfg(feature = "avx512")]
{
if is_x86_feature_detected!("avx512f") {
return unsafe { simd::avx512::format_string(value, dst) };
return simd::avx512::format_string(value, dst);
}
}
if is_x86_feature_detected!("avx2") {
unsafe { simd::avx2::format_string(value, dst) }
simd::avx2::format_string(value, dst)
} else if is_x86_feature_detected!("sse2") {
unsafe { simd::sse2::format_string(value, dst) }
simd::sse2::format_string(value, dst)
} else {
simd::v128::format_string(value, dst)
}
Expand All @@ -320,6 +320,41 @@ fn format_string(value: &str, dst: &mut [u8]) -> usize {
}
}

/// Writes the escaped form of `value` into `dst` without surrounding quotes,
/// choosing a backend for the current target.
///
/// * aarch64: NEON when the target OS is macOS or NEON is detected at runtime,
///   otherwise the `v128` implementation.
/// * x86 / x86_64: AVX-512 (only when the `avx512` feature is enabled and
///   `avx512f` is detected), then AVX2, then SSE2, otherwise `v128`.
/// * any other architecture: `v128`.
///
/// Returns whatever the selected backend's `format_unquoted` returns.
#[inline(always)]
fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
    #[cfg(target_arch = "aarch64")]
    {
        // The runtime probe is short-circuited when compiling for macOS.
        if cfg!(target_os = "macos") || std::arch::is_aarch64_feature_detected!("neon") {
            return simd::neon::format_unquoted(value, dst);
        }
        simd::v128::format_unquoted(value, dst)
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        // Widest instruction set first; each guard returns as soon as it matches.
        #[cfg(feature = "avx512")]
        {
            if is_x86_feature_detected!("avx512f") {
                return simd::avx512::format_unquoted(value, dst);
            }
        }
        if is_x86_feature_detected!("avx2") {
            return simd::avx2::format_unquoted(value, dst);
        }
        if is_x86_feature_detected!("sse2") {
            return simd::sse2::format_unquoted(value, dst);
        }
        simd::v128::format_unquoted(value, dst)
    }

    #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
    {
        simd::v128::format_unquoted(value, dst)
    }
}

pub fn escape(value: &str) -> String {
let capacity = value.len() * 6 + 32 + 3;
let mut buf = Vec::with_capacity(capacity);
Expand All @@ -332,9 +367,22 @@ pub fn escape(value: &str) -> String {
unsafe { String::from_utf8_unchecked(buf) }
}

/// Escapes `value` and returns the result as a new `String`, without
/// surrounding quotes.
pub fn escape_unquote(value: &str) -> String {
    // Scratch size: six output bytes per input byte plus a fixed 35-byte margin.
    // NOTE(review): the margin presumably covers SIMD store overrun — confirm.
    let scratch_len = value.len() * 6 + 32 + 3;
    let mut out: Vec<u8> = Vec::with_capacity(scratch_len);
    // Expose the whole (still uninitialized) allocation as a slice so the
    // backend can write into it.
    #[allow(clippy::uninit_vec)]
    unsafe {
        out.set_len(scratch_len)
    };
    let written = format_unquoted(value, &mut out);
    // SAFETY: relies on `format_unquoted` having initialized the first
    // `written` bytes with valid UTF-8 and on `written <= scratch_len`.
    unsafe {
        out.set_len(written);
        String::from_utf8_unchecked(out)
    }
}

/// # Panics
///
/// Panics if the buffer is not large enough. Allocate enough capacity for dst.
/// Panics if the buffer is not large enough. Allocate enough capacity for dst,
/// x6 + 2 will be enough in the worst case.
pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
let value = value.as_ref();
let old_len = dst.len();
Expand All @@ -350,6 +398,26 @@ pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
}
}

/// Same as [`escape_into`], just without the opening and closing quotes.
///
/// The escaped bytes are appended to `dst`; bytes already in `dst` are left
/// untouched. Worst-case spare capacity is reserved before writing, so the
/// caller does not have to pre-size `dst` (pre-reserving still avoids a
/// reallocation here).
///
/// # Panics
///
/// Panics if the required capacity overflows, as described for `Vec::reserve`.
pub fn escape_into_unquote<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
    let value = value.as_ref();

    // Reserve the same bound `escape_unquote` allocates: six output bytes per
    // input byte plus a fixed margin. Saturating arithmetic turns an overflow
    // into a clean "capacity overflow" panic instead of a wrapped, too-small
    // reservation.
    // NOTE(review): the margin is presumed to cover backend store overrun — confirm.
    dst.reserve(value.len().saturating_mul(6).saturating_add(32 + 3));
    let old_len = dst.len();

    // SAFETY: the reservation above guarantees `capacity() - old_len` bytes of
    // spare room past `old_len`, so the slice stays inside the allocation.
    // `format_unquoted` is relied upon to write valid UTF-8 and to return the
    // number of bytes it wrote; the length is then extended by exactly that.
    unsafe {
        // Get a slice that covers the spare capacity.
        let spare =
            std::slice::from_raw_parts_mut(dst.as_mut_ptr().add(old_len), dst.capacity() - old_len);
        let cnt = format_unquoted(value, spare);
        dst.set_len(old_len + cnt);
    }
}

#[cfg(test)]
mod tests {
use std::fs::read_dir;
Expand Down Expand Up @@ -411,6 +479,12 @@ mod tests {
assert_eq!(escape("\r\n"), r#""\r\n""#);
}

#[test]
fn test_unquote() {
    assert_eq!(escape_unquote("abcd"), "abcd");
    assert_eq!(escape("abcd"), r#""abcd""#);

    // Cover the empty string and inputs that actually need escaping: the
    // unquoted output must equal the quoted output minus its surrounding quotes.
    for input in ["", "\r\n", "a\"b", "back\\slash"] {
        assert_eq!(format!("\"{}\"", escape_unquote(input)), escape(input));
    }
}

#[test]
fn test_small_strings_16_bytes() {
// Exactly 16 bytes - SSE register boundary
Expand Down
36 changes: 28 additions & 8 deletions src/simd/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,40 @@ fn escaped_mask(v: Simd256u) -> u32 {
v.bitmask()
}

#[target_feature(enable = "avx2")]
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
#[inline(always)]
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut dptr = dst.as_mut_ptr();
let dstart = dptr;
let mut nb: usize = slice.len();

*dptr = b'"';
dptr = dptr.add(1);

dptr = format_raw(value, dptr);

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
}
}

/// Escapes `value` into `dst` without surrounding quotes via the AVX2
/// `format_raw` routine.
///
/// Returns the distance in bytes between the start of `dst` and the pointer
/// `format_raw` hands back.
///
/// NOTE(review): nothing here checks `dst.len()` or AVX2 availability; the
/// caller is presumably expected to guarantee both — confirm.
#[inline(always)]
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
    let start = dst.as_mut_ptr();

    // SAFETY: relies on the caller having detected AVX2 and having supplied a
    // `dst` large enough for everything `format_raw` writes.
    let end = unsafe { format_raw(value, start) };

    end as usize - start as usize
}

#[target_feature(enable = "avx2")]
pub unsafe fn format_raw(value: &str, mut dptr: *mut u8) -> *mut u8 {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut nb: usize = slice.len();

// Process CHUNK (4 * LANES = 128 bytes) at a time
while nb >= CHUNK {
// Load 4 SIMD vectors
Expand Down Expand Up @@ -260,8 +282,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
}
}

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
dptr
}
}
37 changes: 29 additions & 8 deletions src/simd/avx512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,41 @@ fn escaped_mask(v: Simd512u) -> u64 {
v.bitmask()
}

#[target_feature(enable = "avx512f")]
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
#[inline(always)]
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut dptr = dst.as_mut_ptr();
let dstart = dptr;
let mut nb: usize = slice.len();

*dptr = b'"';
dptr = dptr.add(1);

dptr = format_raw(value, dptr);

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
}
}

/// Escapes `value` into `dst` without surrounding quotes via the AVX-512
/// `format_raw` routine.
///
/// Returns the distance in bytes between the start of `dst` and the pointer
/// `format_raw` hands back.
///
/// NOTE(review): nothing here checks `dst.len()` or `avx512f` availability;
/// the caller is presumably expected to guarantee both — confirm.
#[inline(always)]
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
    let start = dst.as_mut_ptr();

    // SAFETY: relies on the caller having detected `avx512f` and having
    // supplied a `dst` large enough for everything `format_raw` writes.
    let end = unsafe { format_raw(value, start) };

    end as usize - start as usize
}

#[target_feature(enable = "avx512f")]
pub unsafe fn format_raw(value: &str, mut dptr: *mut u8) -> *mut u8 {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let dstart = dptr;
let mut nb: usize = slice.len();

// Process CHUNK (4 * LANES = 256 bytes) at a time
while nb >= CHUNK {
// Load 4 SIMD vectors
Expand Down Expand Up @@ -253,8 +276,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
}
}

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
dptr
}
}
36 changes: 28 additions & 8 deletions src/simd/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,40 @@ fn escaped_mask(v: Simd128u) -> NeonBits {
escaped_mask_vec(v).bitmask()
}

#[target_feature(enable = "neon")]
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
#[inline(always)]
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut dptr = dst.as_mut_ptr();
let dstart = dptr;
let mut nb: usize = slice.len();

*dptr = b'"';
dptr = dptr.add(1);

dptr = format_raw(value, dptr);

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
}
}

/// Escapes `value` into `dst` without surrounding quotes via the NEON
/// `format_raw` routine.
///
/// Returns the distance in bytes between the start of `dst` and the pointer
/// `format_raw` hands back.
///
/// NOTE(review): nothing here checks `dst.len()` or NEON availability; the
/// caller is presumably expected to guarantee both — confirm.
#[inline(always)]
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
    let start = dst.as_mut_ptr();

    // SAFETY: relies on the caller having established NEON support and having
    // supplied a `dst` large enough for everything `format_raw` writes.
    let end = unsafe { format_raw(value, start) };

    end as usize - start as usize
}

#[target_feature(enable = "neon")]
pub unsafe fn format_raw(value: &str, mut dptr: *mut u8) -> *mut u8 {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut nb: usize = slice.len();

// Process CHUNK (4 * LANES = 64 bytes) at a time
while nb >= CHUNK {
// Load 4 SIMD vectors
Expand Down Expand Up @@ -267,8 +289,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
}
}

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
dptr
}
}
36 changes: 28 additions & 8 deletions src/simd/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,40 @@ fn escaped_mask(v: Simd128u) -> u16 {
v.bitmask()
}

#[target_feature(enable = "sse2")]
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
#[inline(always)]
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut dptr = dst.as_mut_ptr();
let dstart = dptr;
let mut nb: usize = slice.len();

*dptr = b'"';
dptr = dptr.add(1);

dptr = format_raw(value, dptr);

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
}
}

/// Escapes `value` into `dst` without surrounding quotes via the SSE2
/// `format_raw` routine.
///
/// Returns the distance in bytes between the start of `dst` and the pointer
/// `format_raw` hands back.
///
/// NOTE(review): nothing here checks `dst.len()` or SSE2 availability; the
/// caller is presumably expected to guarantee both — confirm.
#[inline(always)]
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
    let start = dst.as_mut_ptr();

    // SAFETY: relies on the caller having detected SSE2 and having supplied a
    // `dst` large enough for everything `format_raw` writes.
    let end = unsafe { format_raw(value, start) };

    end as usize - start as usize
}

#[target_feature(enable = "sse2")]
pub unsafe fn format_raw(value: &str, mut dptr: *mut u8) -> *mut u8 {
unsafe {
let slice = value.as_bytes();
let mut sptr = slice.as_ptr();
let mut nb: usize = slice.len();

// Process CHUNK (4 * LANES = 64 bytes) at a time
while nb >= CHUNK {
// Load 4 SIMD vectors
Expand Down Expand Up @@ -257,8 +279,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
}
}

*dptr = b'"';
dptr = dptr.add(1);
dptr as usize - dstart as usize
dptr
}
}
Loading