Skip to content

Commit 551e7ad

Browse files
sirkonДенис Черемисов
authored and committed
feat: unquoted escaping added
1 parent 6f12669 commit 551e7ad

7 files changed

Lines changed: 266 additions & 31 deletions

File tree

src/lib.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,41 @@ fn format_string(value: &str, dst: &mut [u8]) -> usize {
320320
}
321321
}
322322

323+
#[inline(always)]
324+
fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
325+
#[cfg(target_arch = "aarch64")]
326+
{
327+
let has_neon = cfg!(target_os = "macos") || std::arch::is_aarch64_feature_detected!("neon");
328+
if has_neon {
329+
unsafe { simd::neon::format_unquoted(value, dst) }
330+
} else {
331+
simd::v128::format_string(value, dst)
332+
}
333+
}
334+
335+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
336+
{
337+
#[cfg(feature = "avx512")]
338+
{
339+
if is_x86_feature_detected!("avx512f") {
340+
return unsafe { simd::avx512::format_unquote(value, dst) };
341+
}
342+
}
343+
if is_x86_feature_detected!("avx2") {
344+
unsafe { simd::avx2::format_unquote(value, dst) }
345+
} else if is_x86_feature_detected!("sse2") {
346+
unsafe { simd::sse2::format_unquote(value, dst) }
347+
} else {
348+
simd::v128::format_unquote(value, dst)
349+
}
350+
}
351+
352+
#[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
353+
{
354+
simd::v128::format_string(value, dst)
355+
}
356+
}
357+
323358
pub fn escape(value: &str) -> String {
324359
let capacity = value.len() * 6 + 32 + 3;
325360
let mut buf = Vec::with_capacity(capacity);
@@ -332,9 +367,22 @@ pub fn escape(value: &str) -> String {
332367
unsafe { String::from_utf8_unchecked(buf) }
333368
}
334369

370+
pub fn escape_unquote(value: &str) -> String {
371+
let capacity = value.len() * 6 + 32 + 3;
372+
let mut buf = Vec::with_capacity(capacity);
373+
#[allow(clippy::uninit_vec)]
374+
unsafe {
375+
buf.set_len(capacity)
376+
};
377+
let cnt = format_unquoted(value, &mut buf);
378+
unsafe { buf.set_len(cnt) };
379+
unsafe { String::from_utf8_unchecked(buf) }
380+
}
381+
335382
/// # Panics
336383
///
337-
/// Panics if the buffer is not large enough. Allocate enough capacity for dst.
384+
/// Panics if the buffer is not large enough. Allocate enough capacity for dst,
385+
/// x6 will be enough in the worst case.
338386
pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
339387
let value = value.as_ref();
340388
let old_len = dst.len();
@@ -350,6 +398,26 @@ pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
350398
}
351399
}
352400

401+
/// Same as escape_into, just without open and close quotes.
402+
/// # Panics
403+
///
404+
/// Panic if the buffer is not large enough. Allocation enough capacity for dst,
405+
/// x6 will be in the worst case.
406+
pub fn escape_into_unquote<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
407+
let value = value.as_ref();
408+
let old_len = dst.len();
409+
410+
// SAFETY: We've reserved enough capacity above, and format_string will
411+
// write valid UTF-8 bytes. We'll set the correct length after.
412+
unsafe {
413+
// Get a slice that includes the spare capacity
414+
let spare =
415+
std::slice::from_raw_parts_mut(dst.as_mut_ptr().add(old_len), dst.capacity() - old_len);
416+
let cnt = format_unquoted(value, spare);
417+
dst.set_len(old_len + cnt);
418+
}
419+
}
420+
353421
#[cfg(test)]
354422
mod tests {
355423
use std::fs::read_dir;
@@ -411,6 +479,12 @@ mod tests {
411479
assert_eq!(escape("\r\n"), r#""\r\n""#);
412480
}
413481

482+
/// The unquoted variants must apply the same escaping as `escape` and differ
/// only by omitting the surrounding double quotes.
#[test]
fn test_unquote() {
    // Nothing to escape: output equals input, quoted variant adds quotes.
    assert_eq!(escape_unquote("abcd"), "abcd");
    assert_eq!(escape("abcd"), r#""abcd""#);

    // Escaping still happens when the quotes are dropped.
    assert_eq!(escape_unquote("\r\n"), r#"\r\n"#);

    // Empty input yields empty output rather than a pair of quotes.
    assert_eq!(escape_unquote(""), "");

    // The appending variant keeps existing bytes and adds no quotes.
    let mut buf = Vec::with_capacity(64);
    buf.push(b'>');
    escape_into_unquote("abcd", &mut buf);
    assert_eq!(buf, b">abcd");
}
487+
414488
#[test]
415489
fn test_small_strings_16_bytes() {
416490
// Exactly 16 bytes - SSE register boundary

src/simd/avx2.rs

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::arch::x86::*;
44
use std::arch::x86_64::*;
55

66
use std::ops::{BitAnd, BitOr, BitOrAssign};
7-
7+
use crate::simd::v128::format_raw;
88
use super::{Mask, Simd, traits::BitMask, util::escape_unchecked};
99

1010
#[cfg(any(target_os = "linux", target_os = "macos"))]
@@ -103,18 +103,46 @@ fn escaped_mask(v: Simd256u) -> u32 {
103103
v.bitmask()
104104
}
105105

106-
#[target_feature(enable = "avx2")]
107-
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
106+
#[inline(always)]
107+
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
108108
unsafe {
109109
let slice = value.as_bytes();
110-
let mut sptr = slice.as_ptr();
111110
let mut dptr = dst.as_mut_ptr();
112111
let dstart = dptr;
113112
let mut nb: usize = slice.len();
114113

115114
*dptr = b'"';
116115
dptr = dptr.add(1);
117116

117+
dptr = format_raw(value, dptr);
118+
119+
*dptr = b'"';
120+
dptr = dptr.add(1);
121+
dptr as usize - dstart as usize
122+
}
123+
}
124+
125+
#[inline(always)]
126+
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
127+
unsafe {
128+
let slice = value.as_bytes();
129+
let mut dptr = dst.as_mut_ptr();
130+
let dstart = dptr;
131+
let mut nb: usize = slice.len();
132+
133+
dptr = format_raw(value, dptr);
134+
135+
dptr as usize - dstart as usize
136+
}
137+
}
138+
139+
#[target_feature(enable = "avx2")]
140+
pub unsafe fn format_string(value: &str, mut dptr: *mut u8) -> *mut u8 {
141+
unsafe {
142+
let slice = value.as_bytes();
143+
let mut sptr = slice.as_ptr();
144+
let mut nb: usize = slice.len();
145+
118146
// Process CHUNK (4 * LANES = 128 bytes) at a time
119147
while nb >= CHUNK {
120148
// Load 4 SIMD vectors
@@ -260,8 +288,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
260288
}
261289
}
262290

263-
*dptr = b'"';
264-
dptr = dptr.add(1);
265-
dptr as usize - dstart as usize
291+
dptr
266292
}
267293
}

src/simd/avx512.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::arch::x86::*;
44
use std::arch::x86_64::*;
55

66
use std::ops::{BitAnd, BitOr, BitOrAssign};
7-
7+
use crate::simd::v128::format_raw;
88
#[cfg(any(target_os = "linux", target_os = "macos"))]
99
use super::util::check_cross_page;
1010
use super::{Mask, Simd, traits::BitMask, util::escape_unchecked};
@@ -95,18 +95,49 @@ fn escaped_mask(v: Simd512u) -> u64 {
9595
v.bitmask()
9696
}
9797

98-
#[target_feature(enable = "avx512f")]
99-
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
98+
#[inline(always)]
99+
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
100100
unsafe {
101101
let slice = value.as_bytes();
102-
let mut sptr = slice.as_ptr();
103102
let mut dptr = dst.as_mut_ptr();
104103
let dstart = dptr;
105104
let mut nb: usize = slice.len();
106105

107106
*dptr = b'"';
108107
dptr = dptr.add(1);
109108

109+
dptr = format_raw(value, dptr);
110+
111+
*dptr = b'"';
112+
dptr = dptr.add(1);
113+
dptr as usize - dstart as usize
114+
}
115+
}
116+
117+
#[inline(always)]
118+
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
119+
unsafe {
120+
let slice = value.as_bytes();
121+
let mut dptr = dst.as_mut_ptr();
122+
let dstart = dptr;
123+
let mut nb: usize = slice.len();
124+
125+
dptr = format_raw(value, dptr);
126+
127+
dptr as usize - dstart as usize
128+
}
129+
}
130+
131+
132+
#[inline(always)]
133+
#[target_feature(enable = "avx512f")]
134+
pub unsafe fn format_string(value: &str, mut dptr: *mut u8) -> *mut u8 {
135+
unsafe {
136+
let slice = value.as_bytes();
137+
let mut sptr = slice.as_ptr();
138+
let dstart = dptr;
139+
let mut nb: usize = slice.len();
140+
110141
// Process CHUNK (4 * LANES = 256 bytes) at a time
111142
while nb >= CHUNK {
112143
// Load 4 SIMD vectors
@@ -253,8 +284,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
253284
}
254285
}
255286

256-
*dptr = b'"';
257-
dptr = dptr.add(1);
258-
dptr as usize - dstart as usize
287+
dptr
259288
}
260289
}

src/simd/neon.rs

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,18 +110,46 @@ fn escaped_mask(v: Simd128u) -> NeonBits {
110110
escaped_mask_vec(v).bitmask()
111111
}
112112

113-
#[target_feature(enable = "neon")]
114-
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
113+
#[inline(always)]
114+
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
115115
unsafe {
116116
let slice = value.as_bytes();
117-
let mut sptr = slice.as_ptr();
118117
let mut dptr = dst.as_mut_ptr();
119118
let dstart = dptr;
120119
let mut nb: usize = slice.len();
121120

122121
*dptr = b'"';
123122
dptr = dptr.add(1);
124123

124+
dptr = crate::simd::v128::format_raw(value, dptr);
125+
126+
*dptr = b'"';
127+
dptr = dptr.add(1);
128+
dptr as usize - dstart as usize
129+
}
130+
}
131+
132+
#[inline(always)]
133+
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
134+
unsafe {
135+
let slice = value.as_bytes();
136+
let mut dptr = dst.as_mut_ptr();
137+
let dstart = dptr;
138+
let mut nb: usize = slice.len();
139+
140+
dptr = crate::simd::v128::format_raw(value, dptr);
141+
142+
dptr as usize - dstart as usize
143+
}
144+
}
145+
146+
#[target_feature(enable = "neon")]
147+
pub unsafe fn format_raw(value: &str, mut dptr: *mut u8) -> *mut u8 {
148+
unsafe {
149+
let slice = value.as_bytes();
150+
let mut sptr = slice.as_ptr();
151+
let mut nb: usize = slice.len();
152+
125153
// Process CHUNK (4 * LANES = 64 bytes) at a time
126154
while nb >= CHUNK {
127155
// Load 4 SIMD vectors
@@ -267,8 +295,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
267295
}
268296
}
269297

270-
*dptr = b'"';
271-
dptr = dptr.add(1);
272-
dptr as usize - dstart as usize
298+
dptr
273299
}
274300
}

src/simd/sse2.rs

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,18 +100,47 @@ fn escaped_mask(v: Simd128u) -> u16 {
100100
v.bitmask()
101101
}
102102

103-
#[target_feature(enable = "sse2")]
104-
pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
103+
#[inline(always)]
104+
pub fn format_string(value: &str, dst: &mut [u8]) -> usize {
105105
unsafe {
106106
let slice = value.as_bytes();
107-
let mut sptr = slice.as_ptr();
108107
let mut dptr = dst.as_mut_ptr();
109108
let dstart = dptr;
110109
let mut nb: usize = slice.len();
111110

112111
*dptr = b'"';
113112
dptr = dptr.add(1);
114113

114+
dptr = crate::simd::v128::format_raw(value, dptr);
115+
116+
*dptr = b'"';
117+
dptr = dptr.add(1);
118+
dptr as usize - dstart as usize
119+
}
120+
}
121+
122+
#[inline(always)]
123+
pub fn format_unquoted(value: &str, dst: &mut [u8]) -> usize {
124+
unsafe {
125+
let slice = value.as_bytes();
126+
let mut dptr = dst.as_mut_ptr();
127+
let dstart = dptr;
128+
let mut nb: usize = slice.len();
129+
130+
dptr = crate::simd::v128::format_raw(value, dptr);
131+
132+
dptr as usize - dstart as usize
133+
}
134+
}
135+
136+
#[target_feature(enable = "sse2")]
137+
pub unsafe fn format_raw(value: &str, dptr: *mut u8) -> *mut u8 {
138+
unsafe {
139+
let slice = value.as_bytes();
140+
let mut sptr = slice.as_ptr();
141+
let dstart = dptr;
142+
let mut nb: usize = slice.len();
143+
115144
// Process CHUNK (4 * LANES = 64 bytes) at a time
116145
while nb >= CHUNK {
117146
// Load 4 SIMD vectors
@@ -257,8 +286,6 @@ pub unsafe fn format_string(value: &str, dst: &mut [u8]) -> usize {
257286
}
258287
}
259288

260-
*dptr = b'"';
261-
dptr = dptr.add(1);
262-
dptr as usize - dstart as usize
289+
dptr
263290
}
264291
}

0 commit comments

Comments
 (0)