Skip to content

Commit 3fbe03a

Browse files
authored
perf(package_json): skip building unused heavy fields during parse (#279)
1 parent 9b84c23 commit 3fbe03a

1 file changed

Lines changed: 71 additions & 17 deletions

File tree

src/package_json/simd.rs

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ use std::{
99
};
1010

1111
use camino::Utf8Path;
12+
use serde::de::{Deserialize, Deserializer, IgnoredAny, MapAccess, Visitor};
1213
use simd_json::{
13-
borrowed::Value, prelude::*, to_borrowed_value, BorrowedValue, Error as SimdParseError,
14+
borrowed::{Object, Value},
15+
prelude::*,
16+
serde::from_slice,
17+
to_borrowed_value, BorrowedValue, Error as SimdParseError, ObjectHasher,
1418
};
1519

1620
use crate::{path::PathUtil, ResolveError};
@@ -23,6 +27,60 @@ pub use simd_json::BorrowedValue as JSONValue;
2327

2428
use crate::package_json::{ModuleType, SideEffects};
2529

30+
/// Names of the top-level `package.json` keys that are dropped while parsing.
///
/// The resolver does not read any of these, and their values are often large,
/// so they are skipped during deserialization instead of being built into the
/// retained DOM. The list matches the keys that the
/// `package_json_raw_json_api` code path previously removed after parsing.
const SKIPPED_FIELDS: [&str; 7] = [
    // Dependency maps.
    "dependencies",
    "devDependencies",
    "peerDependencies",
    "optionalDependencies",
    // Other fields with no influence on resolution.
    "scripts",
    "description",
    "keywords",
];
43+
44+
/// A `package.json` object with [`SKIPPED_FIELDS`] dropped during deserialization.
45+
/// Built exactly like simd-json's own object visitor (borrowed keys + values),
46+
/// only short-circuiting the skipped keys to `IgnoredAny` so their values are
47+
/// drained without allocating any container.
48+
struct FilteredObject<'a>(Object<'a>);
49+
50+
impl<'de> Deserialize<'de> for FilteredObject<'de> {
51+
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
52+
struct ObjectVisitor;
53+
54+
impl<'de> Visitor<'de> for ObjectVisitor {
55+
type Value = FilteredObject<'de>;
56+
57+
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
58+
formatter.write_str("a package.json object")
59+
}
60+
61+
fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
62+
let size = map.size_hint().unwrap_or_default();
63+
let mut object = Object::with_capacity_and_hasher(size, ObjectHasher::default());
64+
while let Some(key) = map.next_key::<&str>()? {
65+
if SKIPPED_FIELDS.contains(&key) {
66+
map.next_value::<IgnoredAny>()?;
67+
} else {
68+
object.insert(key.into(), map.next_value::<Value>()?);
69+
}
70+
}
71+
Ok(FilteredObject(object))
72+
}
73+
}
74+
75+
deserializer.deserialize_map(ObjectVisitor)
76+
}
77+
}
78+
79+
/// Reports whether the JSON root in `buf` is an object.
///
/// Leading bytes for which `u8::is_ascii_whitespace` holds are skipped; the
/// result is `true` only when the first remaining byte is `{`. An empty or
/// all-whitespace buffer yields `false`.
fn is_object_root(buf: &[u8]) -> bool {
    let first_significant = buf
        .iter()
        .copied()
        .find(|byte| !byte.is_ascii_whitespace());
    matches!(first_significant, Some(b'{'))
}
83+
2684
pub struct JSONCell {
2785
value: BorrowedValue<'static>,
2886
buf: Vec<u8>,
@@ -31,22 +89,18 @@ pub struct JSONCell {
3189

3290
impl JSONCell {
3391
pub fn try_new(mut buf: Vec<u8>) -> Result<Self, SimdParseError> {
34-
let value = to_borrowed_value(&mut buf)?;
35-
// SAFETY: This is safe because `buf` is owned by the `JSONCell` struct,
36-
#[allow(unused_mut)]
37-
let mut value =
38-
unsafe { std::mem::transmute::<BorrowedValue<'_>, BorrowedValue<'static>>(value) };
39-
40-
#[cfg(feature = "package_json_raw_json_api")]
41-
if let Some(json_object) = value.as_object_mut() {
42-
json_object.remove("description");
43-
json_object.remove("keywords");
44-
json_object.remove("scripts");
45-
json_object.remove("dependencies");
46-
json_object.remove("devDependencies");
47-
json_object.remove("peerDependencies");
48-
json_object.remove("optionalDependencies");
49-
}
92+
// Every real package.json is a JSON object; parse those field-by-field so the
93+
// skipped fields never allocate. Non-object roots keep the generic parse so
94+
// behavior (e.g. an empty `PackageJson` for a malformed file) is unchanged.
95+
let value = if is_object_root(&buf) {
96+
Value::Object(Box::new(from_slice::<FilteredObject>(&mut buf)?.0))
97+
} else {
98+
to_borrowed_value(&mut buf)?
99+
};
100+
101+
// SAFETY: `value` only borrows from `buf`, which is moved into and owned by the
102+
// returned `JSONCell`, so the borrowed data outlives every `borrow_dependent`.
103+
let value = unsafe { std::mem::transmute::<BorrowedValue<'_>, BorrowedValue<'static>>(value) };
50104

51105
Ok(Self {
52106
value,

0 commit comments

Comments
 (0)