|
4 | 4 | // Regex Filter Plugin - Rust Implementation |
5 | 5 |
|
6 | 6 | use std::borrow::Cow; |
| 7 | +use std::collections::HashSet; |
7 | 8 | use std::sync::Once; |
8 | 9 |
|
9 | 10 | use log::debug; |
10 | 11 | use pyo3::prelude::*; |
11 | 12 | use pyo3::types::{PyDict, PyList, PyModule, PyTuple}; |
12 | 13 | use pyo3_stub_gen::define_stub_info_gatherer; |
| 14 | +use pyo3_stub_gen::derive::*; |
13 | 15 | use regex::{Regex, RegexSet}; |
14 | 16 |
|
15 | 17 | pub mod plugin; |
16 | 18 |
|
| 19 | +const MAX_NESTED_DEPTH: usize = 64; |
| 20 | + |
| 21 | +enum TraversalResult { |
| 22 | + Unchanged(Py<PyAny>), |
| 23 | + Modified(Py<PyAny>), |
| 24 | +} |
| 25 | + |
17 | 26 | #[derive(Debug, Clone)] |
18 | 27 | pub struct SearchReplace { |
19 | 28 | pub search: String, |
@@ -85,118 +94,185 @@ impl SearchReplaceConfig { |
85 | 94 | } |
86 | 95 | } |
87 | 96 |
|
/// Python-visible search/replace plugin backed by a parsed `SearchReplaceConfig`.
#[gen_stub_pyclass]
#[derive(Debug)]
#[pyclass]
pub struct SearchReplacePluginRust {
    /// Configuration consulted by every pattern application.
    pub config: SearchReplaceConfig,
}
93 | 103 |
|
94 | | -#[pymethods] |
95 | | -impl SearchReplacePluginRust { |
96 | | - #[new] |
97 | | - pub fn new(config_dict: &Bound<'_, PyDict>) -> PyResult<Self> { |
98 | | - let config = SearchReplaceConfig::from_py_dict(config_dict).map_err(|error| { |
99 | | - PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Invalid config: {}", error)) |
100 | | - })?; |
101 | | - Ok(Self { config }) |
| 104 | +fn apply_patterns_impl<'a>(config: &'a SearchReplaceConfig, text: &'a str) -> Cow<'a, str> { |
| 105 | + if let Some(ref pattern_set) = config.pattern_set |
| 106 | + && !pattern_set.is_match(text) |
| 107 | + { |
| 108 | + return Cow::Borrowed(text); |
102 | 109 | } |
103 | 110 |
|
104 | | - pub fn apply_patterns(&self, text: &str) -> String { |
105 | | - if let Some(ref pattern_set) = self.config.pattern_set |
106 | | - && !pattern_set.is_match(text) |
107 | | - { |
108 | | - return text.to_string(); |
| 111 | + let mut result = Cow::Borrowed(text); |
| 112 | + let mut modified = false; |
| 113 | + |
| 114 | + for pattern in &config.words { |
| 115 | + if pattern.compiled.is_match(&result) { |
| 116 | + let replaced = pattern.compiled.replace_all(&result, &pattern.replace); |
| 117 | + if let Cow::Owned(new_text) = replaced { |
| 118 | + result = Cow::Owned(new_text); |
| 119 | + modified = true; |
| 120 | + } else if modified { |
| 121 | + result = Cow::Owned(replaced.into_owned()); |
| 122 | + } |
| 123 | + } |
| 124 | + } |
| 125 | + |
| 126 | + result |
| 127 | +} |
| 128 | + |
| 129 | +fn process_nested_impl( |
| 130 | + plugin: &SearchReplacePluginRust, |
| 131 | + py: Python<'_>, |
| 132 | + data: &Bound<'_, PyAny>, |
| 133 | + depth: usize, |
| 134 | + seen: &mut HashSet<usize>, |
| 135 | +) -> PyResult<TraversalResult> { |
| 136 | + if depth >= MAX_NESTED_DEPTH { |
| 137 | + return Err(pyo3::exceptions::PyValueError::new_err(format!( |
| 138 | + "Maximum nested depth of {} exceeded", |
| 139 | + MAX_NESTED_DEPTH |
| 140 | + ))); |
| 141 | + } |
| 142 | + |
| 143 | + if let Ok(text) = data.extract::<String>() { |
| 144 | + let modified_text = apply_patterns_impl(&plugin.config, &text); |
| 145 | + return match modified_text { |
| 146 | + Cow::Borrowed(_) => Ok(TraversalResult::Unchanged(data.clone().unbind())), |
| 147 | + Cow::Owned(value) => Ok(TraversalResult::Modified( |
| 148 | + value.into_pyobject(py)?.into_any().unbind(), |
| 149 | + )), |
| 150 | + }; |
| 151 | + } |
| 152 | + |
| 153 | + if let Ok(dict) = data.cast::<PyDict>() { |
| 154 | + let identity = dict.as_ptr() as usize; |
| 155 | + if !seen.insert(identity) { |
| 156 | + return Err(pyo3::exceptions::PyValueError::new_err( |
| 157 | + "Cyclic containers are not supported", |
| 158 | + )); |
109 | 159 | } |
110 | 160 |
|
111 | | - let mut result = Cow::Borrowed(text); |
112 | | - let mut modified = false; |
113 | | - |
114 | | - for pattern in &self.config.words { |
115 | | - if pattern.compiled.is_match(&result) { |
116 | | - let replaced = pattern.compiled.replace_all(&result, &pattern.replace); |
117 | | - if let Cow::Owned(new_text) = replaced { |
118 | | - result = Cow::Owned(new_text); |
119 | | - modified = true; |
120 | | - } else if modified { |
121 | | - result = Cow::Owned(replaced.into_owned()); |
| 161 | + let mut any_modified = false; |
| 162 | + let mut processed_items = Vec::with_capacity(dict.len()); |
| 163 | + for (key, value) in dict.iter() { |
| 164 | + match process_nested_impl(plugin, py, &value, depth + 1, seen)? { |
| 165 | + TraversalResult::Unchanged(new_value) => { |
| 166 | + processed_items.push((key.clone().unbind(), new_value)); |
| 167 | + } |
| 168 | + TraversalResult::Modified(new_value) => { |
| 169 | + any_modified = true; |
| 170 | + processed_items.push((key.clone().unbind(), new_value)); |
122 | 171 | } |
123 | 172 | } |
124 | 173 | } |
| 174 | + seen.remove(&identity); |
125 | 175 |
|
126 | | - result.into_owned() |
| 176 | + if !any_modified { |
| 177 | + return Ok(TraversalResult::Unchanged(data.clone().unbind())); |
| 178 | + } |
| 179 | + |
| 180 | + let new_dict = PyDict::new(py); |
| 181 | + for (key, value) in processed_items { |
| 182 | + new_dict.set_item(key.bind(py), value.bind(py))?; |
| 183 | + } |
| 184 | + return Ok(TraversalResult::Modified(new_dict.into_any().unbind())); |
127 | 185 | } |
128 | 186 |
|
129 | | - pub fn process_nested( |
130 | | - &self, |
131 | | - py: Python<'_>, |
132 | | - data: &Bound<'_, PyAny>, |
133 | | - ) -> PyResult<(bool, Py<PyAny>)> { |
134 | | - if let Ok(text) = data.extract::<String>() { |
135 | | - let modified_text = self.apply_patterns(&text); |
136 | | - if modified_text == text { |
137 | | - return Ok((false, data.clone().unbind())); |
138 | | - } |
139 | | - return Ok((true, modified_text.into_pyobject(py)?.into_any().unbind())); |
| 187 | + if let Ok(list) = data.cast::<PyList>() { |
| 188 | + let identity = list.as_ptr() as usize; |
| 189 | + if !seen.insert(identity) { |
| 190 | + return Err(pyo3::exceptions::PyValueError::new_err( |
| 191 | + "Cyclic containers are not supported", |
| 192 | + )); |
140 | 193 | } |
141 | 194 |
|
142 | | - if let Ok(dict) = data.cast::<PyDict>() { |
143 | | - let mut any_modified = false; |
144 | | - let mut processed_items = Vec::with_capacity(dict.len()); |
145 | | - for (key, value) in dict.iter() { |
146 | | - let (item_modified, new_value) = self.process_nested(py, &value)?; |
147 | | - any_modified |= item_modified; |
148 | | - processed_items.push((key.clone().unbind(), new_value)); |
| 195 | + let mut any_modified = false; |
| 196 | + let mut new_items = Vec::with_capacity(list.len()); |
| 197 | + for item in list.iter() { |
| 198 | + match process_nested_impl(plugin, py, &item, depth + 1, seen)? { |
| 199 | + TraversalResult::Unchanged(new_item) => new_items.push(new_item), |
| 200 | + TraversalResult::Modified(new_item) => { |
| 201 | + any_modified = true; |
| 202 | + new_items.push(new_item); |
| 203 | + } |
149 | 204 | } |
| 205 | + } |
| 206 | + seen.remove(&identity); |
150 | 207 |
|
151 | | - if !any_modified { |
152 | | - return Ok((false, data.clone().unbind())); |
153 | | - } |
| 208 | + if !any_modified { |
| 209 | + return Ok(TraversalResult::Unchanged(data.clone().unbind())); |
| 210 | + } |
154 | 211 |
|
155 | | - let new_dict = PyDict::new(py); |
156 | | - for (key, value) in processed_items { |
157 | | - new_dict.set_item(key.bind(py), value.bind(py))?; |
158 | | - } |
159 | | - return Ok((true, new_dict.into_any().unbind())); |
| 212 | + let new_list = PyList::empty(py); |
| 213 | + for item in new_items { |
| 214 | + new_list.append(item.bind(py))?; |
160 | 215 | } |
| 216 | + return Ok(TraversalResult::Modified(new_list.into_any().unbind())); |
| 217 | + } |
161 | 218 |
|
162 | | - if let Ok(list) = data.cast::<PyList>() { |
163 | | - let mut any_modified = false; |
164 | | - let mut new_items = Vec::with_capacity(list.len()); |
165 | | - for item in list.iter() { |
166 | | - let (item_modified, new_item) = self.process_nested(py, &item)?; |
167 | | - any_modified |= item_modified; |
168 | | - new_items.push(new_item); |
169 | | - } |
| 219 | + if let Ok(tuple) = data.cast::<PyTuple>() { |
| 220 | + let identity = tuple.as_ptr() as usize; |
| 221 | + if !seen.insert(identity) { |
| 222 | + return Err(pyo3::exceptions::PyValueError::new_err( |
| 223 | + "Cyclic containers are not supported", |
| 224 | + )); |
| 225 | + } |
170 | 226 |
|
171 | | - if !any_modified { |
172 | | - return Ok((false, data.clone().unbind())); |
| 227 | + let mut any_modified = false; |
| 228 | + let mut new_items = Vec::with_capacity(tuple.len()); |
| 229 | + for item in tuple.iter() { |
| 230 | + match process_nested_impl(plugin, py, &item, depth + 1, seen)? { |
| 231 | + TraversalResult::Unchanged(new_item) => new_items.push(new_item), |
| 232 | + TraversalResult::Modified(new_item) => { |
| 233 | + any_modified = true; |
| 234 | + new_items.push(new_item); |
| 235 | + } |
173 | 236 | } |
| 237 | + } |
| 238 | + seen.remove(&identity); |
174 | 239 |
|
175 | | - let new_list = PyList::empty(py); |
176 | | - for item in new_items { |
177 | | - new_list.append(item.bind(py))?; |
178 | | - } |
179 | | - return Ok((true, new_list.into_any().unbind())); |
| 240 | + if !any_modified { |
| 241 | + return Ok(TraversalResult::Unchanged(data.clone().unbind())); |
180 | 242 | } |
181 | 243 |
|
182 | | - if let Ok(tuple) = data.cast::<PyTuple>() { |
183 | | - let mut any_modified = false; |
184 | | - let mut new_items = Vec::with_capacity(tuple.len()); |
185 | | - for item in tuple.iter() { |
186 | | - let (item_modified, new_item) = self.process_nested(py, &item)?; |
187 | | - any_modified |= item_modified; |
188 | | - new_items.push(new_item); |
189 | | - } |
| 244 | + let new_tuple = PyTuple::new(py, new_items.iter().map(|item| item.bind(py)))?; |
| 245 | + return Ok(TraversalResult::Modified(new_tuple.into_any().unbind())); |
| 246 | + } |
190 | 247 |
|
191 | | - if !any_modified { |
192 | | - return Ok((false, data.clone().unbind())); |
193 | | - } |
| 248 | + Ok(TraversalResult::Unchanged(data.clone().unbind())) |
| 249 | +} |
194 | 250 |
|
195 | | - let new_tuple = PyTuple::new(py, new_items.iter().map(|item| item.bind(py)))?; |
196 | | - return Ok((true, new_tuple.into_any().unbind())); |
197 | | - } |
| 251 | +#[gen_stub_pymethods] |
| 252 | +#[pymethods] |
| 253 | +impl SearchReplacePluginRust { |
| 254 | + #[new] |
| 255 | + pub fn new(config_dict: &Bound<'_, PyDict>) -> PyResult<Self> { |
| 256 | + let config = SearchReplaceConfig::from_py_dict(config_dict).map_err(|error| { |
| 257 | + PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Invalid config: {}", error)) |
| 258 | + })?; |
| 259 | + Ok(Self { config }) |
| 260 | + } |
198 | 261 |
|
199 | | - Ok((false, data.clone().unbind())) |
| 262 | + pub fn apply_patterns(&self, text: &str) -> String { |
| 263 | + apply_patterns_impl(&self.config, text).into_owned() |
| 264 | + } |
| 265 | + |
| 266 | + pub fn process_nested( |
| 267 | + &self, |
| 268 | + py: Python<'_>, |
| 269 | + data: &Bound<'_, PyAny>, |
| 270 | + ) -> PyResult<(bool, Py<PyAny>)> { |
| 271 | + let mut seen = HashSet::new(); |
| 272 | + Ok(match process_nested_impl(self, py, data, 0, &mut seen)? { |
| 273 | + TraversalResult::Unchanged(value) => (false, value), |
| 274 | + TraversalResult::Modified(value) => (true, value), |
| 275 | + }) |
200 | 276 | } |
201 | 277 | } |
202 | 278 |
|
|
0 commit comments