Skip to content

Commit f0de74c

Browse files
committed
Revert old changes and update change to how we handle error nodes to be python specific on imports
1 parent 49e09d8 commit f0de74c

3 files changed

Lines changed: 107 additions & 107 deletions

File tree

crates/static-analysis-kernel/src/analysis/analyze.rs

Lines changed: 51 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::analysis::ddsa_lib::js::flow::java::{ClassGraph, FileGraph};
33
use crate::analysis::ddsa_lib::runtime::ExecutionResult;
44
use crate::analysis::ddsa_lib::JsRuntime;
55
use crate::analysis::generated_content::{is_generated_file, is_minified_file};
6-
use crate::analysis::tree_sitter::{get_tree, get_tree_sitter_language, has_missing, TSQuery};
6+
use crate::analysis::tree_sitter::{get_tree, get_tree_sitter_language, TSQuery};
77
use crate::model::analysis::{
88
FileIgnoreBehavior, LinesToIgnore, ERROR_RULE_EXECUTION, ERROR_RULE_TIMEOUT,
99
};
@@ -183,12 +183,6 @@ where
183183
}
184184
return vec![];
185185
};
186-
187-
// skip the file if there is an error or missing node
188-
if tree.root_node().has_error() || has_missing(&tree.root_node()) {
189-
return vec![];
190-
}
191-
192186
let tree = Arc::new(tree);
193187
let cst_parsing_time = now.elapsed();
194188

@@ -444,52 +438,6 @@ def foo(arg1):
444438
)
445439
}
446440

447-
#[test]
448-
fn test_error_node_skips_file() {
449-
let rule = RuleInternal {
450-
name: "myrule".to_string(),
451-
short_description: None,
452-
description: None,
453-
category: RuleCategory::CodeStyle,
454-
severity: RuleSeverity::Notice,
455-
language: Language::Python,
456-
code: "function visit(node, filename, code) {}".to_string(),
457-
tree_sitter_query: get_query(QUERY_CODE, &Language::Python).unwrap(),
458-
};
459-
let results = analyze(
460-
&Language::Python,
461-
&vec![rule],
462-
&Arc::from("myfile.py"),
463-
&Arc::from("x = "), // incomplete assignment produces an ERROR node
464-
&RuleConfig::default(),
465-
&AnalysisOptions::default(),
466-
);
467-
assert!(results.is_empty());
468-
}
469-
470-
#[test]
471-
fn test_missing_node_skips_file() {
472-
let rule = RuleInternal {
473-
name: "myrule".to_string(),
474-
short_description: None,
475-
description: None,
476-
category: RuleCategory::CodeStyle,
477-
severity: RuleSeverity::Notice,
478-
language: Language::JavaScript,
479-
code: "function visit(node, filename, code) {}".to_string(),
480-
tree_sitter_query: get_query("(identifier) @name", &Language::JavaScript).unwrap(),
481-
};
482-
let results = analyze(
483-
&Language::JavaScript,
484-
&vec![rule],
485-
&Arc::from("myfile.js"),
486-
&Arc::from("function foo() {"), // missing closing "}" produces a MISSING node
487-
&RuleConfig::default(),
488-
&AnalysisOptions::default(),
489-
);
490-
assert!(results.is_empty());
491-
}
492-
493441
// execution time must be more than 0
494442
#[test]
495443
fn test_execution_time() {
@@ -866,6 +814,56 @@ def foo2(arg1):
866814
assert_eq!(2, suppressed.len());
867815
}
868816

817+
#[test]
818+
fn test_violation_ignore_taint_flow() {
819+
// language=java
820+
let text = "\
821+
class Test {
822+
// An ignore on a taint flow region (not the base region of the violation):
823+
// no-dd-sa
824+
void test(String input) {
825+
String a = input;
826+
var b = a;
827+
execute(b);
828+
}
829+
}
830+
";
831+
let ts_query = "\
832+
(argument_list (identifier) @arg)
833+
";
834+
// language=javascript
835+
let rule_code = r#"
836+
function visit(captures) {
837+
const arg = captures.get("arg");
838+
const sourceFlows = ddsa.getTaintSources(arg);
839+
const v = Violation.new("flow violation", sourceFlows[0]);
840+
addError(v);
841+
}
842+
"#;
843+
844+
let rule = RuleInternal {
845+
name: "java-security/flow-rule".to_string(),
846+
short_description: None,
847+
description: None,
848+
category: RuleCategory::Security,
849+
severity: RuleSeverity::Error,
850+
language: Language::Java,
851+
code: rule_code.to_string(),
852+
tree_sitter_query: get_query(ts_query, &Language::Java).unwrap(),
853+
};
854+
855+
let analysis_options = AnalysisOptions::default();
856+
let results = analyze(
857+
&Language::Python,
858+
&vec![rule],
859+
&Arc::from("file.java"),
860+
&Arc::from(text),
861+
&RuleConfig::default(),
862+
&analysis_options,
863+
);
864+
assert!(results[0].violations.is_empty());
865+
}
866+
869867
fn assert_lines_to_ignore(code: String, language: Language, rule: &'static str) {
870868
let lines_to_ignore = get_lines_to_ignore(code.as_str(), &language);
871869
assert_eq!(1, lines_to_ignore.lines_to_ignore_per_rule.len());

crates/static-analysis-kernel/src/analysis/languages/python/imports.rs

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -232,14 +232,17 @@ fn parse_import_statement<'tree, 'text: 'tree>(
232232
// We loop so that if creating the `Import` fails, we can try any subsequent imports within
233233
// this node (e.g. in a comma-separated list) to avoid prematurely yielding `None` on the iterator.
234234
while idx < node.named_child_count() {
235-
debug_assert_eq!(node.field_name_for_named_child(idx as u32), Some("name"));
235+
let field_name = node.field_name_for_named_child(idx as u32);
236236
let name_node = node.named_child(idx).expect("should be in-bounds");
237-
let parsed = parse_field_child_node(source_code, name_node);
238-
239237
idx += 1;
240238

241-
if let Ok(import) = Import::try_new(parsed.full_text, parsed.alias, None, false) {
242-
return Some(import);
239+
if field_name != Some("name") {
240+
continue;
241+
}
242+
if let Some(parsed) = parse_field_child_node(source_code, name_node) {
243+
if let Ok(import) = Import::try_new(parsed.full_text, parsed.alias, None, false) {
244+
return Some(import);
245+
}
243246
}
244247
}
245248
None
@@ -254,7 +257,8 @@ fn parse_import_from_statement<'text>(
254257
debug_assert_eq!(node.kind(), "import_from_statement");
255258
let module_name_node = node.child_by_field_name("module_name").expect(FIELD_EXISTS);
256259
let is_relative = module_name_node.kind() == "relative_import";
257-
let parsed_module = parse_field_child_node(source_code, module_name_node);
260+
let parsed_module = parse_field_child_node(source_code, module_name_node)
261+
.ok_or_else(|| format!("invalid node type `{}`", module_name_node.kind()))?;
258262
// Python syntax invariant: the module of a "from" import can't have an alias.
259263
debug_assert!(parsed_module.alias.is_none());
260264
// tree-sitter grammar invariant: a valid wildcard import must end in a `wildcard_import` node.
@@ -269,13 +273,13 @@ fn parse_import_from_statement<'text>(
269273
let field_children = node.children_by_field_name("name", &mut cursor);
270274
let entities = field_children
271275
.into_iter()
272-
.map(|child| {
273-
let parsed = parse_field_child_node(source_code, child);
276+
.filter_map(|child| {
277+
let parsed = parse_field_child_node(source_code, child)?;
274278
// tree-sitter grammar invariant: these children represent entities, never a module.
275-
Entity {
279+
Some(Entity {
276280
name: parsed.full_text,
277281
alias: parsed.alias,
278-
}
282+
})
279283
})
280284
.collect::<Vec<_>>();
281285
ImportEntities::Specific(entities)
@@ -298,13 +302,13 @@ fn parse_future_import_statement<'text>(
298302
let field_children = node.children_by_field_name("name", &mut cursor);
299303
let entities = field_children
300304
.into_iter()
301-
.map(|child| {
302-
let parsed = parse_field_child_node(source_code, child);
305+
.filter_map(|child| {
306+
let parsed = parse_field_child_node(source_code, child)?;
303307
// tree-sitter grammar invariant: these children represent entities, never a module.
304-
Entity {
308+
Some(Entity {
305309
name: parsed.full_text,
306310
alias: parsed.alias,
307-
}
311+
})
308312
})
309313
.collect::<Vec<_>>();
310314
Import::try_new(
@@ -328,32 +332,32 @@ fn parse_future_import_statement<'text>(
328332
fn parse_field_child_node<'text>(
329333
source_code: &'text str,
330334
node: tree_sitter::Node,
331-
) -> MaybeAliased<'text> {
335+
) -> Option<MaybeAliased<'text>> {
332336
match node.kind() {
333337
"relative_import" => {
334338
// (relative_import (import_prefix) (dotted_name))
335339
for i in 0..node.child_count() {
336340
let child = node.child(i).expect("i should be in-bounds");
337341
if child.kind() == "dotted_name" {
338-
return MaybeAliased {
342+
return Some(MaybeAliased {
339343
full_text: ts_node_text(source_code, child),
340344
alias: None,
341-
};
345+
});
342346
}
343347
}
344348
// Otherwise, this `relative_import` only contains an `import_prefix` node,
345349
// so return the entire node's text (which will consist of one or more "."):
346-
MaybeAliased {
350+
Some(MaybeAliased {
347351
full_text: ts_node_text(source_code, node),
348352
alias: None,
349-
}
353+
})
350354
}
351355
"dotted_name" => {
352356
// (dotted_name (identifier)+)
353-
MaybeAliased {
357+
Some(MaybeAliased {
354358
full_text: ts_node_text(source_code, node),
355359
alias: None,
356-
}
360+
})
357361
}
358362
"aliased_import" => {
359363
// (aliased_import name: (dotted_name) alias: (identifier))
@@ -363,18 +367,20 @@ fn parse_field_child_node<'text>(
363367
let alias_node = node.child_by_field_name("alias").expect(FIELD_EXISTS);
364368
debug_assert_eq!(alias_node.kind(), "identifier");
365369
let alias = ts_node_text(source_code, alias_node);
366-
MaybeAliased {
370+
Some(MaybeAliased {
367371
full_text,
368372
alias: Some(alias),
369-
}
373+
})
370374
}
371-
other => panic!("invalid node type `{other}`"),
375+
_ => None,
372376
}
373377
}
374378

375379
#[cfg(test)]
376380
mod tests {
377-
use super::{parse_imports, Entity, Import, ImportEntities};
381+
use super::{parse_imports, parse_imports_with_tree, Entity, Import, ImportEntities};
382+
use crate::analysis::tree_sitter::get_tree;
383+
use crate::model::common::Language;
378384

379385
/// A shorthand to build an [`Entity`] without an alias.
380386
pub fn ent(name: &str) -> Entity<'_> {
@@ -477,6 +483,30 @@ mod tests {
477483
}
478484
}
479485
}
486+
487+
#[test]
488+
fn error_node_does_not_panic() {
489+
let code = "import foo, + bar";
490+
let tree = get_tree(code, &Language::Python).unwrap();
491+
assert!(tree.root_node().has_error());
492+
let imports = parse_imports_with_tree(code, &tree);
493+
assert_eq!(
494+
imports,
495+
vec![
496+
Import::try_new("foo", None, None, false).unwrap(),
497+
Import::try_new("bar", None, None, false).unwrap(),
498+
]
499+
);
500+
}
501+
502+
#[test]
503+
fn all_error_nodes_does_not_panic() {
504+
let code = "import +, +";
505+
let tree = get_tree(code, &Language::Python).unwrap();
506+
assert!(tree.root_node().has_error());
507+
let imports = parse_imports_with_tree(code, &tree);
508+
assert!(imports.is_empty());
509+
}
480510
}
481511

482512
/// mod for documenting (intentionally) "incorrect" parsing behavior.

crates/static-analysis-kernel/src/analysis/tree_sitter.rs

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::collections::HashMap;
66
use std::sync::Arc;
77
use std::time::Duration;
88
use streaming_iterator::StreamingIterator;
9-
use tree_sitter::{CaptureQuantifier, Node};
9+
use tree_sitter::CaptureQuantifier;
1010

1111
pub fn get_tree_sitter_language(language: &Language) -> tree_sitter::Language {
1212
extern "C" {
@@ -70,21 +70,6 @@ pub fn get_tree(code: &str, language: &Language) -> Option<tree_sitter::Tree> {
7070
tree_sitter_parser.parse(code, None)
7171
}
7272

73-
// traverse from a node and return whether there is a missing node or not
74-
pub fn has_missing(node: &Node) -> bool {
75-
if node.is_missing() {
76-
return true;
77-
}
78-
79-
for i in 0..node.child_count() {
80-
if has_missing(&node.child(i).unwrap()) {
81-
return true;
82-
}
83-
}
84-
85-
false
86-
}
87-
8873
// build the query from tree-sitter
8974
pub fn get_query(
9075
query_code: &str,
@@ -736,17 +721,4 @@ SELECT * FROM table WHERE column = 'value';
736721
assert_eq!(None, superclasses.field_name);
737722
assert!(query_node.captures.contains_key("classname"));
738723
}
739-
740-
#[test]
741-
fn test_has_missing_detects_missing_node() {
742-
// "function foo() {" is missing the closing "}" and here tree-sitter inserts a MISSING node
743-
let tree = get_tree("function foo() {", &Language::JavaScript).unwrap();
744-
assert!(has_missing(&tree.root_node()));
745-
}
746-
747-
#[test]
748-
fn test_has_missing_returns_false_for_valid_tree() {
749-
let tree = get_tree("def foo(): pass", &Language::Python).unwrap();
750-
assert!(!has_missing(&tree.root_node()));
751-
}
752724
}

0 commit comments

Comments
 (0)