Skip to content

Commit e1dc401

Browse files
committed
refactor: simplify type-arg matching with specialized matcher and condition-on-Result
- Encode return-type constraints as IsType conditions on PositionBase.Result rather than via SerializedSignatureMatcher.Partial.return; drop the now-unused return field on Partial.
- Make ClassPattern.typeArgs nullable (null = no type-args / raw match).
- Specialize SerializedTypeNameMatcher into TypeArgMatcher during rule resolution: name matchers are pre-compiled into ConditionNameMatcher, so the runtime evaluator dispatches on a small structural shape instead of running matchErasedName on a serialized matcher.
- Replace JIRBasicAtomEvaluator's typedMethod+ASM-debug-info path with a PositionResolver<JIRType?> for resolving the typed view at a position.
- Treat WildcardType as AnyType: collapse it at action translation, then drop the now-dead SerializedTypeNameMatcher.Wildcard / TypeArgMatcher.Wildcard variants. Java's <?> is the supertype of any concrete parameterization, so ResponseEntity<?> accepts any ResponseEntity<X>; A5 sample updated to flip the parameterized form from Negative to Positive.
- resolveIsType now forces a typed-view check for ClassPattern/Array (instead of returning mkTrue early on erased-name match) so a raw pattern correctly rejects parameterized forms when the typed view is available.
1 parent bff66e9 commit e1dc401

15 files changed

Lines changed: 144 additions & 154 deletions

File tree

core/opentaint-configuration-rules/configuration-rules-jvm/src/main/kotlin/org/opentaint/dataflow/configuration/jvm/SerializedTypeMatching.kt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,18 @@ fun SerializedTypeNameMatcher.matchType(
3030
type: JIRType,
3131
erasedMatch: SerializedTypeNameMatcher.(String) -> Boolean,
3232
): Boolean = when {
33-
this is SerializedTypeNameMatcher.Wildcard -> type is JIRUnboundWildcard
34-
35-
this is SerializedTypeNameMatcher.ClassPattern && typeArgs.isEmpty() && type is JIRClassType ->
33+
this is SerializedTypeNameMatcher.ClassPattern && typeArgs == null && type is JIRClassType ->
3634
erasedMatch(type.erasedName()) && type.isRawLike()
3735

38-
this is SerializedTypeNameMatcher.ClassPattern && typeArgs.isEmpty() ->
36+
this is SerializedTypeNameMatcher.ClassPattern && typeArgs == null ->
3937
erasedMatch(type.erasedName())
4038

41-
this is SerializedTypeNameMatcher.ClassPattern && type is JIRClassType ->
39+
this is SerializedTypeNameMatcher.ClassPattern && type is JIRClassType -> {
40+
val args = typeArgs!!
4241
erasedMatch(type.erasedName()) &&
43-
typeArgs.size == type.typeArguments.size &&
44-
typeArgs.zip(type.typeArguments).all { (m, a) -> m.matchType(a, erasedMatch) }
42+
args.size == type.typeArguments.size &&
43+
args.zip(type.typeArguments).all { (m, a) -> m.matchType(a, erasedMatch) }
44+
}
4545

4646
this is SerializedTypeNameMatcher.Array && type is JIRArrayType ->
4747
element.matchType(type.elementType, erasedMatch)
@@ -57,7 +57,7 @@ fun SerializedTypeNameMatcher.matchType(
5757
* pass-through rules whose return/parameter types show up as type variables
5858
* when resolved via the declaring class (e.g. `List.get` returns `E`).
5959
*/
60-
private fun JIRType.erasedName(): String = when (this) {
60+
fun JIRType.erasedName(): String = when (this) {
6161
is JIRClassType -> jIRClass.name
6262
is JIRTypeVariable -> jIRClass.name
6363
is JIRUnboundWildcard -> jIRClass.name

core/opentaint-configuration-rules/configuration-rules-jvm/src/main/kotlin/org/opentaint/dataflow/configuration/jvm/TaintCondition.kt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.opentaint.dataflow.configuration.jvm
22

3-
import org.opentaint.dataflow.configuration.jvm.serialized.SerializedTypeNameMatcher
43
import org.opentaint.ir.api.jvm.JIRType
54
import java.util.Objects
65

@@ -117,10 +116,22 @@ sealed interface ConditionNameMatcher {
117116
data class PatternStartsWith(val prefix: String) : ConditionNameMatcher
118117
}
119118

119+
fun ConditionNameMatcher.match(name: String): Boolean = when (this) {
120+
is ConditionNameMatcher.PatternEndsWith -> name.endsWith(suffix)
121+
is ConditionNameMatcher.PatternStartsWith -> name.startsWith(prefix)
122+
is ConditionNameMatcher.Simple -> match(name)
123+
}
124+
125+
fun ConditionNameMatcher.Simple.match(name: String): Boolean = when (this) {
126+
is ConditionNameMatcher.Pattern -> pattern.containsMatchIn(name)
127+
is ConditionNameMatcher.Concrete -> this.name == name
128+
is ConditionNameMatcher.AnyName -> true
129+
}
130+
120131
data class TypeMatchesPattern(
121132
val position: Position,
122133
val pattern: ConditionNameMatcher,
123-
val typeArgs: List<SerializedTypeNameMatcher> = emptyList(),
134+
val typeArgs: List<TypeArgMatcher>? = null,
124135
) : Condition {
125136
override fun <R> accept(conditionVisitor: ConditionVisitor<R>): R = conditionVisitor.visit(this)
126137
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package org.opentaint.dataflow.configuration.jvm
2+
3+
import org.opentaint.ir.api.jvm.JIRArrayType
4+
import org.opentaint.ir.api.jvm.JIRClassType
5+
import org.opentaint.ir.api.jvm.JIRType
6+
7+
/**
8+
* A type-argument matcher that has been pre-resolved during rule resolution:
9+
* the erased-name matchers are already compiled to [ConditionNameMatcher],
10+
* so runtime evaluation only needs to dispatch on the structure.
11+
*/
12+
sealed interface TypeArgMatcher {
13+
fun matchType(type: JIRType): Boolean
14+
15+
data class Class(
16+
val name: ConditionNameMatcher,
17+
// null = no type-args constraint (matches raw / declared erasure).
18+
val typeArgs: List<TypeArgMatcher>?,
19+
) : TypeArgMatcher {
20+
override fun matchType(type: JIRType): Boolean {
21+
if (!name.match(type.erasedName())) return false
22+
23+
if (typeArgs == null) {
24+
return if (type is JIRClassType) type.isRawLike() else true
25+
}
26+
27+
if (type !is JIRClassType) return true
28+
if (typeArgs.size != type.typeArguments.size) return false
29+
return typeArgs.zip(type.typeArguments).all { (m, a) -> m.matchType(a) }
30+
}
31+
}
32+
33+
data class Array(val element: TypeArgMatcher) : TypeArgMatcher {
34+
override fun matchType(type: JIRType): Boolean =
35+
type is JIRArrayType && element.matchType(type.elementType)
36+
}
37+
}

core/opentaint-configuration-rules/configuration-rules-jvm/src/main/kotlin/org/opentaint/dataflow/configuration/jvm/serialized/SerializedNameMatcher.kt

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,13 @@ sealed interface SerializedTypeNameMatcher {
1919
data class ClassPattern(
2020
val `package`: SerializedSimpleNameMatcher,
2121
val `class`: SerializedSimpleNameMatcher,
22-
val typeArgs: List<SerializedTypeNameMatcher> = emptyList()
22+
// null = no type-args constraint (matches raw / declared erasure).
23+
// empty list is reserved for an explicit zero-arg parameterization.
24+
val typeArgs: List<SerializedTypeNameMatcher>? = null
2325
) : SerializedTypeNameMatcher
2426

2527
@Serializable
2628
data class Array(val element: SerializedTypeNameMatcher) : SerializedTypeNameMatcher
27-
28-
/**
29-
* Matches only an unbounded Java wildcard (`?`) at a type-argument slot.
30-
* Distinct from an "any" [ClassPattern] so a pattern like `Foo<?>` does not
31-
* match a concrete parameterization like `Foo<String>`.
32-
*/
33-
@Serializable
34-
data object Wildcard : SerializedTypeNameMatcher
3529
}
3630

3731
@Serializable(with = SimpleNameMatcherSerializer::class)

core/opentaint-configuration-rules/configuration-rules-jvm/src/main/kotlin/org/opentaint/dataflow/configuration/jvm/serialized/SerializedSignatureMatcher.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ sealed interface SerializedSignatureMatcher {
2727
@Serializable
2828
data class Partial(
2929
val params: List<SerializedArgMatcher>? = null,
30-
val `return`: SerializedTypeNameMatcher? = null
3130
) : SerializedSignatureMatcher
3231
}
3332

core/opentaint-dataflow-core/opentaint-jvm-dataflow/src/main/kotlin/org/opentaint/dataflow/jvm/ap/ifds/JIRMarkAwareConditionRewriter.kt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,24 @@ import org.opentaint.dataflow.jvm.ap.ifds.analysis.JIRMethodAnalysisContext
1111
import org.opentaint.dataflow.jvm.ap.ifds.taint.ContainsMarkOnAnyField
1212
import org.opentaint.dataflow.jvm.ap.ifds.taint.JIRBasicAtomEvaluator
1313
import org.opentaint.ir.api.common.cfg.CommonInst
14-
import org.opentaint.ir.api.jvm.JIRTypedMethod
14+
import org.opentaint.ir.api.jvm.JIRType
1515

1616
/**
17-
* [typedMethod] enables generic-type-argument matching in `TypeMatchesPattern`
18-
* atoms (see [JIRBasicAtomEvaluator.resolveGenericType]). When null, matching
19-
* falls back to erased-name comparison — type-arg predicates in the rule will
20-
* silently pass regardless of the runtime parameterization. Pass the typed
21-
* view of the analyzed method whenever available.
17+
* [positionTypeResolver] enables generic-type-argument matching in
18+
* `TypeMatchesPattern` atoms by resolving each position to a typed
19+
* [JIRType]. When null, matching falls back to erased-name comparison —
20+
* type-arg predicates in the rule will silently pass regardless of the
21+
* runtime parameterization.
2222
*/
2323
class JIRMarkAwareConditionRewriter(
2424
positionResolver: PositionResolver<CallPositionValue>,
2525
factTypeChecker: JIRFactTypeChecker,
2626
aliasAnalysis: JIRLocalAliasAnalysis?,
2727
statement: CommonInst,
28-
typedMethod: JIRTypedMethod? = null,
28+
positionTypeResolver: PositionResolver<JIRType?>? = null,
2929
) {
30-
private val positiveAtomEvaluator = JIRBasicAtomEvaluator(negated = false, positionResolver, factTypeChecker, aliasAnalysis, statement, typedMethod)
31-
private val negativeAtomEvaluator = JIRBasicAtomEvaluator(negated = true, positionResolver, factTypeChecker, aliasAnalysis, statement, typedMethod)
30+
private val positiveAtomEvaluator = JIRBasicAtomEvaluator(negated = false, positionResolver, factTypeChecker, aliasAnalysis, statement, positionTypeResolver)
31+
private val negativeAtomEvaluator = JIRBasicAtomEvaluator(negated = true, positionResolver, factTypeChecker, aliasAnalysis, statement, positionTypeResolver)
3232

3333
constructor(
3434
positionResolver: PositionResolver<CallPositionValue>,

core/opentaint-dataflow-core/opentaint-jvm-dataflow/src/main/kotlin/org/opentaint/dataflow/jvm/ap/ifds/taint/JIRBasicAtomEvaluator.kt

Lines changed: 8 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,12 @@ import org.opentaint.dataflow.jvm.ap.ifds.JIRLocalAliasAnalysis
2929
import org.opentaint.dataflow.jvm.ap.ifds.JIRLocalAliasAnalysis.AliasAllocInfo
3030
import org.opentaint.dataflow.jvm.ap.ifds.JIRLocalAliasAnalysis.AliasApInfo
3131
import org.opentaint.dataflow.jvm.ap.ifds.JIRLocalAliasAnalysis.AliasInfo
32-
import org.opentaint.dataflow.configuration.jvm.matchType
33-
import org.opentaint.dataflow.configuration.jvm.serialized.SerializedSimpleNameMatcher
34-
import org.opentaint.dataflow.configuration.jvm.serialized.SerializedTypeNameMatcher
32+
import org.opentaint.dataflow.configuration.jvm.match
3533
import org.opentaint.ir.api.common.cfg.CommonInst
3634
import org.opentaint.ir.api.common.cfg.CommonValue
3735
import org.opentaint.ir.api.jvm.JIRClassType
3836
import org.opentaint.ir.api.jvm.JIRRefType
3937
import org.opentaint.ir.api.jvm.JIRType
40-
import org.opentaint.ir.api.jvm.JIRTypedMethod
4138
import org.opentaint.ir.api.jvm.cfg.JIRBool
4239
import org.opentaint.ir.api.jvm.cfg.JIRCallExpr
4340
import org.opentaint.ir.api.jvm.cfg.JIRConstant
@@ -57,7 +54,7 @@ class JIRBasicAtomEvaluator(
5754
private val typeChecker: JIRFactTypeChecker,
5855
private val aliasAnalysis: JIRLocalAliasAnalysis?,
5956
private val statement: CommonInst,
60-
private val typedMethod: JIRTypedMethod? = null,
57+
private val positionTypeResolver: PositionResolver<JIRType?>? = null,
6158
) : ConditionVisitor<Boolean> {
6259
override fun visit(condition: Not): Boolean = error("Non-atomic condition")
6360
override fun visit(condition: And): Boolean = error("Non-atomic condition")
@@ -356,12 +353,13 @@ class JIRBasicAtomEvaluator(
356353
}
357354
}
358355

359-
if (condition.typeArgs.isNotEmpty()) {
360-
val genericType = resolveGenericType(value)
356+
val typeArgs = condition.typeArgs
357+
if (typeArgs != null) {
358+
val genericType = positionTypeResolver?.resolve(condition.position)
361359
if (genericType is JIRClassType) {
362-
if (genericType.typeArguments.size != condition.typeArgs.size) return false
363-
return condition.typeArgs.zip(genericType.typeArguments).all { (matcher, arg) ->
364-
matcher.matchType(arg) { name -> matchErasedName(name) }
360+
if (genericType.typeArguments.size != typeArgs.size) return false
361+
return typeArgs.zip(genericType.typeArguments).all { (matcher, arg) ->
362+
matcher.matchType(arg)
365363
}
366364
}
367365
return true
@@ -370,53 +368,6 @@ class JIRBasicAtomEvaluator(
370368
return true
371369
}
372370

373-
private fun resolveGenericType(value: JIRValue): JIRType? {
374-
val localVar = value as? JIRLocalVar ?: return null
375-
val typedMethod = typedMethod ?: return null
376-
val method = (statement as? JIRInst)?.location?.method ?: return null
377-
val localVarNode = method.withAsmNode { methodNode ->
378-
methodNode.localVariables?.find { lvn -> lvn.index == localVar.index }
379-
} ?: return null
380-
// typedMethod.typeOf can throw on unresolved references / malformed
381-
// debug info; skip generic-aware matching rather than aborting the
382-
// atom evaluation.
383-
return try {
384-
typedMethod.typeOf(localVarNode)
385-
} catch (_: Exception) {
386-
null
387-
}
388-
}
389-
390-
private fun SerializedTypeNameMatcher.matchErasedName(name: String): Boolean = when (this) {
391-
is SerializedSimpleNameMatcher.Simple -> value == name || name.endsWith(".$value")
392-
is SerializedSimpleNameMatcher.Pattern -> Regex(pattern).containsMatchIn(name)
393-
is SerializedTypeNameMatcher.ClassPattern -> {
394-
val lastDot = name.lastIndexOf('.')
395-
val pkgName = if (lastDot >= 0) name.substring(0, lastDot) else ""
396-
val clsName = if (lastDot >= 0) name.substring(lastDot + 1) else name
397-
`package`.matchErasedName(pkgName) && `class`.matchErasedName(clsName)
398-
}
399-
is SerializedTypeNameMatcher.Array -> {
400-
val nameWithout = name.removeSuffix("[]")
401-
name != nameWithout && element.matchErasedName(nameWithout)
402-
}
403-
// A wildcard matcher is only meaningful at a type-argument slot; it has
404-
// no erased-name projection to compare against a string.
405-
is SerializedTypeNameMatcher.Wildcard -> false
406-
}
407-
408-
private fun ConditionNameMatcher.match(name: String): Boolean = when (this) {
409-
is ConditionNameMatcher.PatternEndsWith -> name.endsWith(suffix)
410-
is ConditionNameMatcher.PatternStartsWith -> name.startsWith(prefix)
411-
is ConditionNameMatcher.Simple -> match(name)
412-
}
413-
414-
private fun ConditionNameMatcher.Simple.match(name: String): Boolean = when (this) {
415-
is ConditionNameMatcher.Pattern -> pattern.containsMatchIn(name)
416-
is ConditionNameMatcher.Concrete -> this.name == name
417-
is ConditionNameMatcher.AnyName -> true
418-
}
419-
420371
private fun Position.eval(
421372
none: Boolean = false,
422373
value: (value: JIRValue) -> Boolean,

core/opentaint-java-querylang/samples/src/main/java/example/RuleWithWildcardGeneric.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ ResponseEntity<String> methodReturningResponseEntityString(String data) {
2020
}
2121

2222
/**
23-
* Wildcard ResponseEntity&lt;?&gt; is a valid Java construct that the rule
24-
* pattern also expresses. Keeping it as a Positive to pin the current behavior.
23+
* Wildcard ResponseEntity&lt;?&gt; trivially matches the &lt;?&gt; rule
24+
* pattern.
2525
*/
2626
final static class PositiveWildcard extends RuleWithWildcardGeneric {
2727
@Override
@@ -32,10 +32,12 @@ public void entrypoint() {
3232
}
3333

3434
/**
35-
* ResponseEntity&lt;String&gt; is a concrete parameterized form and must not
36-
* match a wildcard &lt;?&gt; type argument in the rule pattern.
35+
* ResponseEntity&lt;String&gt; is a concrete parameterization. Java's
36+
* unbounded wildcard `?` is the supertype of any `X`, so `&lt;?&gt;`
37+
* accepts any concrete type argument — `ResponseEntity&lt;String&gt;`
38+
* matches.
3739
*/
38-
final static class NegativeConcreteDoesNotMatch extends RuleWithWildcardGeneric {
40+
final static class PositiveConcreteMatchesWildcard extends RuleWithWildcardGeneric {
3941
@Override
4042
public void entrypoint() {
4143
String data = "tainted";

core/opentaint-java-querylang/src/main/kotlin/org/opentaint/semgrep/pattern/conversion/ParamCondition.kt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ sealed interface TypeNamePattern {
3131
}
3232

3333
/**
34-
* Java unbounded wildcard `?` as a type argument. Unlike [AnyType], which
35-
* is an unconstrained matcher that subsumes any type, [WildcardType] only
36-
* matches an unbounded wildcard at the corresponding type-argument slot.
34+
* Java unbounded wildcard `?` as a type argument. Java's `?` is the
35+
* supertype of any concrete parameterization, so a `Foo<?>` pattern
36+
* accepts any `Foo<X>` — semantically equivalent to [AnyType] at a
37+
* type-argument slot.
3738
*/
3839
@Serializable
3940
data object WildcardType : TypeNamePattern {

core/opentaint-java-querylang/src/main/kotlin/org/opentaint/semgrep/pattern/conversion/taint/AutomataToTaintRuleConversion.kt

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -566,21 +566,16 @@ private fun TaintRuleGenerationCtx.evaluateFormulaSignature(
566566
}
567567
}
568568

569-
// Convert return type to signature matcher (must apply to all builder paths)
569+
// Encode the return-type constraint as an IsType condition on the Result
570+
// position rather than on the signature matcher.
570571
val returnType = signature.returnType
571572
if (returnType != null) {
572573
val returnTypeFormula = typeMatcher(returnType, semgrepRuleTrace)
573-
val returnTypeMatcher = when (returnTypeFormula) {
574-
null -> null
575-
is MetaVarConstraintFormula.Constraint -> returnTypeFormula.constraint
576-
else -> null
577-
}
578-
if (returnTypeMatcher != null) {
574+
if (returnTypeFormula != null) {
579575
for (builder in buildersWithMethodName) {
580-
builder.signature = SerializedSignatureMatcher.Partial(
581-
params = null,
582-
`return` = returnTypeMatcher
583-
)
576+
builder.conditions += returnTypeFormula.toSerializedCondition { typeNameMatcher ->
577+
SerializedCondition.IsType(typeNameMatcher, PositionBase.Result)
578+
}
584579
}
585580
}
586581
}
@@ -628,10 +623,6 @@ private fun TaintRuleGenerationCtx.evaluateFormulaSignature(
628623
is Pattern -> {
629624
TODO("Signature class name pattern")
630625
}
631-
632-
is SerializedTypeNameMatcher.Wildcard -> {
633-
TODO("Signature class is a wildcard")
634-
}
635626
}
636627

637628
builders.mapTo(buildersWithClass) { builder ->
@@ -854,8 +845,8 @@ private fun TaintRuleGenerationCtx.typeMatcher(
854845
// Preserve arity of typeArgs: a metavar like $T or AnyType that
855846
// produces null still takes a slot in the type-arg list with an
856847
// "any" matcher, so the outer matcher remains distinguishable
857-
// from a raw (zero-type-arg) form.
858-
val serializedTypeArgs = typeName.typeArgs.map {
848+
// from a raw (no-type-arg) form.
849+
val serializedTypeArgs = typeName.typeArgs.takeIf { it.isNotEmpty() }?.map {
859850
(typeMatcher(it, semgrepRuleTrace) as? MetaVarConstraintFormula.Constraint<SerializedTypeNameMatcher>)?.constraint
860851
?: anyClassPattern()
861852
}
@@ -901,11 +892,11 @@ private fun TaintRuleGenerationCtx.typeMatcher(
901892
}
902893
}
903894

904-
is TypeNamePattern.AnyType -> null
905-
906-
is TypeNamePattern.WildcardType -> MetaVarConstraintFormula.Constraint(
907-
SerializedTypeNameMatcher.Wildcard
908-
)
895+
// `<?>` is the supertype of any concrete parameterization, so a
896+
// wildcard slot has the same matching semantics as an unconstrained
897+
// matcher — collapse it into [AnyType] at translation time.
898+
is TypeNamePattern.AnyType,
899+
is TypeNamePattern.WildcardType -> null
909900

910901
is TypeNamePattern.MetaVar -> {
911902
val constraints = metaVarInfo.constraints[typeName.metaVar]

0 commit comments

Comments
 (0)