-
Notifications
You must be signed in to change notification settings - Fork 336
Expand file tree
/
Copy pathCopyPasteDetectorPlugin.kt
More file actions
88 lines (79 loc) · 3.15 KB
/
CopyPasteDetectorPlugin.kt
File metadata and controls
88 lines (79 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package datadog.gradle.plugin.lint
import org.gradle.api.Plugin
import org.gradle.api.Project
import java.io.File
/**
 * Gradle plugin that registers the `checkCodeDuplication` verification task.
 *
 * When executed, the task walks the Java sources under a fixed list of root-project
 * directories, extracts method-like bodies with a regex + brace-matching heuristic,
 * normalizes them (comment lines dropped, whitespace collapsed) and logs every group of
 * locations whose normalized bodies are identical. Duplicates are only reported as
 * warnings; finding them does not make the task fail.
 */
class CopyPasteDetectorPlugin : Plugin<Project> {
    override fun apply(target: Project) {
        target.tasks.register("checkCodeDuplication") {
            group = "verification"
            description = "Detect copy-pasted code using hash-based method body comparison"
            doLast {
                val rootDir = target.rootProject.projectDir
                val dirs = SCAN_ROOTS
                    .map { target.rootProject.file(it) }
                    .filter { it.exists() }
                // Keyed by the normalized body itself rather than by its 32-bit hashCode, so two
                // different bodies that happen to share a hash are never reported as duplicates.
                val locationsByBody = mutableMapOf<String, MutableList<String>>()
                var totalFiles = 0
                dirs.forEach { dir ->
                    dir.walkTopDown()
                        // Prune excluded directories by name while walking. This is independent of
                        // the OS path separator and of where the repository is checked out (an
                        // absolute path such as /ci/build/repo no longer excludes every file).
                        .onEnter { it.name !in EXCLUDED_DIR_NAMES }
                        .filter { it.isFile && it.extension == "java" }
                        .forEach { file ->
                            totalFiles++
                            extractMethodBodies(file).forEach { (name, body) ->
                                val normalized = normalizeCode(body)
                                if (normalized.length > MIN_NORMALIZED_LENGTH) {
                                    val location = "${file.relativeTo(rootDir).path}:$name"
                                    locationsByBody.getOrPut(normalized) { mutableListOf() }.add(location)
                                }
                            }
                        }
                }
                val duplicates = locationsByBody.values.filter { it.size > 1 }
                if (duplicates.isNotEmpty()) {
                    target.logger.warn("CPD: Found ${duplicates.size} group(s) of duplicate methods across $totalFiles files:")
                    duplicates.take(MAX_REPORTED_GROUPS).forEach { locations ->
                        target.logger.warn(" DUPLICATE GROUP (${locations.size} copies):")
                        locations.forEach { loc -> target.logger.warn(" - $loc") }
                    }
                    if (duplicates.size > MAX_REPORTED_GROUPS) {
                        target.logger.warn(" ... and ${duplicates.size - MAX_REPORTED_GROUPS} more groups")
                    }
                } else {
                    target.logger.lifecycle("CPD: No significant duplicates found across $totalFiles files")
                }
            }
        }
    }

    /**
     * Extracts method-like bodies from a Java source file.
     *
     * This is a heuristic, not a parser: headers are located with [METHOD_HEADER] and the body
     * is everything up to the matching closing brace. Bodies shorter than [MIN_BODY_LINES]
     * lines, and headers whose closing brace cannot be found, are skipped.
     *
     * @param file the Java source file to read
     * @return pairs of (captured name, body text without the enclosing braces)
     */
    private fun extractMethodBodies(file: File): List<Pair<String, String>> {
        val content = file.readText()
        val results = mutableListOf<Pair<String, String>>()
        for (match in METHOD_HEADER.findAll(content)) {
            val methodName = match.groupValues[1]
            // The header regex also matches statements such as "else if (...) {"; a reserved
            // word can never be a method name, so those matches are not methods.
            if (methodName in CONTROL_KEYWORDS) continue
            val bodyStart = match.range.last + 1
            val bodyEnd = findClosingBrace(content, bodyStart)
            if (bodyEnd < 0) continue
            val body = content.substring(bodyStart, bodyEnd)
            if (body.lines().size >= MIN_BODY_LINES) {
                results.add(methodName to body)
            }
        }
        return results
    }

    /**
     * Finds the brace that closes a block whose opening brace sits just before [startIdx].
     *
     * Braces inside string literals, character literals and comments are ignored so that
     * code such as `"{"` does not unbalance the count.
     *
     * @return the index of the matching closing brace, or -1 when the block (or a literal or
     *   block comment inside it) is not terminated before the end of [content]
     */
    private fun findClosingBrace(content: String, startIdx: Int): Int {
        var depth = 1
        var idx = startIdx
        while (idx < content.length) {
            val ch = content[idx]
            when {
                content.startsWith("//", idx) -> {
                    val lineEnd = content.indexOf('\n', idx)
                    if (lineEnd < 0) return -1
                    idx = lineEnd
                }
                content.startsWith("/*", idx) -> {
                    val commentEnd = content.indexOf("*/", idx + 2)
                    if (commentEnd < 0) return -1
                    // Land on the final '/' of the terminator; the increment below steps past it.
                    idx = commentEnd + 1
                }
                ch == '"' || ch == '\'' -> {
                    val literalEnd = findLiteralEnd(content, idx)
                    if (literalEnd < 0) return -1
                    idx = literalEnd
                }
                ch == '{' -> depth++
                ch == '}' -> {
                    depth--
                    if (depth == 0) return idx
                }
            }
            idx++
        }
        return -1
    }

    /**
     * Returns the index of the quote that closes the literal opened at [openIdx], honouring
     * backslash escapes, or -1 when the literal is not closed before the end of [content].
     */
    private fun findLiteralEnd(content: String, openIdx: Int): Int {
        val quote = content[openIdx]
        var idx = openIdx + 1
        while (idx < content.length) {
            when (content[idx]) {
                '\\' -> idx++ // skip the escaped character as well
                quote -> return idx
            }
            idx++
        }
        return -1
    }

    /**
     * Produces the canonical form used to compare bodies: every line is trimmed, blank lines
     * and lines that start a line comment, open a block comment or continue one with `*` are
     * removed, and every run of whitespace is collapsed to a single space.
     */
    private fun normalizeCode(code: String): String =
        code.lines()
            .map { it.trim() }
            .filter { line -> line.isNotEmpty() && COMMENT_LINE_PREFIXES.none { line.startsWith(it) } }
            .joinToString("\n")
            .replace(WHITESPACE_RUN, " ")

    private companion object {
        /** Directories, relative to the root project, whose Java sources are scanned. */
        val SCAN_ROOTS = listOf("dd-java-agent/instrumentation", "dd-trace-core", "internal-api")

        /** Directory names that are never descended into. */
        val EXCLUDED_DIR_NAMES = setOf("build", "generated")

        /** Bodies with fewer lines than this are ignored. */
        const val MIN_BODY_LINES = 5

        /** Normalized bodies must be longer than this many characters to be compared. */
        const val MIN_NORMALIZED_LENGTH = 200

        /** Maximum number of duplicate groups printed in full. */
        const val MAX_REPORTED_GROUPS = 20

        /** Reserved words the header regex can capture in the method-name position. */
        val CONTROL_KEYWORDS = setOf("if", "for", "while", "switch", "catch", "synchronized")

        /** Line prefixes (after trimming) that mark a line as comment-only. */
        val COMMENT_LINE_PREFIXES = listOf("//", "/*", "*")

        /** Heuristic match for a Java method header up to and including its opening brace. */
        val METHOD_HEADER =
            Regex("""(?:public|private|protected|static|final|synchronized|\s)+[\w<>\[\],\s]+\s+(\w+)\s*\([^)]*\)[^{]*\{""")

        val WHITESPACE_RUN = Regex("""\s+""")
    }
}