|
28 | 28 | import lombok.*; |
29 | 29 | import lombok.extern.slf4j.Slf4j; |
30 | 30 | import org.apache.commons.codec.digest.DigestUtils; |
| 31 | +import org.apache.commons.io.FilenameUtils; |
31 | 32 | import org.apache.tika.Tika; |
32 | 33 | import org.apache.tika.mime.MediaType; |
33 | 34 | import org.apache.tika.mime.MediaTypeRegistry; |
|
37 | 38 | import java.io.IOException; |
38 | 39 | import java.nio.charset.Charset; |
39 | 40 | import java.nio.file.Files; |
| 41 | +import java.nio.file.Path; |
40 | 42 | import java.util.*; |
41 | 43 | import java.util.zip.CRC32C; |
42 | 44 | import java.util.zip.Checksum; |
@@ -69,6 +71,12 @@ public class Winnowing { |
69 | 71 | @Builder.Default |
70 | 72 | private int snippetLimit = MAX_LONG_LINE_CHARS; // Enable limiting of size of a single line of snippet generation |
71 | 73 |
|
| 74 | + private Map<String, Path> obfuscationMap; |
| 75 | + |
| 76 | + public Path getRealFilePathFor(String obfuscatedPath) { |
| 77 | + return obfuscationMap.get(obfuscatedPath); |
| 78 | + } |
| 79 | + |
72 | 80 | /** |
73 | 81 | * Calculate the WFP (fingerprint) for the given file |
74 | 82 | * |
@@ -112,7 +120,14 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c |
112 | 120 | char[] fileContents = (new String(contents, Charset.defaultCharset())).toCharArray(); |
113 | 121 | String fileMD5 = DigestUtils.md5Hex(contents); |
114 | 122 | StringBuilder wfpBuilder = new StringBuilder(); |
115 | | - // TODO add obfuscation of the filename here |
| 123 | + |
| 124 | + if (obfuscate) { |
| 125 | + String extension = FilenameUtils.getExtension(filename); |
| 126 | + String obfuscatedPath = obfuscationMap.size() + "." + extension; |
| 127 | + this.obfuscationMap.put(obfuscatedPath, Path.of(filename)); |
| 128 | + filename = obfuscatedPath; |
| 129 | + } |
| 130 | + |
116 | 131 | wfpBuilder.append(String.format("file=%s,%d,%s\n", fileMD5, contents.length, filename)); |
117 | 132 | if (binFile || this.skipSnippets || this.skipSnippets(filename, fileContents)) { |
118 | 133 | return wfpBuilder.toString(); |
|
0 commit comments