|
19 | 19 |
|
20 | 20 | package org.apache.cassandra.spark.bulkwriter; |
21 | 21 |
|
| 22 | +import java.io.File; |
22 | 23 | import java.io.IOException; |
23 | 24 | import java.math.BigInteger; |
| 25 | +import java.nio.ByteBuffer; |
24 | 26 | import java.nio.file.DirectoryStream; |
25 | 27 | import java.nio.file.Files; |
26 | 28 | import java.nio.file.Path; |
| 29 | +import java.util.AbstractMap; |
27 | 30 | import java.util.ArrayList; |
28 | 31 | import java.util.Arrays; |
| 32 | +import java.util.Collections; |
29 | 33 | import java.util.HashSet; |
30 | 34 | import java.util.List; |
31 | 35 | import java.util.Map; |
32 | 36 | import java.util.Set; |
| 37 | +import java.util.SortedMap; |
| 38 | +import java.util.TreeMap; |
33 | 39 | import java.util.concurrent.CountDownLatch; |
34 | 40 | import java.util.concurrent.ExecutorService; |
35 | 41 | import java.util.concurrent.Executors; |
36 | 42 | import java.util.concurrent.Future; |
37 | 43 | import java.util.concurrent.TimeUnit; |
38 | 44 | import java.util.stream.Collectors; |
39 | 45 |
|
| 46 | +import com.google.common.collect.ImmutableList; |
40 | 47 | import com.google.common.collect.ImmutableMap; |
41 | 48 | import com.google.common.util.concurrent.Uninterruptibles; |
42 | 49 | import org.junit.jupiter.api.AfterAll; |
|
45 | 52 | import org.junit.jupiter.params.ParameterizedTest; |
46 | 53 | import org.junit.jupiter.params.provider.MethodSource; |
47 | 54 |
|
| 55 | +import org.apache.cassandra.bridge.BloomFilter; |
| 56 | +import org.apache.cassandra.bridge.CassandraBridge; |
| 57 | +import org.apache.cassandra.bridge.CassandraBridgeFactory; |
48 | 58 | import org.apache.cassandra.bridge.CassandraVersion; |
49 | 59 | import org.apache.cassandra.bridge.CassandraVersionFeatures; |
50 | 60 | import org.apache.cassandra.bridge.SSTableDescriptor; |
51 | 61 | import org.apache.cassandra.spark.bulkwriter.token.ConsistencyLevel; |
52 | 62 | import org.apache.cassandra.spark.bulkwriter.token.TokenRangeMapping; |
53 | 63 | import org.apache.cassandra.spark.common.Digest; |
| 64 | +import org.apache.cassandra.spark.data.CqlTable; |
| 65 | +import org.apache.cassandra.spark.data.FileSystemSSTable; |
| 66 | +import org.apache.cassandra.spark.data.ReplicationFactor; |
| 67 | +import org.apache.cassandra.spark.data.partitioner.Partitioner; |
| 68 | +import org.apache.cassandra.spark.stats.BufferingInputStreamStats; |
54 | 69 | import org.apache.cassandra.spark.utils.XXHash32DigestAlgorithm; |
55 | 70 | import org.jetbrains.annotations.NotNull; |
56 | 71 |
|
@@ -148,6 +163,122 @@ public void canCreateWriterForVersion(String version) throws IOException |
148 | 163 | tw.validateSSTables(writerContext, tw.getOutDir(), dataFilePaths); |
149 | 164 | } |
150 | 165 |
|
| 166 | + @ParameterizedTest |
| 167 | + @MethodSource("supportedVersions") |
| 168 | + public void testBloomFilterRebuild(String version) throws IOException |
| 169 | + { |
| 170 | + int rowCount = 50_000; |
| 171 | + CassandraBridge bridge = CassandraBridgeFactory.get(version); |
| 172 | + MockBulkWriterContext writerContext = new MockBulkWriterContext(tokenRangeMapping, version, ConsistencyLevel.CL.LOCAL_QUORUM); |
| 173 | + Partitioner partitioner = writerContext.getPartitioner(); |
| 174 | + CqlTable cqlTable = bridge.buildSchema(writerContext.schema().getTableSchema().createStatement, |
| 175 | + writerContext.qualifiedTableName().keyspace(), |
| 176 | + new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy, |
| 177 | + ImmutableMap.of("replication_factor", 1)), |
| 178 | + partitioner, |
| 179 | + Collections.emptySet()); |
| 180 | + SortedMap<BigInteger, List<String>> sortedKeys = new TreeMap<>(); |
| 181 | + for (int i = 0; i < rowCount; ++i) |
| 182 | + { |
| 183 | + List<String> keys = ImmutableList.of(String.valueOf(i), "1"); |
| 184 | + AbstractMap.SimpleEntry<ByteBuffer, BigInteger> partitionKey = bridge.getPartitionKey(cqlTable, partitioner, keys); |
| 185 | + sortedKeys.put(partitionKey.getValue(), keys); |
| 186 | + } |
| 187 | + |
| 188 | + SortedSSTableWriter tw = new SortedSSTableWriter(writerContext, tmpDir, new XXHash32DigestAlgorithm(), 1); |
| 189 | + List<SSTableDescriptor> allSSTables = new ArrayList<>(); |
| 190 | + tw.setSSTablesProducedListener(allSSTables::addAll); |
| 191 | + for (BigInteger token : sortedKeys.keySet()) |
| 192 | + { |
| 193 | + List<String> partitionKey = sortedKeys.get(token); |
| 194 | + tw.addRow(token, |
| 195 | + ImmutableMap.of("id", Integer.parseInt(partitionKey.get(0)), |
| 196 | + "date", Integer.parseInt(partitionKey.get(1)), |
| 197 | + "course", "foo", "marks", 1)); |
| 198 | + } |
| 199 | + tw.close(writerContext); |
| 200 | + |
| 201 | + assertThat(allSSTables).hasSize(1); |
| 202 | + |
| 203 | + Set<Path> filterFilePaths = new HashSet<>(); |
| 204 | + try (DirectoryStream<Path> filterFileStream = Files.newDirectoryStream(tw.getOutDir(), "*-Filter.db")) |
| 205 | + { |
| 206 | + filterFileStream.forEach(filterFilePaths::add); |
| 207 | + } |
| 208 | + |
| 209 | + assertThat(filterFilePaths).hasSize(1); |
| 210 | + |
| 211 | + Path filterFile = filterFilePaths.iterator().next(); |
| 212 | + String dataFileName = filterFile.toFile().getName().replace("-Filter", "-Data"); |
| 213 | + Path dataFilePath = filterFile.getParent().resolve(dataFileName); |
| 214 | + FileSystemSSTable ssTable = new FileSystemSSTable(dataFilePath, false, BufferingInputStreamStats::doNothingStats); |
| 215 | + |
| 216 | + BloomFilter bloomFilter = bridge.openBloomFilter(partitioner, |
| 217 | + writerContext.qualifiedTableName().keyspace(), |
| 218 | + writerContext.qualifiedTableName().table(), |
| 219 | + ssTable); |
| 220 | + |
| 221 | + // second column is always set to 1 when inserting data |
| 222 | + List<ByteBuffer> searchKeys = bridge.encodePartitionKeys(partitioner, |
| 223 | + writerContext.qualifiedTableName().keyspace(), |
| 224 | + writerContext.schema().getTableSchema().createStatement, |
| 225 | + ImmutableList.of(ImmutableList.of("1", "1"), ImmutableList.of("7", "2"))); |
| 226 | + |
| 227 | + assertThat(bloomFilter.mightContain(searchKeys.get(0))).isTrue(); |
| 228 | + // Flaky assertion: bloom filters can answer false positive, but since we are using limited data set, |
| 229 | + // it is unlikely to happen. |
| 230 | + assertThat(bloomFilter.doesNotContain(searchKeys.get(1))).isTrue(); |
| 231 | + } |
| 232 | + |
| 233 | + @ParameterizedTest |
| 234 | + @MethodSource("supportedVersions") |
| 235 | + public void testBloomFilterRebuildErrorHandling(String version) throws IOException |
| 236 | + { |
| 237 | + MockBulkWriterContext writerContext = new MockBulkWriterContext(tokenRangeMapping, version, ConsistencyLevel.CL.LOCAL_QUORUM); |
| 238 | + SortedSSTableWriter tw = new SortedSSTableWriter(writerContext, tmpDir, new XXHash32DigestAlgorithm(), 1) |
| 239 | + { |
| 240 | + protected void rebuildFilterComponents(@NotNull BulkWriterContext writerContext, |
| 241 | + @NotNull DirectoryStream.Filter<Path> filter) throws IOException |
| 242 | + { |
| 243 | + // temporarily move index file to simulate error in bloom filter rebuild process |
| 244 | + try (DirectoryStream<Path> indexFileStream = Files.newDirectoryStream(getOutDir(), "*.db")) |
| 245 | + { |
| 246 | + indexFileStream.forEach(indexFilePath -> { |
| 247 | + for (String indexSuffix : Arrays.asList("Partitions.db", "Index.db")) |
| 248 | + { |
| 249 | + if (indexFilePath.toFile().getName().endsWith(indexSuffix)) |
| 250 | + { |
| 251 | + File indexFile = indexFilePath.toFile(); |
| 252 | + boolean moved = indexFile.renameTo(new File(indexFile.getAbsolutePath() + "_hidden")); |
| 253 | + assertThat(moved).isTrue(); |
| 254 | + } |
| 255 | + } |
| 256 | + }); |
| 257 | + } |
| 258 | + super.rebuildFilterComponents(writerContext, filter); |
| 259 | + // move the index files back |
| 260 | + try (DirectoryStream<Path> hiddenFileStream = Files.newDirectoryStream(getOutDir(), "*_hidden")) |
| 261 | + { |
| 262 | + hiddenFileStream.forEach(hiddenFilePath -> { |
| 263 | + File hiddenFile = hiddenFilePath.toFile(); |
| 264 | + boolean moved = hiddenFile.renameTo(new File(hiddenFile.getParent(), hiddenFile.getName().replace("_hidden", ""))); |
| 265 | + assertThat(moved).isTrue(); |
| 266 | + }); |
| 267 | + } |
| 268 | + } |
| 269 | + }; |
| 270 | + List<SSTableDescriptor> allSSTables = new ArrayList<>(); |
| 271 | + tw.setSSTablesProducedListener(allSSTables::addAll); |
| 272 | + tw.addRow(BigInteger.ONE, ImmutableMap.of("id", 1, "date", 1, "course", "foo", "marks", 1)); |
| 273 | + tw.close(writerContext); |
| 274 | + assertThat(allSSTables).hasSize(1); |
| 275 | + // verify that bloom filter was not created |
| 276 | + try (DirectoryStream<Path> filterFileStream = Files.newDirectoryStream(tw.getOutDir(), "*Filter.db")) |
| 277 | + { |
| 278 | + assertThat(filterFileStream.iterator().hasNext()).isFalse(); |
| 279 | + } |
| 280 | + } |
| 281 | + |
151 | 282 | /** |
152 | 283 | * Tests the race condition fix between prepareSStablesToSend (called from background threads) |
153 | 284 | * and close (called from the main thread). This test exercises CASSANALYTICS-107. |
|
0 commit comments