2323import org .apache .ignite .cluster .ClusterNode ;
2424import org .apache .ignite .lang .IgniteBiPredicate ;
2525
26- /** */
26+ /**
27+ * Multi-data center affinity backup filter that ensures each partition's data is distributed across multiple data centers,
28+ * providing high availability and fault tolerance. This implementation guarantees at least one copy of the data in each
29+ * data center and attempts to maintain the configured backup factor without discarding copies.
30+ * <p>
31+  * The filter works by grouping nodes based on their data center identification attribute ({@link ClusterNode#dataCenterId()})
32+ * and ensuring that for every partition, at least one node from each data center is included in the primary-backup set.
33+ * <p>
34+ * The filter will discard backup copies only if the number of available nodes in a given data center is less
35+ * than the number of copies assigned to that data center.
36+ * For example, if a partition has 4 copies (1 primary and 3 backups) and the cluster has 2 data centers,
37+  * then 2 copies are assigned to each data center. The only scenario when just a single copy is assigned to a node in a data center is when
38+ * the number of nodes in that data center is one.
39+ * <p>
40+ * This class is constructed with a number of data centers the cluster spans and a number of backups of the cache this filter is applied to.
41+ * Implementation expects that all copies can be spread evenly across all data centers. In other words, (backups + 1) is divisible by
42+ * number of data centers without remainder. Uneven distributions of copies are not supported.
43+ * <p>
44+ * Warning: Ensure that all nodes have a consistent and valid data center identifier attribute. Missing or inconsistent values
45+ * may lead to unexpected placement of data.
46+  * <p>
47+ * <h2 class="header">Spring Example</h2>
48+ * Create a partitioned cache template where each data center has at least one copy of the data, and the backup count is maintained.
49+ * <pre name="code" class="xml">
50+ * <property name="cacheConfiguration">
51+ * <list>
52+ * <bean id="cache-template-bean" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
53+ * <property name="name" value="JobcaseDefaultCacheConfig*"/>
54+ * <property name="cacheMode" value="PARTITIONED" />
55+ * <property name="backups" value="3" />
56+ * <property name="affinity">
57+ * <bean class="org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction">
58+ * <property name="affinityBackupFilter">
59+ * <bean class="org.apache.ignite.cache.affinity.rendezvous.MdcAffinityBackupFilter">
60+ * <constructor-arg value="2"/> <!-- dcsNumber -->
61+ * <constructor-arg value="3"/> <!-- backups, the same as in the cache template -->
62+ * </bean>
63+ * </property>
64+ * </bean>
65+ * </property>
66+ * </bean>
67+ * </list>
68+ * </property>
69+ * </pre>
70+ * <p>
71+ * With more backups, additional replicas can be distributed across different data centers to further improve redundancy.
72+ */
2773public class MdcAffinityBackupFilter implements IgniteBiPredicate <ClusterNode , List <ClusterNode >> {
2874 /** */
2975 private static final long serialVersionUID = 1L ;
3076
31- /** */
32- private final int dcsCount ;
77+ /** Number of data centers. */
78+ private final int dcsNum ;
3379
34- /** */
80+ /** Number of copies of each partition, including primary. */
3581 private final int primaryAndBackups ;
3682
37- /** */
83+ /** Map is used to optimize the time it takes to perform a partition assignment procedure. */
3884 private final Map <String , Integer > partsDistrMap ;
3985
/**
 * Creates the filter.
 *
 * @param dcsNum Number of data centers the cluster spans. Must be positive.
 * @param backups Number of backups configured for the cache this filter is applied to. Must be non-negative.
 * @throws IllegalArgumentException If an argument is out of range, or if {@code backups + 1} is not evenly
 *      divisible by {@code dcsNum} — uneven distributions of copies are not supported (see class description);
 *      without this check the integer division {@code primaryAndBackups / dcsNum} would silently truncate.
 */
public MdcAffinityBackupFilter(int dcsNum, int backups) {
    if (dcsNum <= 0)
        throw new IllegalArgumentException("Number of data centers must be positive: " + dcsNum);

    if (backups < 0)
        throw new IllegalArgumentException("Number of backups must be non-negative: " + backups);

    if ((backups + 1) % dcsNum != 0) {
        throw new IllegalArgumentException("Number of partition copies (backups + 1) must be divisible by the " +
            "number of data centers without remainder [backups=" + backups + ", dcsNum=" + dcsNum + ']');
    }

    this.dcsNum = dcsNum;

    primaryAndBackups = backups + 1;

    // Sized for one entry per data center.
    partsDistrMap = new HashMap<>(dcsNum + 1);
}
4995
50- /** {@inheritDoc} */
51- @ Override public boolean apply (ClusterNode node , List <ClusterNode > list ) {
52- if (list .size () == 1 ) { //list contains only primary node, thus we started new assignment round.
96+ /**
97+ * Defines a predicate which returns {@code true} if a node is acceptable for a backup
98+ * or {@code false} otherwise.
99+ * An acceptable node is the one that belongs to a data center that has some additional copies of partition to assign to.
100+ * @param candidate A node that is a candidate for becoming a backup node for a partition.
101+ * @param previouslySelected A list of primary/backup nodes already chosen for a partition.
102+ * The primary is first.
103+ */
104+ @ Override public boolean apply (ClusterNode candidate , List <ClusterNode > previouslySelected ) {
105+ if (previouslySelected .size () == 1 ) { //list contains only primary node, thus we started new assignment round.
53106 partsDistrMap .replaceAll ((e , v ) -> -1 );
54107
55- partsDistrMap .put (list .get (0 ).dataCenterId (), 1 );
108+ partsDistrMap .put (previouslySelected .get (0 ).dataCenterId (), 1 );
56109 }
57110
58- String candidateDcId = node .dataCenterId ();
111+ String candidateDcId = candidate .dataCenterId ();
59112 Integer candDcPartsCopies = partsDistrMap .get (candidateDcId );
60113 boolean res = false ;
61114
@@ -65,7 +118,7 @@ public MdcAffinityBackupFilter(int dcsCount, int backups) {
65118 res = true ;
66119 }
67120 else {
68- int partCopiesPerDc = primaryAndBackups / dcsCount ;
121+ int partCopiesPerDc = primaryAndBackups / dcsNum ;
69122
70123 if (candDcPartsCopies < partCopiesPerDc ) {
71124 partsDistrMap .put (candidateDcId , candDcPartsCopies + 1 );
0 commit comments