@@ -125,6 +125,22 @@ public long vertexLabelToId(String label) {
125125 }
126126 }
127127
128+ public boolean isArc (long fromId , long toId ) {
129+ final LazyIntIterator succors = graph .successors ((int ) fromId );
130+ for (int s ; (s = succors .nextInt ()) != -1 ;) {
131+ if (s == toId ) {
132+ return true ;
133+ } else if (s > toId ) {
134+ break ;
135+ }
136+ }
137+ return false ;
138+ }
139+
140+ public boolean isArc (String from , String to ) {
141+ return isArc (vertexLabelToId (from ), vertexLabelToId (to ));
142+ }
143+
128144 public int outdegree (long vertexId ) {
129145 return graph .outdegree ((int ) vertexId );
130146 }
@@ -185,6 +201,10 @@ public IntStream successorIntStream(ImmutableGraph graph, long vertexId, Interva
185201 return Arrays .stream (graph .successorArray ((int ) vertexId )).filter (x -> (interval .compareTo (x ) == 0 ));
186202 }
187203
204+ public long [] successorIntersect (long vertexId , long [] vertexIds ) {
205+ return intersect (vertexIds , successors (vertexId ));
206+ }
207+
188208 public Stream <String > successorTopLevelDomainStream (ImmutableGraph graph , long vertexId ) {
189209 return Arrays .stream (graph .successorArray ((int ) vertexId )).mapToObj (i -> getTopLevelDomain (vertexIdToLabel (i )));
190210 }
@@ -291,6 +311,10 @@ public IntStream predecessorIntStream(String vertexLabel, String prefix) {
291311 return successorIntStream (graphT , vertexLabelToId (vertexLabel ), vertexMap .getInterval (prefix ));
292312 }
293313
314+ public long [] predecessorIntersect (long vertexId , long [] vertexIds ) {
315+ return intersect (vertexIds , predecessors (vertexId ));
316+ }
317+
294318 public Stream <Entry <String , Long >> predecessorTopLevelDomainCounts (String vertexLabel ) {
295319 return successorTopLevelDomainCounts (graphT , vertexLabelToId (vertexLabel ));
296320 }
@@ -360,6 +384,67 @@ public static String getTopLevelDomain(String reversedDomainName) {
360384 return reversedDomainName ;
361385 }
362386
387+
388+ /** Intersection of two sorted lists */
389+ public static long [] intersect (long [] a , long [] b ) {
390+ int m = a .length ;
391+ int n = b .length ;
392+ LongArrayList res = new LongArrayList (Integer .min (m , n ));
393+ int i = 0 , j = 0 ;
394+ while (i < m && j < n ) {
395+ if (a [i ] < b [j ]) {
396+ i ++;
397+ } else if (a [i ] > b [j ]) {
398+ j ++;
399+ } else {
400+ res .add (a [i ]);
401+ i ++;
402+ j ++;
403+ }
404+ }
405+ return res .toArray (new long [0 ]);
406+ }
407+
408+ /** Intersection of two sorted lists */
409+ public static long [] intersect (long [] a , int [] b ) {
410+ int m = a .length ;
411+ int n = b .length ;
412+ LongArrayList res = new LongArrayList (Integer .min (m , n ));
413+ int i = 0 , j = 0 ;
414+ while (i < m && j < n ) {
415+ if (a [i ] < b [j ]) {
416+ i ++;
417+ } else if (a [i ] > b [j ]) {
418+ j ++;
419+ } else {
420+ res .add (a [i ]);
421+ i ++;
422+ j ++;
423+ }
424+ }
425+ return res .toArray (new long [0 ]);
426+ }
427+
428+ /** Difference of two sorted lists: a \ b */
429+ public static long [] difference (long [] a , long [] b ) {
430+ int m = a .length ;
431+ int n = b .length ;
432+ LongArrayList res = new LongArrayList (Integer .max (m , n ));
433+ int i = 0 , j = 0 ;
434+ while (i < m ) {
435+ if (j >= n || a [i ] < b [j ]) {
436+ res .add (a [i ]);
437+ i ++;
438+ } else if (a [i ] > b [j ]) {
439+ j ++;
440+ } else {
441+ i ++;
442+ j ++;
443+ }
444+ }
445+ return res .toArray (new long [0 ]);
446+ }
447+
363448 /**
364449 * Get the registered domain for a host name based on the ICANN section of the
365450 * <a href="https://www.publicsuffix.org/">public suffix list</a>.
@@ -414,4 +499,31 @@ public static String getRegisteredDomainReversed(String reversedHostName, boolea
414499 public static String reverseDomainName (String domainName ) {
415500 return HostToDomainGraph .reverseHost (domainName );
416501 }
502+
503+ public void subgraphMetrics (long [] nodes ) {
504+ long totalInlinks = 0 , totalOutlinks = 0 , arcsInCluster = 0 , clusterInlinks = 0 , clusterOutlinks = 0 ;
505+ for (long i : nodes ) {
506+ int nInlinks = indegree (i );
507+ totalInlinks += nInlinks ;
508+ int nOutlinks = outdegree (i );
509+ totalOutlinks += nOutlinks ;
510+ int inClusterInlinks = predecessorIntersect (i , nodes ).length ;
511+ arcsInCluster += inClusterInlinks ;
512+ clusterInlinks += nInlinks - inClusterInlinks ;
513+ int inClusterOutlinks = successorIntersect (i , nodes ).length ;
514+ // Note: we do only count in-cluster inlinks (but not outlinks)
515+ // as in-cluster arcs. Otherwise we would count arcs twice.
516+ clusterOutlinks += nOutlinks - inClusterOutlinks ;
517+ }
518+ LOG .info ("Subgraph metrics:" );
519+ LOG .info ("\t nodes = {}" , nodes .length );
520+ LOG .info ("\t arcs = {} (counting only arcs connecting subgraph nodes)" , arcsInCluster );
521+ LOG .info ("\t avgdegree = {} (average degree in subgraph)" , (double ) arcsInCluster / nodes .length );
522+ LOG .info ("\t inlinks = {} (links from the outer graph into the subgraph)" , clusterInlinks );
523+ LOG .info ("\t outlinks = {} (links from the subgraph to outer nodes)" , clusterOutlinks );
524+ LOG .info ("\t total inlinks = {} (all inlinks)" , totalInlinks );
525+ LOG .info ("\t total outlinks = {} (all outlinks)" , totalOutlinks );
526+ LOG .info ("\t nodes linked = {} (outer nodes linked from subgraph)" , sharedSuccessors (nodes , 1 , nodes .length ).length );
527+ LOG .info ("\t nodes linking = {} (outer nodes linking to subgraph)" , sharedPredecessors (nodes , 1 , nodes .length ).length );
528+ }
417529}
0 commit comments