diff --git a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/StructurallyEquivalent.java b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/StructurallyEquivalent.java
index cbf06926..a05a258b 100644
--- a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/StructurallyEquivalent.java
+++ b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/StructurallyEquivalent.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004, The JUNG Authors
+ * Copyright (c) 2004, The JUNG Authors
*
* All rights reserved.
* Created on Jan 28, 2004
@@ -10,6 +10,9 @@
*/
package edu.uci.ics.jung.algorithms.blockmodel;
+import com.google.common.base.Function;
+import edu.uci.ics.jung.graph.Graph;
+import edu.uci.ics.jung.graph.util.Pair;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -20,155 +23,143 @@
import java.util.Map;
import java.util.Set;
-import com.google.common.base.Function;
-
-import edu.uci.ics.jung.graph.Graph;
-import edu.uci.ics.jung.graph.util.Pair;
-
/**
- * Identifies sets of structurally equivalent vertices in a graph. Vertices
- * <i>i</i> and <i>j</i> are structurally equivalent iff the set of <i>i</i>'s
- * neighbors is identical to the set of <i>j</i>'s neighbors, with the
- * exception of i and j themselves. This algorithm finds all
- * sets of equivalent vertices in O(V^2) time.
- *
- * <p>You can extend this class to have a different definition of equivalence (by
- * overriding <code>isStructurallyEquivalent</code>), and may give it hints for
- * accelerating the process by overriding <code>canPossiblyCompare</code>.
- * (For example, in a bipartite graph, <code>canPossiblyCompare</code> may
- * return <code>false</code> for vertices in
- * different partitions. This function should be fast.)
- *
+ * Identifies sets of structurally equivalent vertices in a graph. Vertices <i>i</i> and <i>j</i>
+ * are structurally equivalent iff the set of <i>i</i>'s neighbors is identical to the set of
+ * <i>j</i>'s neighbors, with the exception of i and j themselves. This algorithm
+ * finds all sets of equivalent vertices in O(V^2) time.
+ *
+ * <p>You can extend this class to have a different definition of equivalence (by overriding
+ * <code>isStructurallyEquivalent</code>), and may give it hints for accelerating the process by
+ * overriding <code>canPossiblyCompare</code>. (For example, in a bipartite graph,
+ * <code>canPossiblyCompare</code> may return <code>false</code> for vertices in different partitions.
+ * This function should be fast.)
+ *
* @author Danyel Fisher
*/
-public class StructurallyEquivalent<V,E> implements Function<Graph<V,E>, VertexPartition<V,E>>
-{
- public VertexPartition<V,E> apply(Graph<V,E> g)
- {
- Set<Pair<V>> vertex_pairs = getEquivalentPairs(g);
-
- Set<Set<V>> rv = new HashSet<Set<V>>();
- Map<V, Set<V>> intermediate = new HashMap<V, Set<V>>();
- for (Pair<V> p : vertex_pairs)
- {
- Set<V> res = intermediate.get(p.getFirst());
- if (res == null)
- res = intermediate.get(p.getSecond());
- if (res == null) // we haven't seen this one before
- res = new HashSet<V>();
- res.add(p.getFirst());
- res.add(p.getSecond());
- intermediate.put(p.getFirst(), res);
- intermediate.put(p.getSecond(), res);
+public class StructurallyEquivalent<V, E> implements Function<Graph<V, E>, VertexPartition<V, E>> {
+ public VertexPartition<V, E> apply(Graph<V, E> g) {
+ Set<Pair<V>> vertex_pairs = getEquivalentPairs(g);
+
+ Set<Set<V>> rv = new HashSet<Set<V>>();
+ Map<V, Set<V>> intermediate = new HashMap<V, Set<V>>();
+ for (Pair<V> p : vertex_pairs) {
+ Set<V> res = intermediate.get(p.getFirst());
+ if (res == null) res = intermediate.get(p.getSecond());
+ if (res == null) // we haven't seen this one before
+ res = new HashSet<V>();
+ res.add(p.getFirst());
+ res.add(p.getSecond());
+ intermediate.put(p.getFirst(), res);
+ intermediate.put(p.getSecond(), res);
+ }
+ rv.addAll(intermediate.values());
+
+ // pick up the vertices which don't appear in intermediate; they are
+ // singletons (equivalence classes of size 1)
+ Collection<V> singletons = new ArrayList<V>(g.getVertices());
+ singletons.removeAll(intermediate.keySet());
+ for (V v : singletons) {
+ Set<V> v_set = Collections.singleton(v);
+ intermediate.put(v, v_set);
+ rv.add(v_set);
+ }
+
+ return new VertexPartition<V, E>(g, intermediate, rv);
+ }
+
+ /**
+ * For each vertex pair v, v1 in G, checks whether v and v1 are fully equivalent: meaning that
+ * they connect to the exact same vertices. (Is this regular equivalence, or whathaveyou?)
+ *
+ * @param g the graph whose equivalent pairs are to be generated
+ * @return a Set of Pairs of vertices, where all the vertices in the inner Pairs are equivalent.
+ */
+ protected Set<Pair<V>> getEquivalentPairs(Graph<V, E> g) {
+
+ Set<Pair<V>> rv = new HashSet<Pair<V>>();
+ Set<V> alreadyEquivalent = new HashSet<V>();
+
+ List<V> l = new ArrayList<V>(g.getVertices());
+
+ for (V v1 : l) {
+ if (alreadyEquivalent.contains(v1)) {
+ continue;
+ }
+
+ for (Iterator<V> iterator = l.listIterator(l.indexOf(v1) + 1); iterator.hasNext(); ) {
+ V v2 = iterator.next();
+
+ if (alreadyEquivalent.contains(v2)) {
+ continue;
}
- rv.addAll(intermediate.values());
-
- // pick up the vertices which don't appear in intermediate; they are
- // singletons (equivalence classes of size 1)
- Collection<V> singletons = new ArrayList<V>(g.getVertices());
- singletons.removeAll(intermediate.keySet());
- for (V v : singletons)
- {
- Set<V> v_set = Collections.singleton(v);
- intermediate.put(v, v_set);
- rv.add(v_set);
+
+ if (!canBeEquivalent(v1, v2)) {
+ continue;
}
- return new VertexPartition<V,E>(g, intermediate, rv);
- }
-
- /**
- * For each vertex pair v, v1 in G, checks whether v and v1 are fully
- * equivalent: meaning that they connect to the exact same vertices. (Is
- * this regular equivalence, or whathaveyou?)
- *
- * @param g the graph whose equivalent pairs are to be generated
- * @return a Set of Pairs of vertices, where all the vertices in the inner
- * Pairs are equivalent.
- */
- protected Set<Pair<V>> getEquivalentPairs(Graph<V,E> g) {
-
- Set<Pair<V>> rv = new HashSet<Pair<V>>();
- Set<V> alreadyEquivalent = new HashSet<V>();
-
- List<V> l = new ArrayList<V>(g.getVertices());
-
- for (V v1 : l)
- {
- if (alreadyEquivalent.contains(v1))
- continue;
-
- for (Iterator<V> iterator = l.listIterator(l.indexOf(v1) + 1); iterator.hasNext();) {
- V v2 = iterator.next();
-
- if (alreadyEquivalent.contains(v2))
- continue;
-
- if (!canBeEquivalent(v1, v2))
- continue;
-
- if (isStructurallyEquivalent(g, v1, v2)) {
- Pair<V> p = new Pair<V>(v1, v2);
- alreadyEquivalent.add(v2);
- rv.add(p);
- }
- }
- }
-
- return rv;
- }
-
- /**
- * @param g the graph in which the structural equivalence comparison is to take place
- * @param v1 the vertex to check for structural equivalence to v2
- * @param v2 the vertex to check for structural equivalence to v1
- * @return {@code true} if {@code v1}'s predecessors/successors are equal to
- * {@code v2}'s predecessors/successors
- */
- protected boolean isStructurallyEquivalent(Graph<V,E> g, V v1, V v2) {
-
- if( g.degree(v1) != g.degree(v2)) {
- return false;
- }
-
- Set<V> n1 = new HashSet<V>(g.getPredecessors(v1));
- n1.remove(v2);
- n1.remove(v1);
- Set<V> n2 = new HashSet<V>(g.getPredecessors(v2));
- n2.remove(v1);
- n2.remove(v2);
-
- Set<V> o1 = new HashSet<V>(g.getSuccessors(v1));
- Set<V> o2 = new HashSet<V>(g.getSuccessors(v2));
- o1.remove(v1);
- o1.remove(v2);
- o2.remove(v1);
- o2.remove(v2);
-
- // this neglects self-loops and directed edges from 1 to other
- boolean b = (n1.equals(n2) && o1.equals(o2));
- if (!b)
- return b;
-
- // if there's a directed edge v1->v2 then there's a directed edge v2->v1
- b &= ( g.isSuccessor(v1, v2) == g.isSuccessor(v2, v1));
-
- // self-loop check
- b &= ( g.isSuccessor(v1, v1) == g.isSuccessor(v2, v2));
-
- return b;
-
- }
-
- /**
- * This is a space for optimizations. For example, for a bipartite graph,
- * vertices from different partitions cannot possibly be equivalent.
- *
- * @param v1 the first vertex to compare
- * @param v2 the second vertex to compare
- * @return {@code true} if the vertices can be equivalent
- */
- protected boolean canBeEquivalent(V v1, V v2) {
- return true;
- }
+ if (isStructurallyEquivalent(g, v1, v2)) {
+ Pair<V> p = new Pair<V>(v1, v2);
+ alreadyEquivalent.add(v2);
+ rv.add(p);
+ }
+ }
+ }
+
+ return rv;
+ }
+
+ /**
+ * @param g the graph in which the structural equivalence comparison is to take place
+ * @param v1 the vertex to check for structural equivalence to v2
+ * @param v2 the vertex to check for structural equivalence to v1
+ * @return {@code true} if {@code v1}'s predecessors/successors are equal to {@code v2}'s
+ * predecessors/successors
+ */
+ protected boolean isStructurallyEquivalent(Graph<V, E> g, V v1, V v2) {
+
+ if (g.degree(v1) != g.degree(v2)) {
+ return false;
+ }
+
+ Set<V> n1 = new HashSet<V>(g.getPredecessors(v1));
+ n1.remove(v2);
+ n1.remove(v1);
+ Set<V> n2 = new HashSet<V>(g.getPredecessors(v2));
+ n2.remove(v1);
+ n2.remove(v2);
+
+ Set<V> o1 = new HashSet<V>(g.getSuccessors(v1));
+ Set<V> o2 = new HashSet<V>(g.getSuccessors(v2));
+ o1.remove(v1);
+ o1.remove(v2);
+ o2.remove(v1);
+ o2.remove(v2);
+
+ // this neglects self-loops and directed edges from 1 to other
+ boolean b = (n1.equals(n2) && o1.equals(o2));
+ if (!b) {
+ return b;
+ }
+
+ // if there's a directed edge v1->v2 then there's a directed edge v2->v1
+ b &= (g.isSuccessor(v1, v2) == g.isSuccessor(v2, v1));
+
+ // self-loop check
+ b &= (g.isSuccessor(v1, v1) == g.isSuccessor(v2, v2));
+
+ return b;
+ }
+
+ /**
+ * This is a space for optimizations. For example, for a bipartite graph, vertices from different
+ * partitions cannot possibly be equivalent.
+ *
+ * @param v1 the first vertex to compare
+ * @param v2 the second vertex to compare
+ * @return {@code true} if the vertices can be equivalent
+ */
+ protected boolean canBeEquivalent(V v1, V v2) {
+ return true;
+ }
}
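Reviewer note: a minimal usage sketch for the class reformatted above (illustrative only, not part of this patch; the UndirectedSparseGraph implementation comes from jung-graph-impl, and the toy graph is an assumption):

// StructurallyEquivalentDemo.java (hypothetical)
import edu.uci.ics.jung.algorithms.blockmodel.StructurallyEquivalent;
import edu.uci.ics.jung.algorithms.blockmodel.VertexPartition;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.UndirectedSparseGraph;

public class StructurallyEquivalentDemo {
  public static void main(String[] args) {
    Graph<Integer, String> g = new UndirectedSparseGraph<Integer, String>();
    // vertices 1 and 2 both have neighbor set {3, 4}, and 3 and 4 both have {1, 2},
    // so the expected equivalence classes are {1, 2} and {3, 4}
    g.addEdge("e13", 1, 3);
    g.addEdge("e14", 1, 4);
    g.addEdge("e23", 2, 3);
    g.addEdge("e24", 2, 4);

    VertexPartition<Integer, String> partition =
        new StructurallyEquivalent<Integer, String>().apply(g);
    System.out.println(partition.getVertexPartitions());
  }
}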
diff --git a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/VertexPartition.java b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/VertexPartition.java
index dea478b2..5a8f2f01 100644
--- a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/VertexPartition.java
+++ b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/blockmodel/VertexPartition.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, The JUNG Authors
+ * Copyright (c) 2003, The JUNG Authors
*
* All rights reserved.
*
@@ -12,120 +12,106 @@
*/
package edu.uci.ics.jung.algorithms.blockmodel;
-import java.util.*;
-
import edu.uci.ics.jung.graph.Graph;
-
+import java.util.*;
/**
- * Maintains information about a vertex partition of a graph.
- * This can be built from a map from vertices to vertex sets
- * or from a collection of (disjoint) vertex sets,
- * such as those created by various clustering methods.
+ * Maintains information about a vertex partition of a graph. This can be built from a map from
+ * vertices to vertex sets or from a collection of (disjoint) vertex sets, such as those created by
+ * various clustering methods.
*/
-public class VertexPartition<V,E>
-{
- private Map<V, Set<V>> vertex_partition_map;
- private Collection<Set<V>> vertex_sets;
- private Graph<V,E> graph;
-
- /**
- * Creates an instance based on the specified graph and mapping from vertices
- * to vertex sets, and generates a set of partitions based on this mapping.
- * @param g the graph over which the vertex partition is defined
- * @param partition_map the mapping from vertices to vertex sets (partitions)
- */
- public VertexPartition(Graph<V,E> g, Map<V, Set<V>> partition_map)
- {
- this.vertex_partition_map = Collections.unmodifiableMap(partition_map);
- this.graph = g;
- }
+public class VertexPartition<V, E> {
+ private Map<V, Set<V>> vertex_partition_map;
+ private Collection<Set<V>> vertex_sets;
+ private Graph<V, E> graph;
+
+ /**
+ * Creates an instance based on the specified graph and mapping from vertices to vertex sets, and
+ * generates a set of partitions based on this mapping.
+ *
+ * @param g the graph over which the vertex partition is defined
+ * @param partition_map the mapping from vertices to vertex sets (partitions)
+ */
+ public VertexPartition(Graph<V, E> g, Map<V, Set<V>> partition_map) {
+ this.vertex_partition_map = Collections.unmodifiableMap(partition_map);
+ this.graph = g;
+ }
+
+ /**
+ * Creates an instance based on the specified graph, vertex-set mapping, and set of disjoint
+ * vertex sets. The vertex-set mapping and vertex partitions must be consistent; that is, the
+ * mapping must reflect the division of vertices into partitions, and each vertex must appear in
+ * exactly one partition.
+ *
+ * @param g the graph over which the vertex partition is defined
+ * @param partition_map the mapping from vertices to vertex sets (partitions)
+ * @param vertex_sets the set of disjoint vertex sets
+ */
+ public VertexPartition(
+ Graph<V, E> g, Map<V, Set<V>> partition_map, Collection<Set<V>> vertex_sets) {
+ this.vertex_partition_map = Collections.unmodifiableMap(partition_map);
+ this.vertex_sets = vertex_sets;
+ this.graph = g;
+ }
+
+ /**
+ * Creates an instance based on the specified graph and set of disjoint vertex sets, and generates
+ * a vertex-to-partition map based on these sets.
+ *
+ * @param g the graph over which the vertex partition is defined
+ * @param vertex_sets the set of disjoint vertex sets
+ */
+ public VertexPartition(Graph<V, E> g, Collection<Set<V>> vertex_sets) {
+ this.vertex_sets = vertex_sets;
+ this.graph = g;
+ }
+
+ /**
+ * Returns the graph on which the partition is defined.
+ *
+ * @return the graph on which the partition is defined
+ */
+ public Graph<V, E> getGraph() {
+ return graph;
+ }
- /**
- * Creates an instance based on the specified graph, vertex-set mapping,
- * and set of disjoint vertex sets. The vertex-set mapping and vertex
- * partitions must be consistent; that is, the mapping must reflect the
- * division of vertices into partitions, and each vertex must appear in
- * exactly one partition.
- * @param g the graph over which the vertex partition is defined
- * @param partition_map the mapping from vertices to vertex sets (partitions)
- * @param vertex_sets the set of disjoint vertex sets
- */
- public VertexPartition(Graph<V,E> g, Map<V, Set<V>> partition_map,
- Collection<Set<V>> vertex_sets)
- {
- this.vertex_partition_map = Collections.unmodifiableMap(partition_map);
- this.vertex_sets = vertex_sets;
- this.graph = g;
+ /**
+ * Returns a map from each vertex in the input graph to its partition. This map is generated if it
+ * does not already exist.
+ *
+ * @return a map from each vertex in the input graph to a vertex set
+ */
+ public Map<V, Set<V>> getVertexToPartitionMap() {
+ if (vertex_partition_map == null) {
+ this.vertex_partition_map = new HashMap<V, Set<V>>();
+ for (Set<V> set : this.vertex_sets) for (V v : set) this.vertex_partition_map.put(v, set);
}
+ return vertex_partition_map;
+ }
- /**
- * Creates an instance based on the specified graph and set of disjoint vertex sets,
- * and generates a vertex-to-partition map based on these sets.
- * @param g the graph over which the vertex partition is defined
- * @param vertex_sets the set of disjoint vertex sets
- */
- public VertexPartition(Graph<V,E> g, Collection<Set<V>> vertex_sets)
- {
- this.vertex_sets = vertex_sets;
- this.graph = g;
+ /**
+ * Returns a collection of vertex sets, where each vertex in the input graph is in exactly one
+ * set. This collection is generated based on the vertex-to-partition map if it does not already
+ * exist.
+ *
+ * @return a collection of vertex sets such that each vertex in the instance's graph is in exactly
+ * one set
+ */
+ public Collection<Set<V>> getVertexPartitions() {
+ if (vertex_sets == null) {
+ this.vertex_sets = new HashSet<Set<V>>();
+ this.vertex_sets.addAll(vertex_partition_map.values());
}
-
- /**
- * Returns the graph on which the partition is defined.
- * @return the graph on which the partition is defined
- */
- public Graph<V,E> getGraph()
- {
- return graph;
- }
+ return vertex_sets;
+ }
- /**
- * Returns a map from each vertex in the input graph to its partition.
- * This map is generated if it does not already exist.
- * @return a map from each vertex in the input graph to a vertex set
- */
- public Map<V, Set<V>> getVertexToPartitionMap()
- {
- if (vertex_partition_map == null)
- {
- this.vertex_partition_map = new HashMap<V, Set<V>>();
- for (Set<V> set : this.vertex_sets)
- for (V v : set)
- this.vertex_partition_map.put(v, set);
- }
- return vertex_partition_map;
- }
-
- /**
- * Returns a collection of vertex sets, where each vertex in the
- * input graph is in exactly one set.
- * This collection is generated based on the vertex-to-partition map
- * if it does not already exist.
- * @return a collection of vertex sets such that each vertex in the
- * instance's graph is in exactly one set
- */
- public Collection<Set<V>> getVertexPartitions()
- {
- if (vertex_sets == null)
- {
- this.vertex_sets = new HashSet<Set<V>>();
- this.vertex_sets.addAll(vertex_partition_map.values());
- }
- return vertex_sets;
- }
+ /** @return the number of partitions. */
+ public int numPartitions() {
+ return vertex_sets.size();
+ }
- /**
- * @return the number of partitions.
- */
- public int numPartitions()
- {
- return vertex_sets.size();
- }
-
- @Override
- public String toString()
- {
- return "Partitions: " + vertex_partition_map;
- }
+ @Override
+ public String toString() {
+ return "Partitions: " + vertex_partition_map;
+ }
}
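Reviewer note: a short sketch of the lazy map/set generation documented above (illustrative, not part of this patch; the SparseGraph implementation from jung-graph-impl and the toy vertices are assumptions):

// VertexPartitionDemo.java (hypothetical)
import edu.uci.ics.jung.algorithms.blockmodel.VertexPartition;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.SparseGraph;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

public class VertexPartitionDemo {
  public static void main(String[] args) {
    Graph<String, Integer> g = new SparseGraph<String, Integer>();
    for (String v : Arrays.asList("a", "b", "c")) {
      g.addVertex(v);
    }
    // two disjoint vertex sets that together cover the graph
    Collection<Set<String>> sets = new HashSet<Set<String>>();
    sets.add(new HashSet<String>(Arrays.asList("a", "b")));
    sets.add(new HashSet<String>(Arrays.asList("c")));

    // built from sets only; the vertex-to-set map is generated on first request
    VertexPartition<String, Integer> p = new VertexPartition<String, Integer>(g, sets);
    System.out.println(p.numPartitions()); // 2
    System.out.println(p.getVertexToPartitionMap().get("c")); // the set containing only "c"
  }
}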
diff --git a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/BicomponentClusterer.java b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/BicomponentClusterer.java
index 359f1005..baf64578 100644
--- a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/BicomponentClusterer.java
+++ b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/BicomponentClusterer.java
@@ -1,14 +1,16 @@
/*
-* Copyright (c) 2003, The JUNG Authors
-*
-* All rights reserved.
-*
-* This software is open-source under the BSD license; see either
-* "license.txt" or
-* https://github.com/jrtom/jung/blob/master/LICENSE for a description.
-*/
+ * Copyright (c) 2003, The JUNG Authors
+ *
+ * All rights reserved.
+ *
+ * This software is open-source under the BSD license; see either
+ * "license.txt" or
+ * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
+ */
package edu.uci.ics.jung.algorithms.cluster;
+import com.google.common.base.Function;
+import edu.uci.ics.jung.graph.UndirectedGraph;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
@@ -16,150 +18,127 @@
import java.util.Set;
import java.util.Stack;
-import com.google.common.base.Function;
-
-import edu.uci.ics.jung.graph.UndirectedGraph;
-
/**
- * Finds all biconnected components (bicomponents) of an undirected graph.
- * A graph is a biconnected component if
- * at least 2 vertices must be removed in order to disconnect the graph. (Graphs
- * consisting of one vertex, or of two connected vertices, are also biconnected.) Biconnected
- * components of three or more vertices have the property that every pair of vertices in the component
- * are connected by two or more vertex-disjoint paths.
- *
- * Running time: O(|V| + |E|) where |V| is the number of vertices and |E| is the number of edges
+ * Finds all biconnected components (bicomponents) of an undirected graph. A graph is a biconnected
+ * component if at least 2 vertices must be removed in order to disconnect the graph. (Graphs
+ * consisting of one vertex, or of two connected vertices, are also biconnected.) Biconnected
+ * components of three or more vertices have the property that every pair of vertices in the
+ * component are connected by two or more vertex-disjoint paths.
+ *
+ * <p>Running time: O(|V| + |E|) where |V| is the number of vertices and |E| is the number of edges
+ *
* @see "Depth first search and linear graph algorithms by R. E. Tarjan (1972), SIAM J. Comp."
- *
* @author Joshua O'Madadhain
*/
-public class BicomponentClusterer<V,E> implements Function<UndirectedGraph<V,E>, Set<Set<V>>>
-{
- protected Map<V,Number> dfs_num;
- protected Map<V,Number> high;
- protected Map<V,V> parents;
- protected Stack<E> stack;
- protected int converse_depth;
+public class BicomponentClusterer<V, E> implements Function<UndirectedGraph<V, E>, Set<Set<V>>> {
+ protected Map<V, Number> dfs_num;
+ protected Map<V, Number> high;
+ protected Map<V, V> parents;
+ protected Stack<E> stack;
+ protected int converse_depth;
- /**
- * Constructs a new bicomponent finder
- */
- public BicomponentClusterer() {
- }
+ /** Constructs a new bicomponent finder */
+ public BicomponentClusterer() {}
- /**
- * Extracts the bicomponents from the graph.
- * @param theGraph the graph whose bicomponents are to be extracted
- * @return the <code>ClusterSet</code> of bicomponents
- */
- public Set<Set<V>> apply(UndirectedGraph<V,E> theGraph)
- {
- Set<Set<V>> bicomponents = new LinkedHashSet<Set<V>>();
+ /**
+ * Extracts the bicomponents from the graph.
+ *
+ * @param theGraph the graph whose bicomponents are to be extracted
+ * @return the <code>ClusterSet</code> of bicomponents
+ */
+ public Set<Set<V>> apply(UndirectedGraph<V, E> theGraph) {
+ Set<Set<V>> bicomponents = new LinkedHashSet<Set<V>>();
- if (theGraph.getVertices().isEmpty())
- return bicomponents;
+ if (theGraph.getVertices().isEmpty()) {
+ return bicomponents;
+ }
- // initialize DFS number for each vertex to 0
- dfs_num = new HashMap<V,Number>();
- for (V v : theGraph.getVertices())
- {
- dfs_num.put(v, 0);
- }
+ // initialize DFS number for each vertex to 0
+ dfs_num = new HashMap<V, Number>();
+ for (V v : theGraph.getVertices()) {
+ dfs_num.put(v, 0);
+ }
+
+ for (V v : theGraph.getVertices()) {
+ if (dfs_num.get(v).intValue() == 0) // if we haven't hit this vertex yet...
+ {
+ high = new HashMap<V, Number>();
+ stack = new Stack<E>();
+ parents = new HashMap<V, V>();
+ converse_depth = theGraph.getVertexCount();
+ // find the biconnected components for this subgraph, starting from v
+ findBiconnectedComponents(theGraph, v, bicomponents);
- for (V v : theGraph.getVertices())
- {
- if (dfs_num.get(v).intValue() == 0) // if we haven't hit this vertex yet...
- {
- high = new HashMap<V,Number>();
- stack = new Stack<E>();
- parents = new HashMap<V,V>();
- converse_depth = theGraph.getVertexCount();
- // find the biconnected components for this subgraph, starting from v
- findBiconnectedComponents(theGraph, v, bicomponents);
-
- // if we only visited one vertex, this method won't have
- // ID'd it as a biconnected component, so mark it as one
- if (theGraph.getVertexCount() - converse_depth == 1)
- {
- Set<V> s = new HashSet<V>();
- s.add(v);
- bicomponents.add(s);
- }
- }
+ // if we only visited one vertex, this method won't have
+ // ID'd it as a biconnected component, so mark it as one
+ if (theGraph.getVertexCount() - converse_depth == 1) {
+ Set<V> s = new HashSet<V>();
+ s.add(v);
+ bicomponents.add(s);
}
-
- return bicomponents;
+ }
}
- /**
- * Stores, in <code>bicomponents</code>, all the biconnected
- * components that are reachable from <code>v</code>.
- *
- * <p>The algorithm basically proceeds as follows: do a depth-first
- * traversal starting from <code>v</code>, marking each vertex with
- * a value that indicates the order in which it was encountered (dfs_num),
- * and with
- * a value that indicates the highest point in the DFS tree that is known
- * to be reachable from this vertex using non-DFS edges (high). (Since it
- * is measured on non-DFS edges, "high" tells you how far back in the DFS
- * tree you can reach by two distinct paths, hence biconnectivity.)
- * Each time a new vertex w is encountered, push the edge just traversed
- * on a stack, and call this method recursively. If w.high is no greater than
- * v.dfs_num, then the contents of the stack down to (v,w) is a
- * biconnected component (and v is an articulation point, that is, a
- * component boundary). In either case, set v.high to max(v.high, w.high),
- * and continue. If w has already been encountered but is
- * not v's parent, set v.high max(v.high, w.dfs_num) and continue.
- *
- * <p>(In case anyone cares, the version of this algorithm on p. 224 of
- * Udi Manber's "Introduction to Algorithms: A Creative Approach" seems to be
- * wrong: the stack should be initialized outside this method,
- * (v,w) should only be put on the stack if w hasn't been seen already,
- * and there's no real benefit to putting v on the stack separately: just
- * check for (v,w) on the stack rather than v. Had I known this, I could
- * have saved myself a few days. JRTOM)
- *
- * @param g the graph to check for biconnected components
- * @param v the starting place for searching for biconnected components
- * @param bicomponents storage for the biconnected components found by this algorithm
- */
- protected void findBiconnectedComponents(UndirectedGraph<V,E> g, V v, Set<Set<V>> bicomponents)
- {
- int v_dfs_num = converse_depth;
- dfs_num.put(v, v_dfs_num);
- converse_depth--;
- high.put(v, v_dfs_num);
+ return bicomponents;
+ }
+
+ /**
+ * Stores, in <code>bicomponents</code>, all the biconnected components that are reachable from
+ * <code>v</code>.
+ *
+ * <p>The algorithm basically proceeds as follows: do a depth-first traversal starting from
+ * <code>v</code>, marking each vertex with a value that indicates the order in which it was encountered
+ * (dfs_num), and with a value that indicates the highest point in the DFS tree that is known to
+ * be reachable from this vertex using non-DFS edges (high). (Since it is measured on non-DFS
+ * edges, "high" tells you how far back in the DFS tree you can reach by two distinct paths, hence
+ * biconnectivity.) Each time a new vertex w is encountered, push the edge just traversed on a
+ * stack, and call this method recursively. If w.high is no greater than v.dfs_num, then the
+ * contents of the stack down to (v,w) is a biconnected component (and v is an articulation point,
+ * that is, a component boundary). In either case, set v.high to max(v.high, w.high), and
+ * continue. If w has already been encountered but is not v's parent, set v.high max(v.high,
+ * w.dfs_num) and continue.
+ *
+ * <p>(In case anyone cares, the version of this algorithm on p. 224 of Udi Manber's "Introduction
+ * to Algorithms: A Creative Approach" seems to be wrong: the stack should be initialized outside
+ * this method, (v,w) should only be put on the stack if w hasn't been seen already, and there's
+ * no real benefit to putting v on the stack separately: just check for (v,w) on the stack rather
+ * than v. Had I known this, I could have saved myself a few days. JRTOM)
+ *
+ * @param g the graph to check for biconnected components
+ * @param v the starting place for searching for biconnected components
+ * @param bicomponents storage for the biconnected components found by this algorithm
+ */
+ protected void findBiconnectedComponents(UndirectedGraph<V, E> g, V v, Set<Set<V>> bicomponents) {
+ int v_dfs_num = converse_depth;
+ dfs_num.put(v, v_dfs_num);
+ converse_depth--;
+ high.put(v, v_dfs_num);
- for (V w : g.getNeighbors(v))
- {
- int w_dfs_num = dfs_num.get(w).intValue();//get(w, dfs_num);
- E vw = g.findEdge(v,w);
- if (w_dfs_num == 0) // w hasn't yet been visited
- {
- parents.put(w, v); // v is w's parent in the DFS tree
- stack.push(vw);
- findBiconnectedComponents(g, w, bicomponents);
- int w_high = high.get(w).intValue();//get(w, high);
- if (w_high <= v_dfs_num)
- {
- // v disconnects w from the rest of the graph,
- // i.e., v is an articulation point
- // thus, everything between the top of the stack and
- // v is part of a single biconnected component
- Set<V> bicomponent = new HashSet<V>();
- E e;
- do
- {
- e = stack.pop();
- bicomponent.addAll(g.getIncidentVertices(e));
- }
- while (e != vw);
- bicomponents.add(bicomponent);
- }
- high.put(v, Math.max(w_high, high.get(v).intValue()));
- }
- else if (w != parents.get(v)) // (v,w) is a back or a forward edge
- high.put(v, Math.max(w_dfs_num, high.get(v).intValue()));
+ for (V w : g.getNeighbors(v)) {
+ int w_dfs_num = dfs_num.get(w).intValue(); //get(w, dfs_num);
+ E vw = g.findEdge(v, w);
+ if (w_dfs_num == 0) // w hasn't yet been visited
+ {
+ parents.put(w, v); // v is w's parent in the DFS tree
+ stack.push(vw);
+ findBiconnectedComponents(g, w, bicomponents);
+ int w_high = high.get(w).intValue(); //get(w, high);
+ if (w_high <= v_dfs_num) {
+ // v disconnects w from the rest of the graph,
+ // i.e., v is an articulation point
+ // thus, everything between the top of the stack and
+ // v is part of a single biconnected component
+ Set<V> bicomponent = new HashSet<V>();
+ E e;
+ do {
+ e = stack.pop();
+ bicomponent.addAll(g.getIncidentVertices(e));
+ } while (e != vw);
+ bicomponents.add(bicomponent);
}
+ high.put(v, Math.max(w_high, high.get(v).intValue()));
+ } else if (w != parents.get(v)) // (v,w) is a back or a forward edge
+ high.put(v, Math.max(w_dfs_num, high.get(v).intValue()));
}
+ }
}
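Reviewer note: a minimal sketch of the clusterer reformatted above (illustrative, not part of this patch; the UndirectedSparseGraph implementation and the toy graph are assumptions):

// BicomponentDemo.java (hypothetical)
import edu.uci.ics.jung.algorithms.cluster.BicomponentClusterer;
import edu.uci.ics.jung.graph.UndirectedGraph;
import edu.uci.ics.jung.graph.UndirectedSparseGraph;
import java.util.Set;

public class BicomponentDemo {
  public static void main(String[] args) {
    UndirectedGraph<Integer, String> g = new UndirectedSparseGraph<Integer, String>();
    // two triangles sharing vertex 3, which is therefore an articulation point
    g.addEdge("e12", 1, 2);
    g.addEdge("e23", 2, 3);
    g.addEdge("e13", 1, 3);
    g.addEdge("e34", 3, 4);
    g.addEdge("e45", 4, 5);
    g.addEdge("e35", 3, 5);

    // expect the bicomponents {1, 2, 3} and {3, 4, 5}
    Set<Set<Integer>> bicomponents = new BicomponentClusterer<Integer, String>().apply(g);
    System.out.println(bicomponents);
  }
}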
diff --git a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/EdgeBetweennessClusterer.java b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/EdgeBetweennessClusterer.java
index 80e30c0d..54f93820 100644
--- a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/EdgeBetweennessClusterer.java
+++ b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/EdgeBetweennessClusterer.java
@@ -1,109 +1,105 @@
/*
-* Copyright (c) 2003, The JUNG Authors
-*
-* All rights reserved.
-*
-* This software is open-source under the BSD license; see either
-* "license.txt" or
-* https://github.com/jrtom/jung/blob/master/LICENSE for a description.
-*/
+ * Copyright (c) 2003, The JUNG Authors
+ *
+ * All rights reserved.
+ *
+ * This software is open-source under the BSD license; see either
+ * "license.txt" or
+ * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
+ */
package edu.uci.ics.jung.algorithms.cluster;
+import com.google.common.base.Function;
+import edu.uci.ics.jung.algorithms.scoring.BetweennessCentrality;
+import edu.uci.ics.jung.graph.Graph;
+import edu.uci.ics.jung.graph.util.Pair;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import com.google.common.base.Function;
-
-import edu.uci.ics.jung.algorithms.scoring.BetweennessCentrality;
-import edu.uci.ics.jung.graph.Graph;
-import edu.uci.ics.jung.graph.util.Pair;
-
-
/**
* An algorithm for computing clusters (community structure) in graphs based on edge betweenness.
- * The betweenness of an edge is defined as the extent to which that edge lies along
- * shortest paths between all pairs of nodes.
+ * The betweenness of an edge is defined as the extent to which that edge lies along shortest paths
+ * between all pairs of nodes.
+ *
+ * <p>This algorithm works by iteratively following the 2 step process:
*
- * This algorithm works by iteratively following the 2 step process:
* <ul>
- * <li>Compute edge betweenness for all edges in current graph
- * <li>Remove edge with highest betweenness
+ *   <li>Compute edge betweenness for all edges in current graph
+ *   <li>Remove edge with highest betweenness
* </ul>
- *
- * Running time is: O(kmn) where k is the number of edges to remove, m is the total number of edges, and
- * n is the total number of vertices. For very sparse graphs the running time is closer to O(kn^2) and for
- * graphs with strong community structure, the complexity is even lower.
- *
- * This algorithm is a slight modification of the algorithm discussed below in that the number of edges
- * to be removed is parameterized.
+ *
+ * <p>Running time is: O(kmn) where k is the number of edges to remove, m is the total number of
+ * edges, and n is the total number of vertices. For very sparse graphs the running time is closer
+ * to O(kn^2) and for graphs with strong community structure, the complexity is even lower.
+ *
+ * <p>This algorithm is a slight modification of the algorithm discussed below in that the number of
+ * edges to be removed is parameterized.
+ *
* @author Scott White
* @author Tom Nelson (converted to jung2)
* @see "Community structure in social and biological networks by Michelle Girvan and Mark Newman"
*/
-public class EdgeBetweennessClusterer<V,E> implements Function<Graph<V,E>,Set<Set<V>>> {
- private int mNumEdgesToRemove;
- private Map<E,Pair<V>> edges_removed;
+public class EdgeBetweennessClusterer<V, E> implements Function<Graph<V, E>, Set<Set<V>>> {
+ private int mNumEdgesToRemove;
+ private Map<E, Pair<V>> edges_removed;
- /**
- * Constructs a new clusterer for the specified graph.
- * @param numEdgesToRemove the number of edges to be progressively removed from the graph
- */
- public EdgeBetweennessClusterer(int numEdgesToRemove) {
- mNumEdgesToRemove = numEdgesToRemove;
- edges_removed = new LinkedHashMap<E,Pair<V>>();
- }
+ /**
+ * Constructs a new clusterer for the specified graph.
+ *
+ * @param numEdgesToRemove the number of edges to be progressively removed from the graph
+ */
+ public EdgeBetweennessClusterer(int numEdgesToRemove) {
+ mNumEdgesToRemove = numEdgesToRemove;
+ edges_removed = new LinkedHashMap<E, Pair<V>>();
+ }
- /**
- * Finds the set of clusters which have the strongest "community structure".
- * The more edges removed the smaller and more cohesive the clusters.
- * @param graph the graph
- */
- public Set<Set<V>> apply(Graph<V,E> graph) {
-
- if (mNumEdgesToRemove < 0 || mNumEdgesToRemove > graph.getEdgeCount()) {
- throw new IllegalArgumentException("Invalid number of edges passed in.");
- }
-
- edges_removed.clear();
+ /**
+ * Finds the set of clusters which have the strongest "community structure". The more edges
+ * removed the smaller and more cohesive the clusters.
+ *
+ * @param graph the graph
+ */
+ public Set<Set<V>> apply(Graph<V, E> graph) {
- for (int k=0;k<mNumEdgesToRemove;k++) {
- BetweennessCentrality<V,E> bc = new BetweennessCentrality<V,E>(graph);
- E to_remove = null;
- double score = 0;
- for (E e : graph.getEdges())
- if (bc.getEdgeScore(e) > score)
- {
- to_remove = e;
- score = bc.getEdgeScore(e);
- }
- edges_removed.put(to_remove, graph.getEndpoints(to_remove));
- graph.removeEdge(to_remove);
- }
+ if (mNumEdgesToRemove < 0 || mNumEdgesToRemove > graph.getEdgeCount()) {
+ throw new IllegalArgumentException("Invalid number of edges passed in.");
+ }
- WeakComponentClusterer<V,E> wcSearch = new WeakComponentClusterer<V,E>();
- Set<Set<V>> clusterSet = wcSearch.apply(graph);
+ edges_removed.clear();
- for (Map.Entry<E, Pair<V>> entry : edges_removed.entrySet())
- {
- Pair<V> endpoints = entry.getValue();
- graph.addEdge(entry.getKey(), endpoints.getFirst(), endpoints.getSecond());
+ for (int k = 0; k < mNumEdgesToRemove; k++) {
+ BetweennessCentrality<V, E> bc = new BetweennessCentrality<V, E>(graph);
+ E to_remove = null;
+ double score = 0;
+ for (E e : graph.getEdges())
+ if (bc.getEdgeScore(e) > score) {
+ to_remove = e;
+ score = bc.getEdgeScore(e);
}
- return clusterSet;
+ edges_removed.put(to_remove, graph.getEndpoints(to_remove));
+ graph.removeEdge(to_remove);
}
- /**
- * Retrieves the list of all edges that were removed
- * (assuming extract(...) was previously called).
- * The edges returned
- * are stored in order in which they were removed.
- *
- * @return the edges in the original graph
- */
- public List<E> getEdgesRemoved()
- {
- return new ArrayList<E>(edges_removed.keySet());
+ WeakComponentClusterer<V, E> wcSearch = new WeakComponentClusterer<V, E>();
+ Set<Set<V>> clusterSet = wcSearch.apply(graph);
+
+ for (Map.Entry<E, Pair<V>> entry : edges_removed.entrySet()) {
+ Pair<V> endpoints = entry.getValue();
+ graph.addEdge(entry.getKey(), endpoints.getFirst(), endpoints.getSecond());
}
+ return clusterSet;
+ }
+
+ /**
+ * Retrieves the list of all edges that were removed (assuming extract(...) was previously
+ * called). The edges returned are stored in order in which they were removed.
+ *
+ * @return the edges in the original graph
+ */
+ public List<E> getEdgesRemoved() {
+ return new ArrayList<E>(edges_removed.keySet());
+ }
}
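Reviewer note: a usage sketch for the clusterer reformatted above (illustrative, not part of this patch; the UndirectedSparseGraph implementation and the toy graph are assumptions). As the code shows, apply removes the k highest-betweenness edges, clusters what remains into weak components, then restores the removed edges:

// EdgeBetweennessDemo.java (hypothetical)
import edu.uci.ics.jung.algorithms.cluster.EdgeBetweennessClusterer;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.UndirectedSparseGraph;
import java.util.Set;

public class EdgeBetweennessDemo {
  public static void main(String[] args) {
    Graph<Integer, String> g = new UndirectedSparseGraph<Integer, String>();
    // two triangles joined by a single bridge; the bridge carries all
    // inter-community shortest paths, so it has the highest betweenness
    g.addEdge("e01", 0, 1);
    g.addEdge("e12", 1, 2);
    g.addEdge("e02", 0, 2);
    g.addEdge("e34", 3, 4);
    g.addEdge("e45", 4, 5);
    g.addEdge("e35", 3, 5);
    g.addEdge("bridge", 2, 3);

    // removing 1 edge should split the graph into the two triangles
    EdgeBetweennessClusterer<Integer, String> clusterer =
        new EdgeBetweennessClusterer<Integer, String>(1);
    Set<Set<Integer>> clusters = clusterer.apply(g);
    System.out.println(clusters); // expect the clusters {0, 1, 2} and {3, 4, 5}
    System.out.println(clusterer.getEdgesRemoved()); // [bridge]
  }
}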
diff --git a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/VoltageClusterer.java b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/VoltageClusterer.java
index cc2abf8b..41b4789b 100644
--- a/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/VoltageClusterer.java
+++ b/jung-algorithms/src/main/java/edu/uci/ics/jung/algorithms/cluster/VoltageClusterer.java
@@ -16,7 +16,6 @@
import edu.uci.ics.jung.algorithms.util.KMeansClusterer;
import edu.uci.ics.jung.algorithms.util.KMeansClusterer.NotEnoughClustersException;
import edu.uci.ics.jung.graph.Graph;
-
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -31,340 +30,293 @@
import java.util.Set;
/**
- * Clusters vertices of a <code>Graph</code> based on their ranks as
- * calculated by <code>VoltageScorer</code>. This algorithm is based on,
- * but not identical with, the method described in the paper below.
- * The primary difference is that Wu and Huberman assume a priori that the clusters
- * are of approximately the same size, and therefore use a more complex
- * method than k-means (which is used here) for determining cluster
- * membership based on co-occurrence data.
+ * Clusters vertices of a <code>Graph</code> based on their ranks as calculated by
+ * <code>VoltageScorer</code>. This algorithm is based on, but not identical with, the method described in
+ * the paper below. The primary difference is that Wu and Huberman assume a priori that the clusters
+ * are of approximately the same size, and therefore use a more complex method than k-means (which
+ * is used here) for determining cluster membership based on co-occurrence data.
*
* <p>The algorithm proceeds as follows:
+ *
* <ul>
- * <li>first, generate a set of candidate clusters as follows:
- * <ul>
- * <li>pick (widely separated) vertex pair, run VoltageScorer
- * <li>group the vertices in two clusters according to their voltages
- * <li>store resulting candidate clusters
- * </ul>
- * <li>second, generate k-1 clusters as follows:
- * <ul>
- * <li>pick a vertex v as a cluster 'seed'
- * <br>(Wu/Huberman: most frequent vertex in candidate clusters)
- * <li>calculate co-occurrence over all candidate clusters of v with each other
- * vertex
- * <li>separate co-occurrence counts into high/low;
- * high vertices constitute a cluster
- * <li>remove v's vertices from candidate clusters; continue
- * </ul>
- * <li>finally, remaining unassigned vertices are assigned to the kth ("garbage")
- * cluster.
+ *   <li>first, generate a set of candidate clusters as follows:
+ *       <ul>
+ *         <li>pick (widely separated) vertex pair, run VoltageScorer
+ *         <li>group the vertices in two clusters according to their voltages
+ *         <li>store resulting candidate clusters
+ *       </ul>
+ *   <li>second, generate k-1 clusters as follows:
+ *       <ul>
+ *         <li>pick a vertex v as a cluster 'seed' <br>
+ *             (Wu/Huberman: most frequent vertex in candidate clusters)
+ *         <li>calculate co-occurrence over all candidate clusters of v with each other vertex
+ *         <li>separate co-occurrence counts into high/low; high vertices constitute a cluster
+ *         <li>remove v's vertices from candidate clusters; continue
+ *       </ul>
+ *   <li>finally, remaining unassigned vertices are assigned to the kth ("garbage") cluster.
* </ul>
*
- * NOTE: Depending on how the co-occurrence data splits the data into
- * clusters, the number of clusters returned by this algorithm may be less than the
- * number of clusters requested. The number of clusters will never be more than
- * the number requested, however.
+ *
+ * <p>NOTE: Depending on how the co-occurrence data splits the data into clusters, the number
+ * of clusters returned by this algorithm may be less than the number of clusters requested. The
+ * number of clusters will never be more than the number requested, however.
*
* @author Joshua O'Madadhain
- * @see "'Finding communities in linear time: a physics approach', Fang Wu and Bernardo Huberman, http://www.hpl.hp.com/research/idl/papers/linear/"
+ * @see "'Finding communities in linear time: a physics approach', Fang Wu and Bernardo Huberman,
+ * http://www.hpl.hp.com/research/idl/papers/linear/"
* @see VoltageScorer
* @see KMeansClusterer
*/
-public class VoltageClusterer<V,E>
-{
- protected int num_candidates;
- protected KMeansClusterer<V> kmc;
- protected Random rand;
- protected Graph<V,E> g;
-
- /**
- * Creates an instance of a VoltageCluster with the specified parameters.
- * These are mostly parameters that are passed directly to VoltageScorer
- * and KMeansClusterer.
- *
- * @param g the graph whose vertices are to be clustered
- * @param num_candidates the number of candidate clusters to create
- */
- public VoltageClusterer(Graph<V,E> g, int num_candidates)
- {
- if (num_candidates < 1)
- throw new IllegalArgumentException("must generate >=1 candidates");
-
- this.num_candidates = num_candidates;
- this.kmc = new KMeansClusterer<V>();
- rand = new Random();
- this.g = g;
- }
-
- protected void setRandomSeed(int random_seed)
- {
- rand = new Random(random_seed);
- }
-
- /**
- * @param v the vertex whose community we wish to discover
- * @return a community (cluster) centered around <code>v</code>.
- */
- public Collection<Set<V>> getCommunity(V v)
- {
- return cluster_internal(v, 2);
+public class VoltageClusterer<V, E> {
+ protected int num_candidates;
+ protected KMeansClusterer<V> kmc;
+ protected Random rand;
+ protected Graph<V, E> g;
+
+ /**
+ * Creates an instance of a VoltageCluster with the specified parameters. These are mostly
+ * parameters that are passed directly to VoltageScorer and KMeansClusterer.
+ *
+ * @param g the graph whose vertices are to be clustered
+ * @param num_candidates the number of candidate clusters to create
+ */
+ public VoltageClusterer(Graph<V, E> g, int num_candidates) {
+ if (num_candidates < 1) throw new IllegalArgumentException("must generate >=1 candidates");
+
+ this.num_candidates = num_candidates;
+ this.kmc = new KMeansClusterer<V>();
+ rand = new Random();
+ this.g = g;
+ }
+
+ protected void setRandomSeed(int random_seed) {
+ rand = new Random(random_seed);
+ }
+
+ /**
+ * @param v the vertex whose community we wish to discover
+ * @return a community (cluster) centered around <code>v</code>.
+ */
+ public Collection<Set<V>> getCommunity(V v) {
+ return cluster_internal(v, 2);
+ }
+
+ /**
+ * Clusters the vertices of <code>g</code> into <code>num_clusters</code> clusters, based on their
+ * connectivity.
+ *
+ * @param num_clusters the number of clusters to identify
+ * @return a collection of clusters (sets of vertices)
+ */
+ public Collection<Set<V>> cluster(int num_clusters) {
+ return cluster_internal(null, num_clusters);
+ }
+
+ /**
+ * Does the work of <code>getCommunity</code> and <code>cluster</code>.
+ *
+ * @param origin the vertex around which clustering is to be done
+ * @param num_clusters the (maximum) number of clusters to find
+ * @return a collection of clusters (sets of vertices)
+ */
+ protected Collection<Set<V>> cluster_internal(V origin, int num_clusters) {
+ // generate candidate clusters
+ // repeat the following 'samples' times:
+ // * pick (widely separated) vertex pair, run VoltageScorer
+ // * use k-means to identify 2 communities in ranked graph
+ // * store resulting candidate communities
+ ArrayList<V> v_array = new ArrayList<V>(g.getVertices());
+
+ LinkedList<Set<V>> candidates = new LinkedList<Set<V>>();
+
+ for (int j = 0; j < num_candidates; j++) {
+ V source;
+ if (origin == null) source = v_array.get((int) (rand.nextDouble() * v_array.size()));
+ else source = origin;
+ V target = null;
+ do {
+ target = v_array.get((int) (rand.nextDouble() * v_array.size()));
+ } while (source == target);
+ VoltageScorer<V, E> vs = new VoltageScorer<V, E>(g, source, target);
+ vs.evaluate();
+
+ Map<V, double[]> voltage_ranks = new HashMap<V, double[]>();
+ for (V v : g.getVertices()) voltage_ranks.put(v, new double[] {vs.getVertexScore(v)});
+
+ // addOneCandidateCluster(candidates, voltage_ranks);
+ addTwoCandidateClusters(candidates, voltage_ranks);
}
- /**
- * Clusters the vertices of <code>g</code> into
- * <code>num_clusters</code> clusters, based on their connectivity.
- * @param num_clusters the number of clusters to identify
- * @return a collection of clusters (sets of vertices)
- */
- public Collection<Set<V>> cluster(int num_clusters)
- {
- return cluster_internal(null, num_clusters);
+ // repeat the following k-1 times:
+ // * pick a vertex v as a cluster seed
+ // (Wu/Huberman: most frequent vertex in candidates)
+ // * calculate co-occurrence (in candidate clusters)
+ // of this vertex with all others
+ // * use k-means to separate co-occurrence counts into high/low;
+ // high vertices are a cluster
+ // * remove v's vertices from candidate clusters
+
+ Collection<Set<V>> clusters = new LinkedList<Set<V>>();
+ Set<V> remaining = new HashSet<V>(g.getVertices());
+
+ List<V> seed_candidates = getSeedCandidates(candidates);
+ int seed_index = 0;
+
+ for (int j = 0; j < (num_clusters - 1); j++) {
+ if (remaining.isEmpty()) {
+ break;
+ }
+
+ V seed;
+ if (seed_index == 0 && origin != null) {
+ seed = origin;
+ } else {
+ do {
+ seed = seed_candidates.get(seed_index++);
+ } while (!remaining.contains(seed));
+ }
+
+ Map<V, double[]> occur_counts = getObjectCounts(candidates, seed);
+ if (occur_counts.size() < 2) {
+ break;
+ }
+
+ // now that we have the counts, cluster them...
+ try {
+ Collection