100 KiB
100 KiB
"""
Visualization of clusters
"""
import sys
try:
import networkx as nx
except ImportError:
sys.stderr.write("Networkx not present\n")
raise
try:
from matplotlib.pylab import show, cm, axis
except ImportError:
sys.stderr.write("Matplotlib not present\n")
raise
def draw_graph(matrix, clusters, **kwargs):
"""
Visualize the clustering
:param matrix: The unprocessed adjacency matrix
:param clusters: list of tuples containing clusters as returned
by 'get_clusters'
:param kwargs: Additional keyword arguments to be passed to
networkx.draw_networkx
"""
# make a networkx graph from the adjacency matrix
graph = nx.Graph(matrix)
# map node to cluster id for colors
cluster_map = {node: i for i, cluster in enumerate(clusters) for node in cluster}
colors = [cluster_map[i] for i in range(len(graph.nodes()))]
# if colormap not specified in kwargs, use a default
if not kwargs.get("cmap", False):
kwargs["cmap"] = cm.tab20
# draw
nx.draw_networkx(graph, node_color=colors, **kwargs)
axis("off")
show(block=False)
# import numpy as np
# from scipy.sparse import isspmatrix, dok_matrix, csc_matrix
# import sklearn.preprocessing
# def sparse_allclose(a, b, rtol=1e-5, atol=1e-8):
# """
# Version of np.allclose for use with sparse matrices
# """
# c = np.abs(a - b) - rtol * np.abs(b)
# # noinspection PyUnresolvedReferences
# return c.max() <= atol
# def normalize(matrix):
# """
# Normalize the columns of the given matrix
# :param matrix: The matrix to be normalized
# :returns: The normalized matrix
# """
# return sklearn.preprocessing.normalize(matrix, norm="l1", axis=0)
# def inflate(matrix, power):
# """
# Apply cluster inflation to the given matrix by raising
# each element to the given power.
# :param matrix: The matrix to be inflated
# :param power: Cluster inflation parameter
# :returns: The inflated matrix
# """
# if isspmatrix(matrix):
# return normalize(matrix.power(power))
# return normalize(np.power(matrix, power))
# def expand(matrix, power):
# """
# Apply cluster expansion to the given matrix by raising
# the matrix to the given power.
# :param matrix: The matrix to be expanded
# :param power: Cluster expansion parameter
# :returns: The expanded matrix
# """
# if isspmatrix(matrix):
# return matrix ** power
# return np.linalg.matrix_power(matrix, power)
# def add_self_loops(matrix, loop_value):
# """
# Add self-loops to the matrix by setting the diagonal
# to loop_value
# :param matrix: The matrix to add loops to
# :param loop_value: Value to use for self-loops
# :returns: The matrix with self-loops
# """
# shape = matrix.shape
# assert shape[0] == shape[1], "Error, matrix is not square"
# if isspmatrix(matrix):
# new_matrix = matrix.todok()
# else:
# new_matrix = matrix.copy()
# for i in range(shape[0]):
# new_matrix[i, i] = loop_value
# if isspmatrix(matrix):
# return new_matrix.tocsc()
# return new_matrix
# def prune(matrix, threshold):
# """
# Prune the matrix so that very small edges are removed.
# The maximum value in each column is never pruned.
# :param matrix: The matrix to be pruned
# :param threshold: The value below which edges will be removed
# :returns: The pruned matrix
# """
# if isspmatrix(matrix):
# pruned = dok_matrix(matrix.shape)
# pruned[matrix >= threshold] = matrix[matrix >= threshold]
# pruned = pruned.tocsc()
# else:
# pruned = matrix.copy()
# pruned[pruned < threshold] = 0
# # keep max value in each column. same behaviour for dense/sparse
# num_cols = matrix.shape[1]
# row_indices = matrix.argmax(axis=0).reshape((num_cols,))
# col_indices = np.arange(num_cols)
# pruned[row_indices, col_indices] = matrix[row_indices, col_indices]
# return pruned
# def converged(matrix1, matrix2):
# """
# Check for convergence by determining if
# matrix1 and matrix2 are approximately equal.
# :param matrix1: The matrix to compare with matrix2
# :param matrix2: The matrix to compare with matrix1
# :returns: True if matrix1 and matrix2 approximately equal
# """
# if isspmatrix(matrix1) or isspmatrix(matrix2):
# return sparse_allclose(matrix1, matrix2)
# return np.allclose(matrix1, matrix2)
# def iterate(matrix, expansion, inflation):
# """
# Run a single iteration (expansion + inflation) of the mcl algorithm
# :param matrix: The matrix to perform the iteration on
# :param expansion: Cluster expansion factor
# :param inflation: Cluster inflation factor
# """
# # Expansion
# matrix = expand(matrix, expansion)
# # Inflation
# matrix = inflate(matrix, inflation)
# return matrix
# def get_clusters(matrix):
# """
# Retrieve the clusters from the matrix
# :param matrix: The matrix produced by the MCL algorithm
# :returns: A list of tuples where each tuple represents a cluster and
# contains the indices of the nodes belonging to the cluster
# """
# if not isspmatrix(matrix):
# # cast to sparse so that we don't need to handle different
# # matrix types
# matrix = csc_matrix(matrix)
# # get the attractors - non-zero elements of the matrix diagonal
# attractors = matrix.diagonal().nonzero()[0]
# # somewhere to put the clusters
# clusters = set()
# # the nodes in the same row as each attractor form a cluster
# for attractor in attractors:
# cluster = tuple(matrix.getrow(attractor).nonzero()[1].tolist())
# clusters.add(cluster)
# return sorted(list(clusters))
# def run_mcl(matrix, expansion=2, inflation=2, loop_value=1,
# iterations=100, pruning_threshold=0.001, pruning_frequency=1,
# convergence_check_frequency=1):
# """
# Perform MCL on the given similarity matrix
# :param matrix: The similarity matrix to cluster
# :param expansion: The cluster expansion factor
# :param inflation: The cluster inflation factor
# :param loop_value: Initialization value for self-loops
# :param iterations: Maximum number of iterations
# (actual number of iterations will be less if convergence is reached)
# :param pruning_threshold: Threshold below which matrix elements will be set
# set to 0
# :param pruning_frequency: Perform pruning every 'pruning_frequency'
# iterations.
# :param convergence_check_frequency: Perform the check for convergence
# every convergence_check_frequency iterations
# :param verbose: Print extra information to the console
# :returns: The final matrix
# """
# assert expansion > 1, "Invalid expansion parameter"
# assert inflation > 1, "Invalid inflation parameter"
# assert loop_value >= 0, "Invalid loop_value"
# assert iterations > 0, "Invalid number of iterations"
# assert pruning_threshold >= 0, "Invalid pruning_threshold"
# assert pruning_frequency > 0, "Invalid pruning_frequency"
# assert convergence_check_frequency > 0, "Invalid convergence_check_frequency"
# print("-" * 50)
# print("MCL Parameters")
# print("Expansion: {}".format(expansion))
# print("Inflation: {}".format(inflation))
# if pruning_threshold > 0:
# print("Pruning threshold: {}, frequency: {} iteration{}".format(
# pruning_threshold, pruning_frequency, "s" if pruning_frequency > 1 else ""))
# else:
# print("No pruning")
# print("Convergence check: {} iteration{}".format(
# convergence_check_frequency, "s" if convergence_check_frequency > 1 else ""))
# print("Maximum iterations: {}".format(iterations))
# print("{} matrix mode".format("Sparse" if isspmatrix(matrix) else "Dense"))
# print("-" * 50)
# # Initialize self-loops
# if loop_value > 0:
# matrix = add_self_loops(matrix, loop_value)
# # Normalize
# matrix = normalize(matrix)
# # iterations
# for i in range(iterations):
# print("Iteration {}".format(i + 1))
# # store current matrix for convergence checking
# last_mat = matrix.copy()
# # perform MCL expansion and inflation
# matrix = iterate(matrix, expansion, inflation)
# # prune
# if pruning_threshold > 0 and i % pruning_frequency == pruning_frequency - 1:
# print("Pruning")
# matrix = prune(matrix, pruning_threshold)
# # Check for convergence
# if i % convergence_check_frequency == convergence_check_frequency - 1:
# print("Checking for convergence")
# if converged(matrix, last_mat):
# print("Converged after {} iteration{}".format(i + 1, "s" if i > 0 else ""))
# break
# print("-" * 50)
# return matrix
# # # G = nx.karate_club_graph()
# # G = nx.balanced_tree(2,3)
# G = nx.balanced_tree(2,5)
# # Build adjacency matrix
# A = nx.to_numpy_matrix(G)
# # Run MCL algorithm
# result = run_mcl(A)
# clusters = get_clusters(result)
# print(clusters)
# draw_graph(A, clusters, node_size=50, with_labels=True, edge_color="silver")
DRUGI ALGORYTM
import sys
import numpy as np
#rusujemy grafy
def draw(G, A, cluster_map):
graph = nx.Graph(G)
clust_map = {}
for k, vals in cluster_map.items():
for v in vals:
clust_map[v] = k
colors = []
for i in range(len(G.nodes())):
colors.append(clust_map.get(i, 100))
nx.draw_networkx(graph,node_color =colors,linewidths=7)
axis("off")
show(block=False)
def normalize(A):
#Normalizujemy kolumny macierzy
# Normalize the columns of the given matrix
return sklearn.preprocessing.normalize(A, norm="l1", axis=0)
def inflate(A, inflate_factor):
#Podnosimy każdy element macierzy do potęgi
# Apply cluster inflation to the given matrix by raising each element to the given power.
return normalize(np.power(A, inflate_factor))
def expand(A, expand_factor):
# Apply cluster expansion to the given matrix by raising the matrix to the given power.
return np.linalg.matrix_power(A, expand_factor)
def add_diag(A, mult_factor):
# Add self-loops to the matrix by setting the diagonal to 1
return A + mult_factor * np.identity(A.shape[0])
def get_clusters(A):
clusters = []
for i, r in enumerate((A>0).tolist()):
if r[i]:
clusters.append(A[i,:]>0)
clust_map ={}
for cn , c in enumerate(clusters):
for x in [ i for i, x in enumerate(c) if x ]:
clust_map[cn] = clust_map.get(cn, []) + [x]
return clust_map
def stop(M, i):
if i%5==4:
m = np.max( M**2 - M) - np.min( M**2 - M)
if m==0:
print("Stop at iteration %s" % i)
return True
return False
def mcl(M, expand_factor = 2, inflate_factor = 2, max_loop = 10 , mult_factor = 1):
M = add_diag(M, mult_factor)
M = normalize(M)
for i in range(max_loop):
M = expand(M, expand_factor)
M = inflate(M, inflate_factor)
if stop(M, i): break
clusters = get_clusters(M)
return M, clusters
def networkx_mcl(G, expand_factor = 2, inflate_factor = 2, max_loop = 50 , mult_factor = 1):
A = nx.adjacency_matrix(G)
return mcl(np.array(A.todense()), expand_factor, inflate_factor, max_loop, mult_factor)
def calculate_and_draw(Graph):
M = nx.to_numpy_matrix(Graph)
print(" number of nodes: %s\n" % M.shape[0])
M, clusters = networkx_mcl(Graph)
[print(key, ": ", value) for key, value in clusters.items()]
draw(Graph, M, clusters)
# https://networkx.org/documentation/stable/reference/generators.html
G_bin = nx.binomial_tree(4)
calculate_and_draw(G_bin)
number of nodes: 16 Stop at iteration 14 0 : [0, 1] 1 : [2, 3] 2 : [4, 5] 3 : [6, 7] 4 : [8, 9] 5 : [10, 12] 6 : [11, 13] 7 : [14, 15]
G_balanced = nx.balanced_tree(2,3)
calculate_and_draw(G_balanced)
number of nodes: 15 0 : [0, 1, 3, 7, 8] 1 : [0, 1, 4, 9, 10] 2 : [0, 2, 5, 11, 12] 3 : [0, 2, 6, 13, 14]
G_balanced = nx.balanced_tree(2,5)
calculate_and_draw(G_balanced)
number of nodes: 63 0 : [0, 1, 3, 4] 1 : [0, 2, 5, 6] 2 : [7, 15, 31, 32] 3 : [7, 16, 33, 34] 4 : [8, 17, 35, 36] 5 : [8, 18, 37, 38] 6 : [9, 19, 39, 40] 7 : [9, 20, 41, 42] 8 : [10, 21, 43, 44] 9 : [10, 22, 45, 46] 10 : [11, 23, 47, 48] 11 : [11, 24, 49, 50] 12 : [12, 25, 51, 52] 13 : [12, 26, 53, 54] 14 : [13, 27, 55, 56] 15 : [13, 28, 57, 58] 16 : [14, 29, 59, 60] 17 : [14, 30, 61, 62]