# https://github.com/rfsantacruz/cvx-nb/blob/master/DoublyStochasticApproximation.ipynb
# approximate dh-matrix
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt
%matplotlib inline
def sinkhorn(X, max_it=10):
Y = np.copy(X)
for it in range(max_it):
# Line multipliers normalizer
D1 = np.diagflat(1. / np.sum(Y, axis=1))
# Cols multipliers normalizer
D2 = np.diagflat(1. / np.sum(np.dot(D1, Y), axis=0))
# Normalize D1 Y D2
Y = np.dot(np.dot(D1, Y),D2)
return Y
X = np.random.rand(2,2)
Y = sinkhorn(X)
print("Random input:\n{}".format(X))
print("Sinkhorn Approx. DSM:\n{}".format(Y))
def generate_random_symmetric_doubly_stochastic_matrix(n, max_it = 10):
# Generate a random matrix of size n x n
matrix = np.random.rand(n, n)
# Make the matrix symmetric
matrix = (matrix + matrix.T) / 2
# Normalize each column to sum up to 1
wc = np.sqrt(np.sum(matrix, axis=0))
matrix = matrix / wc
matrix = matrix / wc[:, np.newaxis]
matrix = sinkhorn(matrix,max_it=max_it)
return matrix
n = 10
x = generate_random_symmetric_doubly_stochastic_matrix(n)
print(np.sum(x-np.transpose(x)))
print('row sum',np.sum(x,axis=0))
print('col sum',np.sum(x,axis=1))
def get_pagerank(matrix):
graph = nx.from_numpy_array(matrix, create_using=nx.DiGraph)
pagerank = nx.pagerank(graph)
pr = np.array(list(pagerank.values()))
return pr
pr = get_pagerank(matrix)
print('page rank: ', pr)
pr_rank = np.argsort(-pr)
print('rank wrt pagerank', pr_rank)
sim_rank = np.argsort(-np.sum(matrix,axis=0))
print('rank wrt pagerank', sim_rank)
n1 = pr_rank[:k]
n2 = sim_rank[:k]
print('n1, n2',n1,n2)