pagerank/pagerank_auto_generated_data.py

180 lines
4.5 KiB
Python

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import random
import array
import subprocess
import shlex
def create_files():
f = open('100k.txt', 'w')
f.write('#FromNodeId' + '\t'+ 'ToNodeId' + '\t' + 'Weight' + '\n')
for x in range(1,100001):
x = random.randint(1,100001)
y = random.randint(1,100001)
z = random.randint(1,100)
z = z / 100
f.write(str(x) + '\t'+ str(y) + '\t' + str(z) + '\n')
f.close()
f = open('20.txt', 'w')
f.write('#FromNodeId' + '\t'+ 'ToNodeId' + '\t' + 'Weight' + '\n')
for x in range(1,21):
x = random.randint(1,21)
y = random.randint(1,21)
z = random.randint(1,100)
z = z / 100
f.write(str(x) + '\t'+ str(y) + '\t' + str(z) + '\n')
f.close()
return 0
def pagerank():
G = nx.read_edgelist('./100k.txt',nodetype=int,
data=(('weight',float),), create_using=nx.DiGraph())
alpha=0.85
personalization=None
max_iter=100
tol=1.0e-6
nstart=None
weight='weight'
dangling=None
xdd = []
ydd = []
if len(G) == 0:
return {}
if not G.is_directed():
D = G.to_directed()
else:
D = G
W = nx.stochastic_graph(D, weight=weight)
N = W.number_of_nodes()
if nstart is None:
x = dict.fromkeys(W, 1.0 / N)
else:
s = float(sum(nstart.values()))
x = dict((k, v / s) for k, v in nstart.items())
if personalization is None:
p = dict.fromkeys(W, 1.0 / N)
else:
missing = set(G) - set(personalization)
if missing:
raise NetworkXError('Error 404' % missing)
s = float(sum(personalization.values()))
p = dict((k, v / s) for k, v in personalization.items())
if dangling is None:
dangling_weights = p
else:
missing = set(G) - set(dangling)
if missing:
raise NetworkXError('Error 404' % missing)
s = float(sum(dangling.values()))
dangling_weights = dict((k, v/s) for k, v in dangling.items())
dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
for zdd in range(max_iter):
xdd.append(zdd)
xlast = x
x = dict.fromkeys(xlast.keys(), 0)
danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
for n in x:
for nbr in W[n]:
x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
err = sum([abs(x[n] - xlast[n]) for n in x])
ydd.append(err)
if err < N*tol:
return print(x)
def pagerank_chart():
G = nx.read_edgelist('./100k.txt',nodetype=int,
data=(('weight',float),), create_using=nx.DiGraph())
alpha=0.85
personalization=None
max_iter=100
tol=1.0e-6
nstart=None
weight='weight'
dangling=None
xdd = []
ydd = []
if len(G) == 0:
return {}
if not G.is_directed():
D = G.to_directed()
else:
D = G
W = nx.stochastic_graph(D, weight=weight)
N = W.number_of_nodes()
if nstart is None:
x = dict.fromkeys(W, 1.0 / N)
else:
s = float(sum(nstart.values()))
x = dict((k, v / s) for k, v in nstart.items())
if personalization is None:
p = dict.fromkeys(W, 1.0 / N)
else:
missing = set(G) - set(personalization)
if missing:
raise NetworkXError('Error 404' % missing)
s = float(sum(personalization.values()))
p = dict((k, v / s) for k, v in personalization.items())
if dangling is None:
dangling_weights = p
else:
missing = set(G) - set(dangling)
if missing:
raise NetworkXError('Error 404' % missing)
s = float(sum(dangling.values()))
dangling_weights = dict((k, v/s) for k, v in dangling.items())
dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
for zdd in range(max_iter):
xdd.append(zdd)
xlast = x
x = dict.fromkeys(xlast.keys(), 0)
danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
for n in x:
for nbr in W[n]:
x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
err = sum([abs(x[n] - xlast[n]) for n in x])
ydd.append(err)
if err < N*tol:
xdd1 = np.array(xdd)
ydd1 = np.array(ydd)
plt.plot(xdd1, ydd1)
fname = './test.pdf'
plt.savefig(fname)
proc=subprocess.Popen(shlex.split('lpr {f}'.format(f=fname)))
return 0
def main():
create_files()
G = nx.read_edgelist('./20.txt',nodetype=int,
data=(('weight',float),), create_using=nx.DiGraph())
print(G.edges(data=True))
nx.draw(G)
plt.show()
G = nx.read_edgelist('./100k.txt',nodetype=int,
data=(('weight',float),), create_using=nx.DiGraph())
pr = nx.pagerank(G,0.4)
#print(pr)
pagerank()
pagerank_chart()
if __name__ == "__main__":
main()