add auto generating data solution
This commit is contained in:
commit
7bbcdbe031
180
pagerank_auto_generated_data.py
Normal file
180
pagerank_auto_generated_data.py
Normal file
@ -0,0 +1,180 @@
|
||||
import numpy as np
|
||||
import networkx as nx
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.animation import FuncAnimation
|
||||
import random
|
||||
import array
|
||||
import subprocess
|
||||
import shlex
|
||||
|
||||
def _write_random_edge_list(path, edge_count, node_count):
    """Write a tab-separated file of random weighted directed edges.

    The file starts with a ``#FromNodeId  ToNodeId  Weight`` header line and
    is followed by ``edge_count`` rows of ``source<TAB>target<TAB>weight``.

    Args:
        path: Destination file; an existing file is overwritten.
        edge_count: Number of edge rows to write.
        node_count: Node ids are drawn uniformly from ``1..node_count``.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(path, 'w', encoding='utf-8') as out:
        out.write('#FromNodeId' + '\t' + 'ToNodeId' + '\t' + 'Weight' + '\n')
        for _ in range(edge_count):
            # random.randint includes BOTH endpoints, so the upper bound is
            # node_count itself (not node_count + 1 as with range()).
            source = random.randint(1, node_count)
            target = random.randint(1, node_count)
            # Weight is one of 0.01, 0.02, ..., 1.00.
            weight = random.randint(1, 100) / 100
            out.write('{}\t{}\t{}\n'.format(source, target, weight))


def create_files():
    """Generate the two random edge-list files used by the rest of the script.

    Writes ``100k.txt`` (100000 edges over node ids 1..100000) and ``20.txt``
    (20 edges over node ids 1..20) into the current working directory,
    replacing any existing files of the same name.

    Returns:
        0 once both files have been written.
    """
    _write_random_edge_list('100k.txt', 100000, 100000)
    _write_random_edge_list('20.txt', 20, 20)
    return 0
|
||||
|
||||
def _read_weighted_digraph(path='./100k.txt'):
    """Load a tab-separated ``source target weight`` edge list as a DiGraph."""
    return nx.read_edgelist(path, nodetype=int,
                            data=(('weight', float),),
                            create_using=nx.DiGraph())


def _normalized_node_values(G, values, label):
    """Return ``values`` rescaled to sum to 1, requiring an entry per node.

    Raises:
        nx.NetworkXError: if any node of ``G`` has no entry in ``values``.
    """
    missing = set(G) - set(values)
    if missing:
        raise nx.NetworkXError(
            '{} dictionary must have a value for every node. '
            'Missing nodes {}'.format(label, missing))
    total = float(sum(values.values()))
    return {node: value / total for node, value in values.items()}


def _pagerank_power_iteration(G, alpha=0.85, personalization=None,
                              max_iter=100, tol=1.0e-6, nstart=None,
                              weight='weight', dangling=None):
    """Run PageRank power iteration on a non-empty graph.

    Args:
        G: Graph to rank; an undirected graph is converted to directed.
        alpha: Damping factor.
        personalization: Optional per-node teleport weights (every node
            required); uniform when None.
        max_iter: Maximum number of sweeps.
        tol: Per-node tolerance; iteration stops when the L1 change between
            two sweeps drops below ``N * tol``.
        nstart: Optional starting value per node; uniform when None.
        weight: Edge attribute holding the edge weight.
        dangling: Optional per-node weights for redistributing the rank of
            nodes without out-edges; defaults to the teleport vector.

    Returns:
        ``(ranks, errors)``: the final rank per node, and the L1 change
        recorded after each sweep (one entry per sweep performed).

    Raises:
        nx.NetworkXError: if ``personalization``/``dangling`` miss a node, or
            the iteration does not converge within ``max_iter`` sweeps.
    """
    D = G if G.is_directed() else G.to_directed()
    # Right-stochastic copy: each node's out-edge weights sum to 1.
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    if nstart is None:
        ranks = dict.fromkeys(W, 1.0 / N)
    else:
        start_total = float(sum(nstart.values()))
        ranks = {node: value / start_total for node, value in nstart.items()}

    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        p = _normalized_node_values(G, personalization, 'Personalization')

    if dangling is None:
        dangling_weights = p
    else:
        dangling_weights = _normalized_node_values(G, dangling,
                                                   'Dangling node')
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    errors = []
    for _ in range(max_iter):
        previous = ranks
        ranks = dict.fromkeys(previous, 0)
        danglesum = alpha * sum(previous[n] for n in dangling_nodes)
        for n in ranks:
            # Left multiply (x^T = xlast^T * W): push n's rank to successors.
            for nbr, edge_data in W[n].items():
                ranks[nbr] += alpha * previous[n] * edge_data[weight]
            ranks[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # Convergence check on the L1 norm of the change.
        err = sum(abs(ranks[n] - previous[n]) for n in ranks)
        errors.append(err)
        if err < N * tol:
            return ranks, errors
    raise nx.NetworkXError(
        'pagerank: power iteration failed to converge in '
        '{} iterations.'.format(max_iter))


def pagerank():
    """Compute PageRank for ``./100k.txt`` and print the rank of every node.

    Uses damping 0.85, at most 100 sweeps and tolerance 1e-6 per node, with
    uniform start, teleport and dangling distributions.

    Returns:
        ``{}`` if the graph is empty; otherwise the printed dict mapping each
        node to its rank.

    Raises:
        nx.NetworkXError: if the iteration does not converge.
    """
    G = _read_weighted_digraph()
    if len(G) == 0:
        return {}
    ranks, _ = _pagerank_power_iteration(G)
    print(ranks)
    return ranks


def pagerank_chart():
    """Plot the PageRank convergence error per sweep and send it to ``lpr``.

    Runs the same iteration as ``pagerank`` on ``./100k.txt``, plots the L1
    change against the sweep index, saves the figure to ``./test.pdf`` and
    hands that file to the ``lpr`` print command.

    Returns:
        ``{}`` if the graph is empty, otherwise 0.

    Raises:
        nx.NetworkXError: if the iteration does not converge.
        FileNotFoundError: if the ``lpr`` executable is not available.
    """
    G = _read_weighted_digraph()
    if len(G) == 0:
        return {}
    _, errors = _pagerank_power_iteration(G)

    fname = './test.pdf'
    # Use a dedicated figure so the curve is never drawn on top of a figure
    # left over from earlier plotting (e.g. a graph drawing).
    fig, ax = plt.subplots()
    ax.plot(np.arange(len(errors)), np.array(errors))
    fig.savefig(fname)
    plt.close(fig)

    # Argument list instead of a split shell-style string; wait for lpr to
    # finish spooling so no child process is left unreaped.
    subprocess.run(['lpr', fname], check=False)
    return 0
|
||||
|
||||
def main():
    """Drive the demo: build data, show the small graph, run all PageRanks.

    Steps, in order: regenerate both edge-list files, print and draw the
    graph from ``./20.txt``, run NetworkX's own ``pagerank`` on the graph from
    ``./100k.txt``, then run this file's ``pagerank`` and ``pagerank_chart``.
    """
    def load_graph(path):
        # Weighted, directed edge list with integer node ids.
        return nx.read_edgelist(
            path,
            nodetype=int,
            data=(('weight', float),),
            create_using=nx.DiGraph(),
        )

    create_files()

    small_graph = load_graph('./20.txt')
    print(small_graph.edges(data=True))
    nx.draw(small_graph)
    plt.show()

    large_graph = load_graph('./100k.txt')
    # Library PageRank with 0.4 as the second positional argument; the result
    # is computed but intentionally not printed.
    library_ranks = nx.pagerank(large_graph, 0.4)

    pagerank()

    pagerank_chart()
|
||||
|
||||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user