Add solution that auto-generates its input data

This commit is contained in:
Szymon Polak 2021-09-30 17:23:10 +02:00
commit 7bbcdbe031

View File

@ -0,0 +1,180 @@
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import random
import array
import subprocess
import shlex
def _write_random_edges(path, node_count):
    """Write ``node_count`` random weighted edges to a tab-separated file.

    The file starts with the header ``#FromNodeId<TAB>ToNodeId<TAB>Weight``
    followed by one edge per line. Node ids are drawn from
    ``1..node_count`` and weights from ``0.01..1.0`` in steps of 0.01.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(path, 'w') as out:
        out.write('#FromNodeId' + '\t' + 'ToNodeId' + '\t' + 'Weight' + '\n')
        for _ in range(node_count):
            # random.randint includes BOTH endpoints, so the upper bound
            # is node_count itself (not node_count + 1).
            src = random.randint(1, node_count)
            dst = random.randint(1, node_count)
            weight = random.randint(1, 100) / 100
            out.write('%d\t%d\t%s\n' % (src, dst, weight))


def create_files():
    """Generate the two random edge-list files used by the script.

    Writes ``100k.txt`` (100000 edges over nodes 1..100000) and ``20.txt``
    (20 edges over nodes 1..20) into the current working directory,
    overwriting any existing files with those names.

    Returns:
        0, always.
    """
    _write_random_edges('100k.txt', 100000)
    _write_random_edges('20.txt', 20)
    return 0
def _read_weighted_digraph(path):
    """Load a ``from to weight`` edge list from ``path`` as a directed graph."""
    return nx.read_edgelist(path, nodetype=int,
                            data=(('weight', float),),
                            create_using=nx.DiGraph())


def _normalized_distribution(values, nodes, label):
    """Return ``values`` rescaled to sum to 1, requiring an entry per node."""
    missing = set(nodes) - set(values)
    if missing:
        raise nx.NetworkXError(
            '%s dictionary must have a value for every node. '
            'Missing nodes %s' % (label, missing))
    total = float(sum(values.values()))
    return dict((k, v / total) for k, v in values.items())


def _pagerank_power_iteration(G, alpha, personalization, max_iter, tol,
                              nstart, weight, dangling):
    """Run PageRank power iteration on ``G``; return ``(ranks, errors)``.

    ``errors[i]`` is the L1 change of the rank vector in iteration ``i``.
    Raises ``nx.NetworkXError`` if the change never drops below
    ``N * tol`` within ``max_iter`` iterations.
    """
    D = G if G.is_directed() else G.to_directed()
    # Right-stochastic copy: each node's outgoing weights sum to 1.
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        p = _normalized_distribution(personalization, G, 'Personalization')

    if dangling is None:
        # Rank of nodes without out-edges is redistributed according to
        # the personalization vector.
        dangling_weights = p
    else:
        dangling_weights = _normalized_distribution(dangling, G, 'Dangling')
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    errors = []
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            share = alpha * xlast[n]
            for nbr in W[n]:
                x[nbr] += share * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        err = sum(abs(x[n] - xlast[n]) for n in x)
        errors.append(err)
        if err < N * tol:
            return x, errors
    raise nx.NetworkXError(
        'pagerank: power iteration failed to converge '
        'in %d iterations.' % max_iter)


def pagerank(path='./100k.txt', alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
             dangling=None):
    """Compute PageRank for the edge list in ``path`` and print the result.

    Args:
        path: Edge-list file with ``from to weight`` columns.
        alpha: Damping factor.
        personalization: Optional dict node -> personalization value; must
            contain every node of the graph.
        max_iter: Maximum number of power-iteration steps.
        tol: Per-node convergence tolerance; iteration stops once the total
            L1 change is below ``number_of_nodes * tol``.
        nstart: Optional dict node -> starting rank value.
        weight: Name of the edge attribute used as edge weight.
        dangling: Optional dict node -> weight used to redistribute the rank
            of nodes without out-edges; must contain every node.

    Returns:
        Dict mapping node -> rank (also printed), or ``{}`` for an empty
        graph.

    Raises:
        nx.NetworkXError: If ``personalization``/``dangling`` miss a node or
            the iteration does not converge within ``max_iter`` steps.
    """
    G = _read_weighted_digraph(path)
    if len(G) == 0:
        return {}
    ranks, _ = _pagerank_power_iteration(
        G, alpha, personalization, max_iter, tol, nstart, weight, dangling)
    print(ranks)
    return ranks


def pagerank_chart(path='./100k.txt', fname='./test.pdf', alpha=0.85,
                   personalization=None, max_iter=100, tol=1.0e-6,
                   nstart=None, weight='weight', dangling=None):
    """Plot the PageRank convergence error per iteration and print the chart.

    Runs the same power iteration as ``pagerank`` on the edge list in
    ``path``, plots the L1 error of every iteration against its index,
    saves the figure to ``fname`` and hands that file to ``lpr``.

    Args:
        path: Edge-list file with ``from to weight`` columns.
        fname: Output file for the chart.
        alpha, personalization, max_iter, tol, nstart, weight, dangling:
            Same meaning as in ``pagerank``.

    Returns:
        0 after the chart has been written, or ``{}`` for an empty graph
        (kept for backward compatibility).

    Raises:
        nx.NetworkXError: If ``personalization``/``dangling`` miss a node or
            the iteration does not converge within ``max_iter`` steps.
    """
    G = _read_weighted_digraph(path)
    if len(G) == 0:
        return {}
    _, errors = _pagerank_power_iteration(
        G, alpha, personalization, max_iter, tol, nstart, weight, dangling)

    plt.plot(np.arange(len(errors)), np.array(errors))
    plt.savefig(fname)
    try:
        # Argument list instead of a formatted command string, so a file
        # name containing spaces is still passed as a single argument.
        subprocess.Popen(['lpr', fname])
    except OSError as exc:
        # The chart is already saved; a missing `lpr` should not abort.
        print('could not send %s to the printer: %s' % (fname, exc))
    return 0
def main():
    """Generate the sample data, show the small graph, then run PageRank.

    Steps: write both edge-list files, print and draw the 20-edge graph,
    run networkx's own ``pagerank`` on the large graph, and finally call
    the local ``pagerank`` and ``pagerank_chart`` functions.
    """
    create_files()

    # Both files share the same column layout.
    reader_options = dict(nodetype=int, data=(('weight', float),))

    small_graph = nx.read_edgelist('./20.txt',
                                   create_using=nx.DiGraph(),
                                   **reader_options)
    print(small_graph.edges(data=True))
    nx.draw(small_graph)
    plt.show()

    large_graph = nx.read_edgelist('./100k.txt',
                                   create_using=nx.DiGraph(),
                                   **reader_options)
    # Library result with alpha=0.4; computed but not printed.
    reference_ranks = nx.pagerank(large_graph, 0.4)

    pagerank()
    pagerank_chart()


if __name__ == "__main__":
    main()