add auto generating data solution

2021-09-30 17:23:10 +02:00 · 2021-09-30 17:23:10 +02:00 · 7bbcdbe031
commit 7bbcdbe031
1 changed files with 180 additions and 0 deletions
--- a/pagerank_auto_generated_data.py
+++ b/pagerank_auto_generated_data.py
@ -0,0 +1,180 @@
+import numpy as np
+import networkx as nx
+import matplotlib.pyplot as plt
+from matplotlib.animation import FuncAnimation
+import random
+import array
+import subprocess
+import shlex
+
+def create_files():
+    f = open('100k.txt', 'w')
+    f.write('#FromNodeId' + '\t'+ 'ToNodeId' + '\t' + 'Weight' + '\n')
+
+    for x in range(1,100001):
+        x = random.randint(1,100001)
+        y = random.randint(1,100001)
+        z = random.randint(1,100)
+        z = z / 100
+        f.write(str(x) + '\t'+ str(y) + '\t' + str(z) + '\n')
+    f.close()
+
+    f = open('20.txt', 'w')
+    f.write('#FromNodeId' + '\t'+ 'ToNodeId' + '\t' + 'Weight' + '\n')
+
+    for x in range(1,21):
+        x = random.randint(1,21)
+        y = random.randint(1,21)
+        z = random.randint(1,100)
+        z = z / 100
+        f.write(str(x) + '\t'+ str(y) + '\t' + str(z) + '\n')
+    f.close()
+    return 0
+
+def pagerank():			
+	G = nx.read_edgelist('./100k.txt',nodetype=int,
+		data=(('weight',float),), create_using=nx.DiGraph())
+	alpha=0.85
+	personalization=None
+	max_iter=100
+	tol=1.0e-6
+	nstart=None
+	weight='weight'
+	dangling=None
+	
+	xdd = []
+	ydd = []
+				
+	if len(G) == 0:
+		return {}
+
+	if not G.is_directed():
+		D = G.to_directed()
+	else:
+		D = G
+	W = nx.stochastic_graph(D, weight=weight)
+	N = W.number_of_nodes()
+	if nstart is None:
+		x = dict.fromkeys(W, 1.0 / N)
+	else:
+		s = float(sum(nstart.values()))
+		x = dict((k, v / s) for k, v in nstart.items())
+
+	if personalization is None:
+		p = dict.fromkeys(W, 1.0 / N)
+	else:
+		missing = set(G) - set(personalization)
+		if missing:
+			raise NetworkXError('Error 404' % missing)
+		s = float(sum(personalization.values()))
+		p = dict((k, v / s) for k, v in personalization.items())
+
+	if dangling is None:
+		dangling_weights = p
+	else:
+		missing = set(G) - set(dangling)
+		if missing:
+			raise NetworkXError('Error 404' % missing)
+		s = float(sum(dangling.values()))
+		dangling_weights = dict((k, v/s) for k, v in dangling.items())
+	dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
+	for zdd in range(max_iter):
+		xdd.append(zdd)
+		xlast = x
+		x = dict.fromkeys(xlast.keys(), 0)
+		danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
+		for n in x:
+			for nbr in W[n]:
+				x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
+			x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
+		err = sum([abs(x[n] - xlast[n]) for n in x])
+		ydd.append(err)
+		if err < N*tol:
+			return print(x)
+           
+def pagerank_chart():			
+	G = nx.read_edgelist('./100k.txt',nodetype=int,
+		data=(('weight',float),), create_using=nx.DiGraph())
+	alpha=0.85
+	personalization=None
+	max_iter=100
+	tol=1.0e-6
+	nstart=None
+	weight='weight'
+	dangling=None
+	
+	xdd = []
+	ydd = []
+				
+	if len(G) == 0:
+		return {}
+
+	if not G.is_directed():
+		D = G.to_directed()
+	else:
+		D = G
+	W = nx.stochastic_graph(D, weight=weight)
+	N = W.number_of_nodes()
+	if nstart is None:
+		x = dict.fromkeys(W, 1.0 / N)
+	else:
+		s = float(sum(nstart.values()))
+		x = dict((k, v / s) for k, v in nstart.items())
+
+	if personalization is None:
+		p = dict.fromkeys(W, 1.0 / N)
+	else:
+		missing = set(G) - set(personalization)
+		if missing:
+			raise NetworkXError('Error 404' % missing)
+		s = float(sum(personalization.values()))
+		p = dict((k, v / s) for k, v in personalization.items())
+
+	if dangling is None:
+		dangling_weights = p
+	else:
+		missing = set(G) - set(dangling)
+		if missing:
+			raise NetworkXError('Error 404' % missing)
+		s = float(sum(dangling.values()))
+		dangling_weights = dict((k, v/s) for k, v in dangling.items())
+	dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
+	for zdd in range(max_iter):
+		xdd.append(zdd)
+		xlast = x
+		x = dict.fromkeys(xlast.keys(), 0)
+		danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
+		for n in x:
+			for nbr in W[n]:
+				x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
+			x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
+		err = sum([abs(x[n] - xlast[n]) for n in x])
+		ydd.append(err)
+		if err < N*tol:
+			xdd1 = np.array(xdd)
+			ydd1 = np.array(ydd)
+			plt.plot(xdd1, ydd1)
+			fname = './test.pdf'
+			plt.savefig(fname)
+			proc=subprocess.Popen(shlex.split('lpr {f}'.format(f=fname)))
+			return 0
+            
+def main():
+    create_files()
+    G = nx.read_edgelist('./20.txt',nodetype=int,
+  	data=(('weight',float),), create_using=nx.DiGraph())
+    print(G.edges(data=True))
+    nx.draw(G)
+    plt.show()
+    
+    G = nx.read_edgelist('./100k.txt',nodetype=int,
+  	data=(('weight',float),), create_using=nx.DiGraph())
+    pr = nx.pagerank(G,0.4)
+    #print(pr)
+    
+    pagerank()
+    
+    pagerank_chart()
+
+if __name__ == "__main__":
+    main()