"""Generate random weighted edge lists and run PageRank power iteration on them.

Reconstructed from the patch "add auto generating data solution"
(commit 7bbcdbe031bd4bc1129a5fac2f2909fe26159dc5, Szymon Polak,
30 Sep 2021), which created ``pagerank_auto_generated_data.py``.
"""

import array
import random
import shlex
import subprocess

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from matplotlib.animation import FuncAnimation

# Edge-list files written by create_files() and read back by the other steps.
LARGE_EDGE_FILE = '100k.txt'
LARGE_EDGE_COUNT = 100000
SMALL_EDGE_FILE = '20.txt'
SMALL_EDGE_COUNT = 20

# Header starts with '#', which networkx.read_edgelist skips as a comment.
_EDGE_FILE_HEADER = '#FromNodeId\tToNodeId\tWeight\n'


def _write_random_edges(path, edge_count):
    """Write ``edge_count`` random weighted edges to ``path``.

    Each row is ``from<TAB>to<TAB>weight``. Node ids are drawn with
    ``random.randint(1, edge_count + 1)``; ``randint`` includes both bounds,
    so ids range over ``1 .. edge_count + 1``. Weights are multiples of 0.01
    in ``0.01 .. 1.0``.
    """
    with open(path, 'w', encoding='utf-8') as out:
        out.write(_EDGE_FILE_HEADER)
        for _ in range(edge_count):
            # Draw order (source, target, weight) is kept stable so that a
            # seeded run produces the same file as before.
            source = random.randint(1, edge_count + 1)
            target = random.randint(1, edge_count + 1)
            weight = random.randint(1, 100) / 100
            out.write('{}\t{}\t{}\n'.format(source, target, weight))


def create_files():
    """Create the two random edge-list files used by the rest of the script.

    Writes ``100k.txt`` (100000 edges) and ``20.txt`` (20 edges) into the
    current working directory, overwriting existing files.

    Returns:
        0, always.
    """
    _write_random_edges(LARGE_EDGE_FILE, LARGE_EDGE_COUNT)
    _write_random_edges(SMALL_EDGE_FILE, SMALL_EDGE_COUNT)
    return 0


def _load_weighted_digraph(path):
    """Read a tab/space separated ``from to weight`` file into a DiGraph."""
    return nx.read_edgelist(
        path,
        nodetype=int,
        data=(('weight', float),),
        create_using=nx.DiGraph(),
    )


def _normalized(values):
    """Return a copy of the mapping ``values`` scaled so its values sum to 1."""
    total = float(sum(values.values()))
    return {key: value / total for key, value in values.items()}


def _pagerank_power_iteration(G, alpha=0.85, personalization=None,
                              max_iter=100, tol=1.0e-6, nstart=None,
                              weight='weight', dangling=None):
    """Run PageRank power iteration on ``G``.

    Args:
        G: networkx graph; an undirected graph is converted with
            ``to_directed()``.
        alpha: damping factor.
        personalization: optional ``{node: value}`` teleport distribution;
            must contain every node of ``G``.
        max_iter: maximum number of iterations.
        tol: per-node tolerance; iteration stops once the L1 change between
            two successive vectors is below ``N * tol``.
        nstart: optional ``{node: value}`` starting vector.
        weight: edge attribute holding the edge weight.
        dangling: optional ``{node: value}`` distribution used for nodes
            without out-edges; must contain every node of ``G``.

    Returns:
        ``(ranks, errors, converged)`` where ``ranks`` is the last computed
        ``{node: rank}`` vector, ``errors`` the L1 change after each
        iteration, and ``converged`` whether the tolerance was reached.
        An empty graph yields ``({}, [], True)``.

    Raises:
        nx.NetworkXError: if ``personalization`` or ``dangling`` is given but
            lacks a value for some node.
    """
    if len(G) == 0:
        return {}, [], True

    D = G if G.is_directed() else G.to_directed()
    # Row-normalise the weights so each node's out-edges sum to 1.
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        x = _normalized(nstart)

    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise nx.NetworkXError(
                'Personalization dictionary must have a value for every '
                'node. Missing nodes {}'.format(missing))
        p = _normalized(personalization)

    if dangling is None:
        # Dangling nodes redistribute their rank like a teleport step.
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise nx.NetworkXError(
                'Dangling node dictionary must have a value for every '
                'node. Missing nodes {}'.format(missing))
        dangling_weights = _normalized(dangling)

    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    errors = []
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # Push this node's rank along its out-edges ...
            for nbr, edge in W[n].items():
                x[nbr] += alpha * xlast[n] * edge[weight]
            # ... then add its share of dangling mass and teleportation.
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        err = sum(abs(x[n] - xlast[n]) for n in x)
        errors.append(err)
        if err < N * tol:
            return x, errors, True
    return x, errors, False


def pagerank():
    """Compute PageRank for ``100k.txt`` and print the resulting ranks.

    Returns:
        ``{}`` if the graph has no nodes; the ``{node: rank}`` dictionary
        (which is also printed) when the iteration converges; ``None`` if it
        does not converge within the iteration limit, in which case nothing
        is printed.
    """
    G = _load_weighted_digraph(LARGE_EDGE_FILE)
    if len(G) == 0:
        return {}
    ranks, _errors, converged = _pagerank_power_iteration(G)
    if not converged:
        return None
    print(ranks)
    return ranks


def pagerank_chart():
    """Plot the PageRank convergence error for ``100k.txt`` and print it.

    On convergence the per-iteration L1 error is plotted against the
    iteration index, saved to ``./test.pdf`` and handed to the ``lpr``
    command (started without waiting for it to finish).

    Returns:
        ``{}`` if the graph has no nodes; ``0`` after the chart was written;
        ``None`` if the iteration did not converge (no chart is produced).
    """
    G = _load_weighted_digraph(LARGE_EDGE_FILE)
    if len(G) == 0:
        return {}
    _ranks, errors, converged = _pagerank_power_iteration(G)
    if not converged:
        return None

    plt.plot(np.arange(len(errors)), np.array(errors))
    fname = './test.pdf'
    plt.savefig(fname)
    # Argument list instead of a formatted command string: no quoting issues.
    subprocess.Popen(['lpr', fname])
    return 0


def main():
    """Generate the data files, draw the small graph and run both PageRanks."""
    create_files()

    small_graph = _load_weighted_digraph(SMALL_EDGE_FILE)
    print(small_graph.edges(data=True))
    nx.draw(small_graph)
    plt.show()

    large_graph = _load_weighted_digraph(LARGE_EDGE_FILE)
    # Library PageRank run with alpha=0.4; its result is currently discarded.
    nx.pagerank(large_graph, alpha=0.4)

    pagerank()

    pagerank_chart()


if __name__ == "__main__":
    main()